[llvm] 42601e1 - [ASAN] Support memory checks on vp.load/store.
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Sun May 7 04:30:23 PDT 2023
Author: Yeting Kuo
Date: 2023-05-07T19:30:16+08:00
New Revision: 42601e116b662a2329f9bf6db9e16b561a9d7337
URL: https://github.com/llvm/llvm-project/commit/42601e116b662a2329f9bf6db9e16b561a9d7337
DIFF: https://github.com/llvm/llvm-project/commit/42601e116b662a2329f9bf6db9e16b561a9d7337.diff
LOG: [ASAN] Support memory checks on vp.load/store.
The patch adds new member MaybeEVL into InterestingMemoryOperand to represent
the effective vector length for vp intrinsics. It may be extended for some target intrinsics in the future.
Reviewed By: kito-cheng
Differential Revision: https://reviews.llvm.org/D146208
Added:
llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
Modified:
llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 8182442b4a6f0..93aadcc34f3c9 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -31,12 +31,15 @@ class InterestingMemoryOperand {
MaybeAlign Alignment;
// The mask Value, if we're looking at a masked load/store.
Value *MaybeMask;
+ // The EVL Value, if we're looking at a vp intrinsic.
+ Value *MaybeEVL;
InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
class Type *OpType, MaybeAlign Alignment,
- Value *MaybeMask = nullptr)
+ Value *MaybeMask = nullptr,
+ Value *MaybeEVL = nullptr)
: IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
- MaybeMask(MaybeMask) {
+ MaybeMask(MaybeMask), MaybeEVL(MaybeEVL) {
const DataLayout &DL = I->getModule()->getDataLayout();
TypeStoreSize = DL.getTypeStoreSizeInBits(OpType);
PtrUse = &I->getOperandUse(OperandNo);
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 74501e0244aa8..7c296b414eabd 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -494,6 +494,18 @@ void SplitBlockAndInsertForEachLane(ElementCount EC, Type *IndexTy,
Instruction *InsertBefore,
std::function<void(IRBuilderBase&, Value*)> Func);
+/// Utility function for performing a given action on each lane of a vector
+/// with \p EVL effective length. EVL is assumed > 0. To simplify porting legacy
+/// code, this defaults to unrolling the implied loop for non-scalable element
+/// counts, but this is not considered to be part of the contract of this
+/// routine, and is expected to change in the future. The callback takes as
+/// arguments an IRBuilder whose insert point is correctly set for instantiating
+/// the given index, and a value which is (at runtime) the index to access. This
+/// index *may* be a constant.
+void SplitBlockAndInsertForEachLane(
+ Value *End, Instruction *InsertBefore,
+ std::function<void(IRBuilderBase &, Value *)> Func);
+
/// Check whether BB is the merge point of a if-region.
/// If so, return the branch instruction that determines which entry into
/// BB will be taken. Also, return by references the block that will be
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b3d4c40655562..7682568cbf77f 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -656,6 +656,7 @@ struct AddressSanitizer {
: UseAfterReturn),
SSGI(SSGI) {
C = &(M.getContext());
+ DL = &M.getDataLayout();
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
Int8PtrTy = Type::getInt8PtrTy(*C);
@@ -734,6 +735,7 @@ struct AddressSanitizer {
};
LLVMContext *C;
+ const DataLayout *DL;
Triple TargetTriple;
int LongSize;
bool CompileKernel;
@@ -1319,8 +1321,9 @@ void AddressSanitizer::getInterestingMemoryOperands(
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto CI = dyn_cast<CallInst>(I)) {
- if (CI->getIntrinsicID() == Intrinsic::masked_load ||
- CI->getIntrinsicID() == Intrinsic::masked_store) {
+ switch (CI->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store: {
bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store;
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
@@ -1337,7 +1340,24 @@ void AddressSanitizer::getInterestingMemoryOperands(
Alignment = Op->getMaybeAlignValue();
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
- } else {
+ break;
+ }
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store: {
+ auto *VPI = cast<VPIntrinsic>(CI);
+ unsigned IID = CI->getIntrinsicID();
+ bool IsWrite = IID == Intrinsic::vp_store;
+ if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+ return;
+ unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+ Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+ MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL);
+ Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+ VPI->getMaskParam(),
+ VPI->getVectorLengthParam());
+ break;
+ }
+ default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
ignoreAccess(I, CI->getArgOperand(ArgNo)))
@@ -1434,17 +1454,35 @@ static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
const DataLayout &DL, Type *IntptrTy,
- Value *Mask, Instruction *I,
+ Value *Mask, Value *EVL, Instruction *I,
Value *Addr, MaybeAlign Alignment,
unsigned Granularity, Type *OpType,
bool IsWrite, Value *SizeArgument,
bool UseCalls, uint32_t Exp) {
auto *VTy = cast<VectorType>(OpType);
-
TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
auto Zero = ConstantInt::get(IntptrTy, 0);
- SplitBlockAndInsertForEachLane(VTy->getElementCount(), IntptrTy, I,
+ IRBuilder IB(I);
+ Instruction *LoopInsertBefore = I;
+ if (EVL) {
+ // The end argument of SplitBlockAndInsertForLane is assumed bigger
+ // than zero, so we should check whether EVL is zero here.
+ Type *EVLType = EVL->getType();
+ Value *IsEVLZero = IB.CreateICmpNE(EVL, ConstantInt::get(EVLType, 0));
+ LoopInsertBefore = SplitBlockAndInsertIfThen(IsEVLZero, I, false);
+ IB.SetInsertPoint(LoopInsertBefore);
+ // Cast EVL to IntptrTy.
+ EVL = IB.CreateZExtOrTrunc(EVL, IntptrTy);
+ // To avoid undefined behavior for extracting with out of range index, use
+ // the minimum of evl and element count as trip count.
+ Value *EC = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+ EVL = IB.CreateBinaryIntrinsic(Intrinsic::umin, EVL, EC);
+ } else {
+ EVL = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+ }
+
+ SplitBlockAndInsertForEachLane(EVL, LoopInsertBefore,
[&](IRBuilderBase &IRB, Value *Index) {
Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
if (auto *MaskElemC = dyn_cast<ConstantInt>(MaskElem)) {
@@ -1454,14 +1492,15 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
// Unconditional check
} else {
// Conditional check
- Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, &*IRB.GetInsertPoint(), false);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+ MaskElem, &*IRB.GetInsertPoint(), false);
IRB.SetInsertPoint(ThenTerm);
}
Value *InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
- doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, Alignment,
- Granularity, ElemTypeSize, IsWrite, SizeArgument,
- UseCalls, Exp);
+ doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(),
+ InstrumentedAddress, Alignment, Granularity,
+ ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
});
}
@@ -1510,9 +1549,9 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
unsigned Granularity = 1 << Mapping.Scale;
if (O.MaybeMask) {
- instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
- Addr, O.Alignment, Granularity, O.OpType,
- O.IsWrite, nullptr, UseCalls, Exp);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL,
+ O.getInsn(), Addr, O.Alignment, Granularity,
+ O.OpType, O.IsWrite, nullptr, UseCalls, Exp);
} else {
doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls,
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5a66100ca9ea6..9d86afa347a3e 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1651,6 +1651,27 @@ void llvm::SplitBlockAndInsertForEachLane(ElementCount EC,
}
}
+void llvm::SplitBlockAndInsertForEachLane(
+ Value *EVL, Instruction *InsertBefore,
+ std::function<void(IRBuilderBase &, Value *)> Func) {
+
+ IRBuilder<> IRB(InsertBefore);
+ Type *Ty = EVL->getType();
+
+ if (!isa<ConstantInt>(EVL)) {
+ auto [BodyIP, Index] = SplitBlockAndInsertSimpleForLoop(EVL, InsertBefore);
+ IRB.SetInsertPoint(BodyIP);
+ Func(IRB, Index);
+ return;
+ }
+
+ unsigned Num = cast<ConstantInt>(EVL)->getZExtValue();
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ IRB.SetInsertPoint(InsertBefore);
+ Func(IRB, ConstantInt::get(Ty, Idx));
+ }
+}
+
BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
new file mode 100644
index 0000000000000..094169c7e8122
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
@@ -0,0 +1,325 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
+; RUN: | FileCheck %s
+; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
+; RUN: | FileCheck %s -check-prefix=DISABLED
+
+; Support ASan instrumentation for constant-mask llvm.vp.{load,store}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;;;;;;;;;;;;;;;; STORE
+declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32) argmemonly nounwind
+
+define void @store.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.variable(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT: call void @__asan_store4(i64 [[TMP8]])
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: 10:
+; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret void
+;
+; DISABLED-LABEL: @store.v4f32.variable(
+; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret void
+;
+ tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> %mask, i32 %evl)
+ ret void
+}
+
+;; Store using two vp.stores, which should instrument them both.
+define void @store.v4f32.1010.split(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.1010.split(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT: call void @__asan_store4(i64 [[TMP8]])
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: 10:
+; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
+; CHECK: 12:
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT1:%.*]]
+; CHECK: .split1:
+; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i64 [[IV2]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
+; CHECK: 16:
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
+; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
+; CHECK-NEXT: call void @__asan_store4(i64 [[TMP18]])
+; CHECK-NEXT: br label [[TMP19]]
+; CHECK: 19:
+; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
+; CHECK-NEXT: [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
+; CHECK-NEXT: br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
+; CHECK: .split1.split:
+; CHECK-NEXT: br label [[TMP20]]
+; CHECK: 20:
+; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
+; CHECK-NEXT: ret void
+;
+; DISABLED-LABEL: @store.v4f32.1010.split(
+; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL]])
+; DISABLED-NEXT: ret void
+;
+ tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 true>, i32 %evl)
+ tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
+ ret void
+}
+
+;; Store using a vp.store after a full store. Shouldn't instrument the second one.
+define void @store.v4f32.0010.after.full.store(ptr align 4 %p, <4 x float> %arg, i32 %evl) sanitize_address {
+; CHECK-LABEL: @store.v4f32.0010.after.full.store(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: call void @__asan_store16(i64 [[TMP1]])
+; CHECK-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
+; CHECK-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; CHECK-NEXT: ret void
+;
+; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
+; DISABLED-NEXT: store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
+; DISABLED-NEXT: tail call void @llvm.vp.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret void
+;
+ store <4 x float> %arg, ptr %p
+ tail call void @llvm.vp.store.v4f32.p0(<4 x float> %arg, ptr %p, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, i32 %evl)
+ ret void
+}
+
+;;;;;;;;;;;;;;;; LOAD
+declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32) argmemonly nounwind
+declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32) argmemonly nounwind
+
+define <4 x float> @load.v4f32.variable(ptr align 4 %p, <4 x float> %arg, <4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.variable(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT: call void @__asan_load4(i64 [[TMP8]])
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: 10:
+; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+; DISABLED-LABEL: @load.v4f32.variable(
+; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret <4 x float> [[RES]]
+;
+ %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> %mask, i32 %evl)
+ ret <4 x float> %res
+}
+
+;; Load using two vp.loads, which should instrument them both.
+define <4 x float> @load.v4f32.1001.split(ptr align 4 %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.split(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT: call void @__asan_load4(i64 [[TMP8]])
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: 10:
+; CHECK-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i32 [[EVL]], 0
+; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP20:%.*]]
+; CHECK: 12:
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP13]], i64 4)
+; CHECK-NEXT: br label [[DOTSPLIT1:%.*]]
+; CHECK: .split1:
+; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, [[TMP12]] ], [ [[IV2_NEXT:%.*]], [[TMP19:%.*]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i64 [[IV2]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP19]]
+; CHECK: 16:
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 [[IV2]]
+; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
+; CHECK-NEXT: call void @__asan_load4(i64 [[TMP18]])
+; CHECK-NEXT: br label [[TMP19]]
+; CHECK: 19:
+; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i64 [[IV2]], 1
+; CHECK-NEXT: [[IV2_CHECK:%.*]] = icmp eq i64 [[IV2_NEXT]], [[TMP14]]
+; CHECK-NEXT: br i1 [[IV2_CHECK]], label [[DOTSPLIT1_SPLIT:%.*]], label [[DOTSPLIT1]]
+; CHECK: .split1.split:
+; CHECK-NEXT: br label [[TMP20]]
+; CHECK: 20:
+; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; CHECK-NEXT: ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.split(
+; DISABLED-NEXT: [[RES:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P:%.*]], <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 [[EVL:%.*]])
+; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL]])
+; DISABLED-NEXT: ret <4 x float> [[RES2]]
+;
+ %res = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, i32 %evl)
+ %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
+ ret <4 x float> %res2
+}
+
+;; Load using a vp.load after a full load. Shouldn't instrument the second one.
+define <4 x float> @load.v4f32.1001.after.full.load(ptr align 4 %p, i32 %evl) sanitize_address {
+; CHECK-LABEL: @load.v4f32.1001.after.full.load(
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: call void @__asan_load16(i64 [[TMP1]])
+; CHECK-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
+; CHECK-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; CHECK-NEXT: ret <4 x float> [[RES2]]
+;
+; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
+; DISABLED-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
+; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr [[P]], <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret <4 x float> [[RES2]]
+;
+ %res = load <4 x float>, ptr %p
+ %res2 = tail call <4 x float> @llvm.vp.load.v4f32.p0(ptr %p, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, i32 %evl)
+ ret <4 x float> %res2
+}
+
+;; Scalable vector tests
+;; ---------------------------
+declare <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
+declare void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float>, ptr, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.load.nxv4f32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT: call void @__asan_load4(i64 [[TMP10]])
+; CHECK-NEXT: br label [[TMP11]]
+; CHECK: 11:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP12]]
+; CHECK: 12:
+; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
+;
+; DISABLED-LABEL: @scalable.load.nxv4f32(
+; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
+;
+ %res = tail call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x float> %res
+}
+
+define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
+; CHECK-LABEL: @scalable.store.nxv4f32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[EVL:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP12:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT: call void @__asan_store4(i64 [[TMP10]])
+; CHECK-NEXT: br label [[TMP11]]
+; CHECK: 11:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK: .split.split:
+; CHECK-NEXT: br label [[TMP12]]
+; CHECK: 12:
+; CHECK-NEXT: tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], <vscale x 4 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret void
+;
+; DISABLED-LABEL: @scalable.store.nxv4f32(
+; DISABLED-NEXT: tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; DISABLED-NEXT: ret void
+;
+ tail call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, <vscale x 4 x i1> %mask, i32 %evl)
+ ret void
+}
More information about the llvm-commits
mailing list