[llvm] [msan] Handle NEON vector load (PR #130457)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 8 19:40:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Thurston Dang (thurstond)
<details>
<summary>Changes</summary>
This adds an explicit handler for:
- llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
- llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
- llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
- llvm.aarch64.neon.ld2r, llvm.aarch64.neon.ld3r, llvm.aarch64.neon.ld4r
instead of relying on the default strict handler.
Updates the tests from https://github.com/llvm/llvm-project/pull/125267
---
Patch is 289.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130457.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+100-4)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll (+1110-919)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a077c85ffc410..9ce29ba7618a2 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4073,10 +4073,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
- // The intrinsic may require floating-point but shadows can be arbitrary
- // bit patterns, of which some would be interpreted as "invalid"
- // floating-point values (NaN etc.); we assume the intrinsic will happily
- // copy them.
+ // The AVX masked load intrinsics do not have integer variants. We use the
+ // floating-point variants, and assume that the intrinsic will happily copy
+ // the shadows even if they are interpreted as "invalid" floating-point
+ // values (NaN etc.).
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
if (!MS.TrackOrigins)
@@ -4242,6 +4242,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ // Handle Arm NEON vector load intrinsics (vld*).
+ //
+ // The WithLane instructions (ld[234]lane) are similar to:
+ // call {<4 x i32>, <4 x i32>, <4 x i32>}
+ // @llvm.aarch64.neon.ld3lane.v4i32.p0
+ // (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
+ // %A)
+ //
+ // The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
+ // to:
+ // call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
+ void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
+ unsigned int numArgs = I.arg_size();
+
+ // Return type is a struct of vectors of integers or floating-point
+ assert(I.getType()->isStructTy());
+ [[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
+ assert(RetTy->getNumElements() > 0);
+ assert(isa<FixedVectorType>(RetTy->getElementType(0)));
+ assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
+ RetTy->getElementType(0)->isFPOrFPVectorTy());
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(RetTy->getElementType(i) == RetTy->getElementType(0));
+
+ if (WithLane) {
+ // 2, 3 or 4 vectors, plus lane number, plus input pointer
+ assert(numArgs >= 4);
+ assert(numArgs <= 6);
+
+ // Return type is a struct of the input vectors
+ assert(RetTy->getNumElements() + 2 == numArgs);
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
+ } else
+ assert(numArgs == 1);
+
+ IRBuilder<> IRB(&I);
+
+ SmallVector<Value *, 6> ShadowArgs;
+ if (WithLane) {
+ for (unsigned int i = 0; i < numArgs - 2; i++)
+ ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
+
+ // Lane number, passed verbatim
+ Value *LaneNumber = I.getArgOperand(numArgs - 2);
+ ShadowArgs.push_back(LaneNumber);
+
+ // TODO: blend shadow of lane number into output shadow?
+ insertShadowCheck(LaneNumber, &I);
+ }
+
+ Value *Src = I.getArgOperand(numArgs - 1);
+ assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+ const Align Alignment = Align(1);
+
+ Type *SrcShadowTy = getShadowTy(Src);
+ Value *SrcShadowPtr, *SrcOriginPtr;
+ std::tie(SrcShadowPtr, SrcOriginPtr) =
+ getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+ ShadowArgs.push_back(SrcShadowPtr);
+
+ CallInst *CI;
+ // The NEON vector load instructions handled by this function all have
+ // integer variants. It is easier to use those rather than trying to cast
+ // a struct of vectors of floats into a struct of vectors of integers.
+ CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+ setShadow(&I, CI);
+
+ if (!MS.TrackOrigins)
+ return;
+
+ Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+ setOrigin(&I, PtrSrcOrigin);
+ }
+
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
@@ -4946,6 +5022,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4r: {
+ handleNEONVectorLoad(I, /*WithLane=*/false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld4lane: {
+ handleNEONVectorLoad(I, /*WithLane=*/true);
+ break;
+ }
+
// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
index 9bae334b2831f..99e9ab939847c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
; Forked from llvm/test/CodeGen/AArch64/arm64-ld1.ll
-; Incorrectly handled (handleUnknownInstruction):
+;
+; Explicitly handled (handleNEONVectorLoad):
; - llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
; - llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
; - llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
@@ -19,20 +21,21 @@ define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind #0 {
; and from the argument of the function also defined by ABI (i.e., x0)
; CHECK-LABEL: define %struct.__neon_int8x8x2_t @ld2_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP13]], <8 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
@@ -43,22 +46,25 @@ define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x3_t @ld3_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP15]], <8 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP12]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
@@ -69,24 +75,29 @@ define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x4_t @ld4_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP17]], <8 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP21]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], <8 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP10]], <8 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
@@ -105,20 +116,21 @@ define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x2_t @ld2_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8> } [[TMP13]], <16 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
@@ -129,22 +141,25 @@ define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x3_t @ld3_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP15]], <16 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
@@ -155,24 +170,29 @@ define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x4_t @ld4_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/130457
More information about the llvm-commits
mailing list