[llvm] [msan] Support vst{2,3,4}_lane instructions (PR #101215)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 11:25:42 PDT 2024
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/101215
This generalizes MSan's Arm NEON vst support to include the lane-specific variants.
This also updates the test from https://github.com/llvm/llvm-project/pull/100645.
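For context, this is the kind of source that produces these intrinsics. A minimal sketch, assuming an AArch64 target and the ACLE intrinsics from arm_neon.h (the function name here is illustrative, not from the patch):

#include <arm_neon.h>

// Stores lane 1 of each input vector, interleaved, to *out (2 bytes total).
// On AArch64, Clang lowers this to llvm.aarch64.neon.st2lane.v16i8.p0, the
// intrinsic this patch teaches MSan to instrument.
void store_lane1(uint8x16x2_t vals, uint8_t *out) {
  vst2q_lane_u8(out, vals, 1);
}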
From 3a7ecf540de6510bfa6835493c66e972e58af90b Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Tue, 30 Jul 2024 17:55:43 +0000
Subject: [PATCH] [msan] Support vst{2,3,4}_lane instructions
This generalizes MSan's Arm NEON vst support to include the lane-specific
variants.
This also updates the test from https://github.com/llvm/llvm-project/pull/100645.
---
.../Instrumentation/MemorySanitizer.cpp | 56 +++--
.../MemorySanitizer/AArch64/neon_vst_lane.ll | 236 +++++++-----------
2 files changed, 126 insertions(+), 166 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 45b3edf2b8f23..fefca9e7c49c7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
+ /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
+ /// and vst{2,3,4}lane).
///
/// Arm NEON vector store intrinsics have the output address (pointer) as the
- /// last argument, with the initial arguments being the inputs. They return
- /// void.
+ /// last argument, with the initial arguments being the inputs (and lane
+ /// number for vst{2,3,4}lane). They return void.
///
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
/// abcdabcdabcdabcd... into *outP
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
/// writes aaaa...bbbb...cccc...dddd... into *outP
+ /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
/// These instructions can all be instrumented with essentially the same
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
- void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+ void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
IRBuilder<> IRB(&I);
// Don't use getNumOperands() because it includes the callee
int numArgOperands = I.arg_size();
- assert(numArgOperands >= 1);
- // The last arg operand is the output
+ // The last arg operand is the output (pointer)
+ assert(numArgOperands >= 1);
Value *Addr = I.getArgOperand(numArgOperands - 1);
assert(Addr->getType()->isPointerTy());
+ unsigned int skipTrailingOperands = 1;
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- SmallVector<Value *, 8> Shadows;
- // Every arg operand, other than the last one, is an input vector
- for (int i = 0; i < numArgOperands - 1; i++) {
+ // Second-last operand is the lane number (for vst{2,3,4}lane)
+ if (useLane) {
+      skipTrailingOperands++;
+ assert(numArgOperands >= (int)skipTrailingOperands);
+      assert(isa<IntegerType>(
+          I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
+ }
+
+ SmallVector<Value *, 8> ShadowArgs;
+ // All the initial operands are the inputs
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
Value *Shadow = getShadow(&I, i);
- Shadows.append(1, Shadow);
+ ShadowArgs.append(1, Shadow);
}
// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,20 +3931,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FixedVectorType *OutputVectorTy = FixedVectorType::get(
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
- (numArgOperands - 1));
+ (numArgOperands - skipTrailingOperands));
Type *OutputShadowTy = getShadowTy(OutputVectorTy);
+ if (useLane)
+      ShadowArgs.append(
+          1, I.getArgOperand(numArgOperands - skipTrailingOperands));
+
Value *OutputShadowPtr, *OutputOriginPtr;
// AArch64 NEON does not need alignment (unless OS requires it)
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
- Shadows.append(1, OutputShadowPtr);
+ ShadowArgs.append(1, OutputShadowPtr);
- // CreateIntrinsic will select the correct (integer) type for the
- // intrinsic; the original instruction I may have either integer- or
- // float-type inputs.
CallInst *CI =
- IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+ IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);
if (MS.TrackOrigins) {
@@ -3942,8 +3952,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// more accurately track the origins (e.g., if both inputs are
// uninitialized for vst2, we currently blame the second input, even
// though part of the output depends only on the first input).
+ //
+ // This is particularly imprecise for vst{2,3,4}lane, since only one
+ // lane of each input is actually copied to the output.
OriginCombiner OC(this, IRB);
- for (int i = 0; i < numArgOperands - 1; i++)
+ for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
OC.Add(I.getArgOperand(i));
const DataLayout &DL = F.getDataLayout();
@@ -4299,7 +4312,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4: {
- handleNEONVectorStoreIntrinsic(I);
+    handleNEONVectorStoreIntrinsic(I, /*useLane*/ false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_st2lane:
+ case Intrinsic::aarch64_neon_st3lane:
+ case Intrinsic::aarch64_neon_st4lane: {
+    handleNEONVectorStoreIntrinsic(I, /*useLane*/ true);
break;
}
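To recap the argument handling above in one place, here is a simplified standalone model (plain C++ with no LLVM dependencies; the function name and string placeholders are illustrative, not part of the patch). The shadow call reuses the original intrinsic, substituting each input vector with its shadow, passing the lane number through unchanged, and redirecting the store to the shadow of the output address:

#include <cassert>
#include <string>
#include <vector>

// Models args = [in0, ..., inN-1, (lane,) outP] -> shadow-call arguments.
std::vector<std::string> buildShadowArgs(const std::vector<std::string> &Args,
                                         bool UseLane) {
  unsigned SkipTrailingOperands = 1; // the output pointer
  if (UseLane)
    SkipTrailingOperands++; // ...plus the lane number
  assert(Args.size() >= SkipTrailingOperands);

  std::vector<std::string> ShadowArgs;
  // Every initial operand is an input vector: substitute its shadow.
  for (unsigned I = 0; I < Args.size() - SkipTrailingOperands; I++)
    ShadowArgs.push_back("shadow(" + Args[I] + ")");
  // The lane number (second-last operand) is passed through unchanged.
  if (UseLane)
    ShadowArgs.push_back(Args[Args.size() - SkipTrailingOperands]);
  // The store is redirected to the shadow of the output address.
  ShadowArgs.push_back("shadow_ptr(" + Args.back() + ")");
  return ShadowArgs;
}

// e.g. buildShadowArgs({"A", "B", "1", "D"}, /*UseLane=*/true)
//      yields {"shadow(A)", "shadow(B)", "1", "shadow_ptr(D)"}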
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
index b7a2721e80e2b..9ed364df3e677 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst_lane.ll
@@ -1601,22 +1601,20 @@ define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) sanitiz
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[A]], <16 x i8> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1627,22 +1625,20 @@ define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) sanitize_memory {
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[A]], <8 x i16> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1653,22 +1649,20 @@ define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) sanitize_memory {
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[A]], <4 x i32> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1679,22 +1673,20 @@ define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) sanitize_memory {
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st2lane_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 8:
; CHECK-NEXT: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[A]], <2 x i64> [[B]], i64 1, ptr [[D]])
; CHECK-NEXT: ret void
;
@@ -1710,22 +1702,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1740,22 +1727,17 @@ define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) sanit
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1770,22 +1752,17 @@ define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) saniti
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1800,22 +1777,17 @@ define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) saniti
define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %D) sanitize_memory {
; CHECK-LABEL: define void @st3lane_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i64> [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1835,30 +1807,22 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %E) sanitize_memory {
; CHECK-LABEL: define void @st4lane_16b(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], ptr [[E:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP5]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[E]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 1, ptr [[TMP8]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
-; CHECK: 10:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK: 10:
; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], i64 1, ptr [[E]])
; CHECK-NEXT: ret void
;
@@ -1869,30 +1833,22 @@ define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D,
define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %E) sanitize_memory {
; CHECK-LABEL: define void @st4lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i16> [[D:%.*]], ptr [[E:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP5]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[E]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i64 1, ptr [[TMP8]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
-; CHECK: 10:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK: 10:
; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[A]], <8 x i16> [[B]], <8 x i16> [[C]], <8 x i16> [[D]], i64 1, ptr [[E]])
; CHECK-NEXT: ret void
;
@@ -1903,30 +1859,22 @@ define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D,
define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %E) sanitize_memory {
; CHECK-LABEL: define void @st4lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i32> [[D:%.*]], ptr [[E:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP5]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[E]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i64 1, ptr [[TMP8]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
-; CHECK: 10:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK: 10:
; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> [[C]], <4 x i32> [[D]], i64 1, ptr [[E]])
; CHECK-NEXT: ret void
;
@@ -1937,30 +1885,22 @@ define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D,
define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %E) sanitize_memory {
; CHECK-LABEL: define void @st4lane_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i64> [[C:%.*]], <2 x i64> [[D:%.*]], ptr [[E:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP5]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[E]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i64 1, ptr [[TMP8]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
-; CHECK: 10:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK: 10:
; CHECK-NEXT: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[A]], <2 x i64> [[B]], <2 x i64> [[C]], <2 x i64> [[D]], i64 1, ptr [[E]])
; CHECK-NEXT: ret void
;
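A note on the shadow address computation that recurs in the updated checks: the constant 193514046488576 is 0xB00000000000, the XOR mask of the AArch64 shadow mapping visible in these tests, so the instrumented store targets the shadow counterpart of the application address. A minimal sketch of that mapping, with the mask taken directly from the test output (the helper name is illustrative):

#include <cstdint>

// Mirrors the pattern in the CHECK lines above:
//   %4 = ptrtoint ptr %D to i64
//   %5 = xor i64 %4, 193514046488576
//   %6 = inttoptr i64 %5 to ptr
uint64_t shadowAddress(uint64_t AppAddr) {
  return AppAddr ^ 0xB00000000000ULL; // 193514046488576
}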