[llvm] [msan] Add handlers for AVX masked load/store intrinsics (PR #123857)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 16:20:17 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Thurston Dang (thurstond)
<details>
<summary>Changes</summary>
This patch adds explicit support for AVX masked load/store intrinsics, largely by applying the intrinsics to the shadows (but subtly different to handleIntrinsicByApplyingToShadow()).
We do not reuse the handleMaskedLoad/Store functions. The key challenge is that the LLVM masked intrinsics require a vector of booleans, while AVX masked intrinsics use the MSBs of a vector of integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that the x86 backend does not know how to efficiently convert from a vector of booleans back into the AVX mask format; therefore, they (and we) do not reduce AVX masked intrinsics into LLVM masked intrinsics.
---
Patch is 76.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123857.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+137)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll (+88-72)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll (+80-72)
- (modified) llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll (+88-72)
- (modified) llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll (+80-72)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 587e5c1cc842e0..ba913fafb08bdd 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3751,6 +3751,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, Origin);
}
+ // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+ // dst mask src
+ //
+ // Note: it is difficult to combine this function with handleMaskedStore. The
+ // key challenge is that the LLVM masked intrinsics require a vector of
+ // booleans, while AVX masked intrinsics use the MSBs of a vector of
+ // integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that
+ // the x86 backend does not know how to efficiently convert from a vector of
+ // booleans back into the AVX mask format; therefore, they (and we) do not
+ // reduce AVX masked intrinsics into LLVM masked intrinsics.
+ void handleAVXMaskedStore(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+
+ Value *Dst = I.getArgOperand(0);
+ assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+ Value *Mask = I.getArgOperand(1);
+ assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+ Value *Src = I.getArgOperand(2);
+ assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+ const Align Alignment = Align(1);
+
+ Value *SrcShadow = getShadow(Src);
+
+ if (ClCheckAccessAddress) {
+ insertShadowCheck(Dst, &I);
+ insertShadowCheck(Mask, &I);
+ }
+
+ Value *DstShadowPtr;
+ Value *DstOriginPtr;
+ std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+ Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+ SmallVector<Value *, 2> ShadowArgs;
+ ShadowArgs.append(1, DstShadowPtr);
+ ShadowArgs.append(1, Mask);
+ // The intrinsic may require floating-point but shadows can be arbitrary
+ // bit patterns, of which some would be interpreted as "invalid"
+ // floating-point values (NaN etc.); we assume the intrinsic will happily
+ // copy them.
+ ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
+ CallInst *CI =
+ IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+ setShadow(&I, CI);
+
+ if (!MS.TrackOrigins)
+ return;
+
+ // Approximation only
+ auto &DL = F.getDataLayout();
+ paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+ DL.getTypeStoreSize(SrcShadow->getType()),
+ std::max(Alignment, kMinOriginAlignment));
+ }
+
+ // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+ // return src mask
+ //
+ // Masked-off values are replaced with 0, which conveniently also represents
+ // initialized memory.
+ //
+ // We do not combine this with handleMaskedLoad; see comment in
+ // handleAVXMaskedStore for the rationale.
+ //
+ // This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
+ // because we need to apply getShadowOriginPtr, not getShadow, to the first
+ // parameter.
+ void handleAVXMaskedLoad(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+
+ Value *Src = I.getArgOperand(0);
+ assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+ Value *Mask = I.getArgOperand(1);
+ assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+ const Align Alignment = Align(1);
+
+ if (ClCheckAccessAddress) {
+ insertShadowCheck(Mask, &I);
+ }
+
+ Type *SrcShadowTy = getShadowTy(Src);
+ Value *SrcShadowPtr, *SrcOriginPtr;
+ std::tie(SrcShadowPtr, SrcOriginPtr) =
+ getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+ SmallVector<Value *, 2> ShadowArgs;
+ ShadowArgs.append(1, SrcShadowPtr);
+ ShadowArgs.append(1, Mask);
+
+ CallInst *CI =
+ IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+ // The intrinsic may require floating-point but shadows can be arbitrary
+ // bit patterns, of which some would be interpreted as "invalid"
+ // floating-point values (NaN etc.); we assume the intrinsic will happily
+ // copy them.
+ setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+ if (!MS.TrackOrigins)
+ return;
+
+ // The "pass-through" value is always zero (initialized). To the extent
+ // that that results in initialized aligned 4-byte chunks, the origin value
+ // is ignored. It is therefore correct to simply copy the origin from src.
+ Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+ setOrigin(&I, PtrSrcOrigin);
+ }
+
// Instrument BMI / BMI2 intrinsics.
// All of these intrinsics are Z = I(X, Y)
// where the types of all operands and the result match, and are either i32 or
@@ -4408,6 +4521,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVtestIntrinsic(I);
break;
+ case Intrinsic::x86_avx_maskstore_ps:
+ case Intrinsic::x86_avx_maskstore_pd:
+ case Intrinsic::x86_avx_maskstore_ps_256:
+ case Intrinsic::x86_avx_maskstore_pd_256:
+ case Intrinsic::x86_avx2_maskstore_d:
+ case Intrinsic::x86_avx2_maskstore_q:
+ case Intrinsic::x86_avx2_maskstore_d_256:
+ case Intrinsic::x86_avx2_maskstore_q_256: {
+ handleAVXMaskedStore(I);
+ break;
+ }
+
+ case Intrinsic::x86_avx_maskload_ps:
+ case Intrinsic::x86_avx_maskload_pd:
+ case Intrinsic::x86_avx_maskload_ps_256:
+ case Intrinsic::x86_avx_maskload_pd_256:
+ case Intrinsic::x86_avx2_maskload_d:
+ case Intrinsic::x86_avx2_maskload_q:
+ case Intrinsic::x86_avx2_maskload_d_256:
+ case Intrinsic::x86_avx2_maskload_q_256: {
+ handleAVXMaskedLoad(I);
+ break;
+ }
+
case Intrinsic::fshl:
case Intrinsic::fshr:
handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index 48ecd53b40c72a..8ccdc74eab6e05 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -520,20 +520,22 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr) nounwind readonly
define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_pd(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[TMP4]], <2 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[A0]], <2 x i64> [[MASK]])
+; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES]]
;
%res = call <2 x double> @llvm.x86.avx.maskload.pd(ptr %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
@@ -544,20 +546,22 @@ declare <2 x double> @llvm.x86.avx.maskload.pd(ptr, <2 x i64>) nounwind readonly
define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_pd_256(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[TMP4]], <4 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[TMP5]] to <4 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]])
-; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[A0]], <4 x i64> [[MASK]])
+; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
%res = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
@@ -568,20 +572,22 @@ declare <4 x double> @llvm.x86.avx.maskload.pd.256(ptr, <4 x i64>) nounwind read
define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_ps(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[TMP4]], <4 x i32> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[A0]], <4 x i32> [[MASK]])
+; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
@@ -592,20 +598,22 @@ declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>) nounwind readonly
define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_ps_256(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[TMP4]], <8 x i32> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x float> [[TMP5]] to <8 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[A0]], <8 x i32> [[MASK]])
+; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
%res = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
@@ -616,23 +624,25 @@ declare <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>) nounwind reado
define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_pd(
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to <2 x double>
+; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[TMP6]], <2 x i64> [[MASK:%.*]], <2 x double> [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]], <2 x double> [[A2:%.*]])
+; CHECK: 10:
+; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[A0]], <2 x i64> [[MASK]], <2 x double> [[A2:%.*]])
; CHECK-NEXT: ret void
;
call void @llvm.x86.avx.maskstore.pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2)
@@ -643,23 +653,25 @@ declare void @llvm.x86.avx.maskstore.pd(ptr, <2 x i64>, <2 x double>) nounwind
define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_pd_256(
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP3]] to <4 x double>
+; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[TMP6]], <4 x i64> [[MASK:%.*]], <4 x double> [[TMP7]])
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]], <4 x double> [[A2:%.*]])
+; CHECK: 10:
+; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[A0]], <4 x i64> [[MASK]], <4 x double> [[A2:%.*]])
; CHECK-NEXT: ret void
;
call void @llvm.x86.avx.maskstore.pd.256(ptr %a0, <4 x i64> %mask, <4 x double> %a2)
@@ -670,23 +682,25 @@ declare void @llvm.x86.avx.maskstore.pd.256(ptr, <4 x i64>, <4 x double>) nounwi
define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_ps(
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float>
+; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps(ptr [[TMP6]], <4 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/123857
More information about the llvm-commits
mailing list