[llvm] [msan] Add handlePairwiseShadowOrIntrinsic and use it to handle Arm NEON pairwise add (PR #126008)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 21:29:51 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Thurston Dang (thurstond)
<details>
<summary>Changes</summary>
This patch adds a function, handlePairwiseShadowOrIntrinsic that ORs pairs of adjacent shadow values; this is suitable for propagating shadow for 1- or 2-vector intrinsics that combine adjacent fields. It then applies handlePairwiseShadowOrIntrinsic to Arm NEON pairwise add: llvm.aarch64.neon.{addhn, raddhn} (currently incorrectly handled) and llvm.aarch64.neon.{saddlp, uaddlp} (currently suboptimally handled).
Updates the tests from https://github.com/llvm/llvm-project/pull/125820.
---
Patch is 75.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126008.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+62)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll (+192-238)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll (+35-21)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5df51de9e24cb3..fc5f5079c59f0d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2602,6 +2602,57 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SC.Done(&I);
}
+ /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
+ /// fields.
+ ///
+ /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
+ /// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
+ ///
+ /// TODO: adapt this function to handle horizontal add/sub?
+ void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) {
+ assert(I.arg_size() == 1 || I.arg_size() == 2);
+
+ assert(I.getType()->isVectorTy());
+ assert(I.getArgOperand(0)->getType()->isVectorTy());
+
+ FixedVectorType *ParamType =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType());
+ if (I.arg_size() == 2)
+ assert(ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType()));
+ FixedVectorType *ReturnType = cast<FixedVectorType>(I.getType());
+ assert(ParamType->getNumElements() * I.arg_size() ==
+ 2 * ReturnType->getNumElements());
+
+ IRBuilder<> IRB(&I);
+ unsigned Width = ParamType->getNumElements() * I.arg_size();
+
+ // Horizontal OR of shadow
+ SmallVector<int, 8> EvenMask;
+ SmallVector<int, 8> OddMask;
+ for (unsigned X = 0; X < Width; X += 2) {
+ EvenMask.append(1, X);
+ OddMask.append(1, X + 1);
+ }
+
+ Value *EvenShadow;
+ Value *OddShadow;
+ if (I.arg_size() == 2) {
+ EvenShadow =
+ IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), EvenMask);
+ OddShadow =
+ IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), OddMask);
+ } else {
+ EvenShadow = IRB.CreateShuffleVector(getShadow(&I, 0), EvenMask);
+ OddShadow = IRB.CreateShuffleVector(getShadow(&I, 0), OddMask);
+ }
+
+ Value *OrShadow = IRB.CreateOr(EvenShadow, OddShadow);
+ OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
+
+ setShadow(&I, OrShadow);
+ setOriginForNaryOp(I);
+ }
+
void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
// Handle multiplication by constant.
@@ -4776,6 +4827,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
break;
+ // Add Pairwise
+ case Intrinsic::aarch64_neon_addp:
+ // Floating-point Add Pairwise
+ case Intrinsic::aarch64_neon_faddp:
+ // Add Long Pairwise
+ case Intrinsic::aarch64_neon_saddlp:
+ case Intrinsic::aarch64_neon_uaddlp: {
+ handlePairwiseShadowOrIntrinsic(I);
+ break;
+ }
+
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
index f9b223dc420b92..ad0856d38c1e9a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
@@ -1601,7 +1601,7 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1611,15 +1611,12 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
;
%tmpvar1 = load <8 x i8>, ptr %A
@@ -1633,7 +1630,7 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1643,15 +1640,12 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%tmpvar1 = load <4 x i16>, ptr %A
@@ -1665,7 +1659,7 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1675,15 +1669,12 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> <i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
;
%tmpvar1 = load <2 x i32>, ptr %A
@@ -1697,7 +1688,7 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1707,15 +1698,12 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[TMP9]] to <8 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%tmpvar1 = load <16 x i8>, ptr %A
@@ -1729,7 +1717,7 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1739,15 +1727,12 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%tmpvar1 = load <8 x i16>, ptr %A
@@ -1761,7 +1746,7 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1771,15 +1756,12 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i32> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%tmpvar1 = load <4 x i32>, ptr %A
@@ -1801,7 +1783,7 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1811,15 +1793,12 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
;
%tmpvar1 = load <8 x i8>, ptr %A
@@ -1833,7 +1812,7 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1843,15 +1822,12 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%tmpvar1 = load <4 x i16>, ptr %A
@@ -1865,7 +1841,7 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1875,15 +1851,12 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> <i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
;
%tmpvar1 = load <2 x i32>, ptr %A
@@ -1897,7 +1870,7 @@ define ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/126008
More information about the llvm-commits
mailing list