[llvm] c153c61 - Handle instrumentation of scalar single-precision (_ss) intrinsics
Vitaly Buka via llvm-commits
llvm-commits at lists.llvm.org
Thu May 19 13:57:05 PDT 2022
Author: Nicolas Capens
Date: 2022-05-19T13:56:51-07:00
New Revision: c153c61fadf9499062981658113335bcd43c33fe
URL: https://github.com/llvm/llvm-project/commit/c153c61fadf9499062981658113335bcd43c33fe
DIFF: https://github.com/llvm/llvm-project/commit/c153c61fadf9499062981658113335bcd43c33fe.diff
LOG: Handle instrumentation of scalar single-precision (_ss) intrinsics
Instrumentation of scalar double-precision intrinsics such as
x86_sse41_round_sd was already handled by https://reviews.llvm.org/D82398,
but their single-precision counterparts (e.g. x86_sse41_round_ss) were not.
https://issuetracker.google.com/172238865
Reviewed By: vitalybuka
Differential Revision: https://reviews.llvm.org/D124871
Added:
Modified:
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll
llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 79adb5639dfbf..bc1a340fdc7e3 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3200,27 +3200,37 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SOC.Done(&I);
}
- // Instrument _mm_*_sd intrinsics
- void handleUnarySdIntrinsic(IntrinsicInst &I) {
+ // Instrument _mm_*_sd|ss intrinsics
+ void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
+ unsigned Width =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
Value *First = getShadow(&I, 0);
Value *Second = getShadow(&I, 1);
- // High word of first operand, low word of second
- Value *Shadow =
- IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
+ // First element of second operand, remaining elements of first operand
+ SmallVector<int, 16> Mask;
+ Mask.push_back(Width);
+ for (unsigned i = 1; i < Width; i++)
+ Mask.push_back(i);
+ Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
setShadow(&I, Shadow);
setOriginForNaryOp(I);
}
- void handleBinarySdIntrinsic(IntrinsicInst &I) {
+ void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
+ unsigned Width =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
Value *First = getShadow(&I, 0);
Value *Second = getShadow(&I, 1);
Value *OrShadow = IRB.CreateOr(First, Second);
- // High word of first operand, low word of both OR'd together
- Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
- llvm::makeArrayRef<int>({2, 1}));
+ // First element of both OR'd together, remaining elements of first operand
+ SmallVector<int, 16> Mask;
+ Mask.push_back(Width);
+ for (unsigned i = 1; i < Width; i++)
+ Mask.push_back(i);
+ Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
setShadow(&I, Shadow);
setOriginForNaryOp(I);
@@ -3495,11 +3505,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
case Intrinsic::x86_sse41_round_sd:
- handleUnarySdIntrinsic(I);
+ case Intrinsic::x86_sse41_round_ss:
+ handleUnarySdSsIntrinsic(I);
break;
case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse2_min_sd:
- handleBinarySdIntrinsic(I);
+ case Intrinsic::x86_sse_min_ss:
+ handleBinarySdSsIntrinsic(I);
break;
case Intrinsic::fshl:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll
index 2cfaaccd32eb5..8972027e40c23 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/sse-intrinsics-x86.ll
@@ -249,9 +249,10 @@ define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -281,9 +282,10 @@ define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> [[_MSPROP]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
%res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll
index 0fd304e358744..46724491940e2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/sse41-intrinsics-x86.ll
@@ -432,30 +432,29 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>
}
-define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_x86_sse41_round_ss(
-; CHECK-DAG: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
-; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
+define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, <4 x float>* %a1) #0 {
+; CHECK-LABEL: @test_x86_sse41_round_ss_load(
+; CHECK-DAG: [[TMP1:%.*]] = load i64, i64* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to i64*), align 8
+; CHECK-DAG: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
-; CHECK: 7:
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_with_origin_noreturn(i32 0) #[[ATTR3]]
; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 7)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK: 4:
+; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, <4 x float>* [[A1:%.*]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <4 x float>* [[A1]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i32>*
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7)
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
- %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ %a1b = load <4 x float>, <4 x float>* %a1
+ %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
More information about the llvm-commits
mailing list