[llvm] 4c997e1 - [SLP]Fix PR70507: emit freeze whenever required for bool logical ops in
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 12:25:53 PDT 2023
Author: Alexey Bataev
Date: 2023-10-31T12:23:12-07:00
New Revision: 4c997e1536e4f5b43c38f6a4325441a7804d0da1
URL: https://github.com/llvm/llvm-project/commit/4c997e1536e4f5b43c38f6a4325441a7804d0da1
DIFF: https://github.com/llvm/llvm-project/commit/4c997e1536e4f5b43c38f6a4325441a7804d0da1.diff
LOG: [SLP]Fix PR70507: emit freeze whenever required for bool logical ops in
the middle of reduction ops.
A freeze instruction must be emitted not only when the reduction root is a
bool logical op, but also when several scalars are reduced and we cannot
tell precisely whether the root is a bool logical op.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4bb6301f4612f52..58974eefc4df580 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14122,6 +14122,10 @@ class HorizontalReduction {
// Initialize the final value in the reduction.
return Res;
};
+ bool AnyBoolLogicOp =
+ any_of(ReductionOps.back(), [](Value *V) {
+ return isBoolLogicOp(cast<Instruction>(V));
+ });
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
@@ -14454,7 +14458,9 @@ class HorizontalReduction {
// To prevent poison from leaking across what used to be sequential,
// safe, scalar boolean logic operations, the reduction operand must be
// frozen.
- if (isBoolLogicOp(RdxRootInst))
+ if ((isBoolLogicOp(RdxRootInst) ||
+ (AnyBoolLogicOp && VL.size() != TrackedVals.size())) &&
+ !isGuaranteedNotToBePoison(VectorizedRoot))
VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
// Emit code to correctly handle reused reduced values, if required.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
index 1070c49093821ef..481ff15a523f8a9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
@@ -26,8 +26,9 @@ define i1 @test1(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[B]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[C]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 true, i1 [[CMP3]]
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%cmp = icmp sgt i32 %x, 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
index 45fa59bb61fe1c3..f032d4b6ecd4556 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll
@@ -9,14 +9,13 @@ define void @tes() {
; CHECK: 1:
; CHECK-NEXT: [[TMP2:%.*]] = select i1 false, i1 false, i1 false
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP4]], i1 false
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP2]], i1 [[OP_RDX]], i1 false
-; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP5:%.*]], label [[TMP6:%.*]]
+; CHECK: 5:
; CHECK-NEXT: ret void
-; CHECK: 7:
+; CHECK: 6:
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits
mailing list