[llvm] 25ee55c - [SLP] match logical and/or as reduction candidates
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 06:04:00 PDT 2021
Author: Sanjay Patel
Date: 2021-07-14T09:02:31-04:00
New Revision: 25ee55c0baff316d3a7b1d7d2830a168af3fc46a
URL: https://github.com/llvm/llvm-project/commit/25ee55c0baff316d3a7b1d7d2830a168af3fc46a
DIFF: https://github.com/llvm/llvm-project/commit/25ee55c0baff316d3a7b1d7d2830a168af3fc46a.diff
LOG: [SLP] match logical and/or as reduction candidates
This has been a work-in-progress for a long time...we finally have all of
the pieces in place to handle vectorization of compare code as shown in:
https://llvm.org/PR41312
To do this (see PhaseOrdering tests), we converted SimplifyCFG and
InstCombine to the poison-safe (select) forms of the logic ops, so now we
need to have SLP recognize those patterns and insert a freeze op to make
a safe reduction:
https://alive2.llvm.org/ce/z/NH54Ah
We get the minimal patterns with this patch, but the PhaseOrdering tests
show that we still need adjustments to get the ideal IR in some or all of
the motivating cases.
Differential Revision: https://reviews.llvm.org/D105730
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c7f36b2de0889..3dd3c1fb613be 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7295,11 +7295,22 @@ class HorizontalReduction {
RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
}
+ // And/or are potentially poison-safe logical patterns like:
+ // select x, y, false
+ // select x, true, y
+ static bool isBoolLogicOp(Instruction *I) {
+ return match(I, m_LogicalAnd(m_Value(), m_Value())) ||
+ match(I, m_LogicalOr(m_Value(), m_Value()));
+ }
+
/// Checks if instruction is associative and can be vectorized.
static bool isVectorizable(RecurKind Kind, Instruction *I) {
if (Kind == RecurKind::None)
return false;
- if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
+
+ // Integer ops that map to select instructions or intrinsics are fine.
+ if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) ||
+ isBoolLogicOp(I))
return true;
if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
@@ -7312,6 +7323,16 @@ class HorizontalReduction {
return I->isAssociative();
}
+ static Value *getRdxOperand(Instruction *I, unsigned Index) {
+ // Poison-safe 'or' takes the form: select X, true, Y
+ // To make that work with the normal operand processing, we skip the
+ // true value operand.
+ // TODO: Change the code and data structures to handle this without a hack.
+ if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1)
+ return I->getOperand(2);
+ return I->getOperand(Index);
+ }
+
/// Checks if the ParentStackElem.first should be marked as a reduction
/// operation with an extra argument or as extra argument itself.
void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
@@ -7420,9 +7441,11 @@ class HorizontalReduction {
return RecurKind::Add;
if (match(I, m_Mul(m_Value(), m_Value())))
return RecurKind::Mul;
- if (match(I, m_And(m_Value(), m_Value())))
+ if (match(I, m_And(m_Value(), m_Value())) ||
+ match(I, m_LogicalAnd(m_Value(), m_Value())))
return RecurKind::And;
- if (match(I, m_Or(m_Value(), m_Value())))
+ if (match(I, m_Or(m_Value(), m_Value())) ||
+ match(I, m_LogicalOr(m_Value(), m_Value())))
return RecurKind::Or;
if (match(I, m_Xor(m_Value(), m_Value())))
return RecurKind::Xor;
@@ -7664,7 +7687,7 @@ class HorizontalReduction {
}
// Visit operands.
- Value *EdgeVal = TreeN->getOperand(EdgeToVisit);
+ Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
if (!EdgeInst) {
// Edge value is not a reduction instruction or a leaf instruction.
@@ -7849,6 +7872,11 @@ class HorizontalReduction {
else
Builder.SetInsertPoint(RdxRootInst);
+ // To prevent poison from leaking across what used to be sequential, safe,
+ // scalar boolean logic operations, the reduction operand must be frozen.
+ if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
+ VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
+
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
index ea8a6c0c90f67..479ad437ba1ac 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -7,31 +7,23 @@ target triple = "x86_64--"
define float @test_merge_allof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_merge_allof_v4sf(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00
-; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP4]], i1 false
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2
-; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP9]], i1 false
-; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3
-; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP14]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[SPEC_SELECT:%.*]], [[LOR_LHS_FALSE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret float [[COMMON_RET_OP]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00
-; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP18]], i1 [[CMP23]], i1 false
-; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP28]], i1 false
-; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP33]], i1 false
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]]
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]]
+; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]]
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
@@ -99,26 +91,23 @@ return:
define float @test_merge_anyof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_merge_anyof_v4sf(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00
-; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]]
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2
-; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP9]]
-; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3
-; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP14]]
-; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP19]]
-; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
-; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
-; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
+; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
+; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
+; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP19]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP24]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP29]]
+; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
+; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP34]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: ret float [[RETVAL_0]]
;
@@ -187,31 +176,23 @@ return:
define float @test_separate_allof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_separate_allof_v4sf(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00
-; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP4]], i1 false
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2
-; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP9]], i1 false
-; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3
-; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP14]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret float [[COMMON_RET_OP]]
; CHECK: if.end:
-; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00
-; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP18]], i1 [[CMP23]], i1 false
-; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP28]], i1 false
-; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP33]], i1 false
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]]
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]]
+; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]]
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
@@ -282,26 +263,23 @@ return:
define float @test_separate_anyof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_separate_anyof_v4sf(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00
-; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]]
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2
-; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP9]]
-; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3
-; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP14]]
-; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP18]]
-; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]]
-; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]]
-; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00
-; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]]
-; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
+; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
+; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
+; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP18]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP23]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP28]]
+; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
+; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP33]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: ret float [[RETVAL_0]]
;
@@ -373,29 +351,21 @@ return:
define float @test_merge_allof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_merge_allof_v4si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1
-; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP2]], i1 false
-; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2
-; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP5]], i1 false
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3
-; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP8]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255
-; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 [[CMP13]], i1 false
-; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP16]], i1 false
-; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP19]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND5]], label [[RETURN]], label [[IF_END:%.*]]
+; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT: br i1 [[TMP5]], label [[RETURN]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
@@ -460,26 +430,23 @@ return:
define float @test_merge_anyof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_merge_anyof_v4si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1
-; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP2]]
-; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2
-; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP5]]
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3
-; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP8]]
-; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[VECEXT]], 255
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP11]]
-; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[VECEXT1]], 255
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP14]]
-; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[VECEXT4]], 255
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP17]]
-; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[VECEXT7]], 255
-; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP20]]
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]]
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[T]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[T]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[T]], i32 0
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255
+; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255
+; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP14]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP17]]
+; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[TMP0]], 255
+; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP20]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
; CHECK-NEXT: ret float [[RETVAL_0]]
@@ -542,31 +509,23 @@ return:
define i32 @test_separate_allof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_separate_allof_v4si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1
-; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP2]], i1 false
-; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2
-; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP5]], i1 false
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3
-; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP8]], i1 false
-; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1
+; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
; CHECK: if.end:
-; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255
-; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 [[CMP13]], i1 false
-; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP16]], i1 false
-; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP19]], i1 false
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]]
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], i32 0, i32 [[ADD]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
@@ -629,31 +588,23 @@ return:
define i32 @test_separate_anyof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_separate_anyof_v4si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1
-; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP2]]
-; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2
-; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1
-; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP5]]
-; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3
-; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1
-; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP8]]
-; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
; CHECK: if.end:
-; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255
-; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255
-; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 true, i1 [[CMP13]]
-; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255
-; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP16]]
-; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255
-; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP19]]
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[VECEXT]], [[VECEXT1]]
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[T_FR6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[SHIFT]], [[T]]
+; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[DOTNOT7]], i32 [[ADD]], i32 0
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 141ede2705f03..312217d4af963 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -3,18 +3,10 @@
define i1 @logical_and_icmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 0
-; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0
-; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0
-; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; CHECK-NEXT: ret i1 [[S3]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP3]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
@@ -32,22 +24,10 @@ define i1 @logical_and_icmp(<4 x i32> %x) {
define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @logical_or_icmp(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0
-; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1
-; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
-; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], [[Y0]]
-; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], [[Y1]]
-; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], [[Y2]]
-; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], [[Y3]]
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 true, i1 [[C1]]
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[C2]]
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 true, i1 [[C3]]
-; CHECK-NEXT: ret i1 [[S3]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP3]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
@@ -69,18 +49,10 @@ define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
define i1 @logical_and_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_and_fcmp(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = fcmp olt float [[X0]], 0.000000e+00
-; CHECK-NEXT: [[C1:%.*]] = fcmp olt float [[X1]], 0.000000e+00
-; CHECK-NEXT: [[C2:%.*]] = fcmp olt float [[X2]], 0.000000e+00
-; CHECK-NEXT: [[C3:%.*]] = fcmp olt float [[X3]], 0.000000e+00
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; CHECK-NEXT: ret i1 [[S3]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP3]]
;
%x0 = extractelement <4 x float> %x, i32 0
%x1 = extractelement <4 x float> %x, i32 1
@@ -98,18 +70,10 @@ define i1 @logical_and_fcmp(<4 x float> %x) {
define i1 @logical_or_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_or_fcmp(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = fcmp olt float [[X0]], 0.000000e+00
-; CHECK-NEXT: [[C1:%.*]] = fcmp olt float [[X1]], 0.000000e+00
-; CHECK-NEXT: [[C2:%.*]] = fcmp olt float [[X2]], 0.000000e+00
-; CHECK-NEXT: [[C3:%.*]] = fcmp olt float [[X3]], 0.000000e+00
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 true, i1 [[C1]]
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[C2]]
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 true, i1 [[C3]]
-; CHECK-NEXT: ret i1 [[S3]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP3]]
;
%x0 = extractelement <4 x float> %x, i32 0
%x1 = extractelement <4 x float> %x, i32 1
@@ -156,18 +120,10 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_diff_const(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = icmp sgt i32 [[X0]], 0
-; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[X1]], 1
-; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 2
-; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X3]], 3
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; CHECK-NEXT: ret i1 [[S3]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP3]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
@@ -214,25 +170,21 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) {
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
-; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
-; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
-; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
-; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
-; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
-; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
-; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
-; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42
-; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
-; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
-; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
-; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
-; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
-; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
-; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
-; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
-; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
-; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
-; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[X]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[TMP4]], 17
+; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[TMP3]], 17
+; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[TMP2]], 17
+; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[D0]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[D1]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[D2]]
+; CHECK-NEXT: [[S7:%.*]] = select i1 [[TMP10]], i1 [[D3]], i1 false
; CHECK-NEXT: ret i1 [[S7]]
;
%x0 = extractelement <4 x i32> %x, i32 0
More information about the llvm-commits mailing list