[llvm] [LV] Improve AnyOf reduction codegen. (PR #78304)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 16 08:00:21 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Update AnyOf reduction code generation to only keep track of the AnyOf property in a boolean vector in the loop, only selecting either the new or start value in the middle block.
The patch incorporates feedback from https://reviews.llvm.org/D153697.
This fixes the #<!-- -->62565, as now there aren't multiple uses of the start/new values.
Fixes https://github.com/llvm/llvm-project/issues/62565
---
Patch is 78.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78304.diff
10 Files Affected:
- (modified) llvm/include/llvm/Transforms/Utils/LoopUtils.h (-9)
- (modified) llvm/lib/Transforms/Utils/LoopUtils.cpp (+6-18)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+1)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+41-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll (+15-23)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll (+44-76)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll (+15-14)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp.ll (+74-72)
- (modified) llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll (+18-25)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 5a1385d01d8e44..3bad7b616d9d75 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -363,15 +363,6 @@ Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK);
/// Returns the comparison predicate used when expanding a min/max reduction.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
-/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
-/// are trying to match. In this pattern, we are only ever selecting between two
-/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
-/// loop invariant value. If any of lane value in \p Left, \p Right is not equal
-/// to \p StartVal, select the loop invariant value. This is done by selecting
-/// \p Right iff \p Left is equal to \p StartVal.
-Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
- Value *Left, Value *Right);
-
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 59485126b280ab..c0582fb7d7e150 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -962,15 +962,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
}
}
-Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
- RecurKind RK, Value *Left, Value *Right) {
- if (auto VTy = dyn_cast<VectorType>(Left->getType()))
- StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
- Value *Cmp =
- Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
- return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
-}
-
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
@@ -1079,16 +1070,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
NewVal = SI->getTrueValue();
}
- // Create a splat vector with the new value and compare this to the vector
- // we want to reduce.
- ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
- Value *Right = Builder.CreateVectorSplat(EC, InitVal);
- Value *Cmp =
- Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
-
// If any predicate is true it means that we want to select the new value.
- Cmp = Builder.CreateOrReduce(Cmp);
- return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
+ Value *AnyOf =
+ Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
+ // The compares in the loop may yield poison, which propagates through the
+ // bitwise ORs. Freeze it here before the condition is used.
+ AnyOf = Builder.CreateFreeze(AnyOf);
+ return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
}
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index a7ebf78e54ceb6..9d3ef5b96c72fb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -68,6 +68,7 @@ class VPBuilder {
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
+ VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
/// Clear the insertion point: created instructions will not be inserted into
/// a block.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa5d1bfa57d535..973bcf4f6c2d7c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7428,7 +7428,8 @@ static void createAndCollectMergePhiForReduction(
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
+ TrackingVH<Value> ReductionStartValue =
+ State.get(PhiR->getStartValue(), VPIteration(0, 0));
Value *FinalValue =
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
auto *ResumePhi =
@@ -7452,7 +7453,7 @@ static void createAndCollectMergePhiForReduction(
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
Incoming);
else
- BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
+ BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
}
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -9054,6 +9055,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ // Adjust AnyOf reductions; replace the reduction phi for the selected value
+ // with a boolean reduction phi node to check if the condition is true in
+ // any iteration. The final value is selected by the final
+ // ComputeReductionResult.
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
+ return isa<VPWidenSelectRecipe>(U) ||
+ (isa<VPReplicateRecipe>(U) &&
+ cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
+ Instruction::Select);
+ }));
+ VPValue *Cmp = Select->getOperand(0);
+ // If the compare is checking the reduction PHI node, adjust it to check
+ // the start value.
+ if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
+ for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
+ if (CmpR->getOperand(I) == PhiR)
+ CmpR->setOperand(I, PhiR->getStartValue());
+ }
+ VPBuilder::InsertPointGuard Guard(Builder);
+ Builder.setInsertPoint(Select);
+
+ // If the true value of the select is the reduction phi, the new value is
+ // selected if the negated condition is true in any iteration.
+ if (Select->getOperand(1) == PhiR)
+ Cmp = Builder.createNot(Cmp);
+ VPValue *Or = Builder.createOr(PhiR, Cmp);
+ Select->getVPSingleValue()->replaceAllUsesWith(Or);
+
+ // Convert the reduction phi to operate on bools.
+ PhiR->setOperand(0, Plan->getVPValueOrAddLiveIn(ConstantInt::getFalse(
+ OrigLoop->getHeader()->getContext())));
+ }
+
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
@@ -9086,7 +9122,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
- if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
+ if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
+ !RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxTy = RdxDesc.getRecurrenceType();
auto *Trunc =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c553e2c9e76839..f87461fd548bd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -452,8 +452,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
TrackingVH<Value> ReductionStartValue =
RdxDesc.getRecurrenceStartValue();
- ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
- ReducedPartRdx, RdxPart);
+ ReducedPartRdx = Builder.CreateOr(ReducedPartRdx, RdxPart);
} else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -461,7 +460,9 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if (State.VF.isVector() && !PhiR->isInLoop()) {
+ if ((State.VF.isVector() ||
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
+ !PhiR->isInLoop()) {
ReducedPartRdx =
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
index 1c26ee8479e578..493ffd2ca569de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -8,13 +8,12 @@ target triple = "aarch64-linux-gnu"
define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp
@@ -62,20 +61,15 @@ exit: ; preds = %for.body
define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
; CHECK-VF4IC1: vector.ph:
-; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %b, i64 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-VF4IC1-NOT: shufflevector <vscale x 4 x i32>
+; CHECK-VF4IC1-NOT: shufflevector <vscale x 4 x i32>
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[SPLAT_OF_B]]
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0
-; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[FIN_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_CMP]])
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a
; CHECK-VF4IC4-LABEL: @select_i32_from_icmp
@@ -101,13 +95,12 @@ exit: ; preds = %for.body
define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x float>
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp
@@ -156,16 +149,15 @@ exit: ; preds = %for.body
define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
+; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i1> [[VEC_SEL_TMP]], <vscale x 4 x i1> [[VEC_PHI]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], zeroinitializer
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0
; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
index 34a7987bb40abe..6c50915c88e824 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
@@ -12,25 +12,22 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/78304
More information about the llvm-commits
mailing list