[llvm] 66be00d - [VPlan] Introduce m_Cmp; match more compares (#154771)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 24 05:27:09 PDT 2025
Author: Ramkumar Ramachandra
Date: 2025-08-24T13:27:06+01:00
New Revision: 66be00d6352c9537a9e4213f2ca813aeffebca29
URL: https://github.com/llvm/llvm-project/commit/66be00d6352c9537a9e4213f2ca813aeffebca29
DIFF: https://github.com/llvm/llvm-project/commit/66be00d6352c9537a9e4213f2ca813aeffebca29.diff
LOG: [VPlan] Introduce m_Cmp; match more compares (#154771)
Extend [Specific]Cmp_match to handle floating-point compares, and
introduce m_Cmp that matches both integer and floating-point compares.
Use it in simplifyRecipe to match and simplify the general case of
compares. The change has necessitated a bugfix in
VPReplicateRecipe::execute.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
llvm/test/Transforms/LoopVectorize/select-cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a669e5938a12d..a0f306c12754f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7002,12 +7002,12 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
if (Instruction *UI = GetInstructionForCost(&R)) {
// If we adjusted the predicate of the recipe, the cost in the legacy
// cost model may be
diff erent.
- if (auto *WidenCmp = dyn_cast<VPWidenRecipe>(&R)) {
- if ((WidenCmp->getOpcode() == Instruction::ICmp ||
- WidenCmp->getOpcode() == Instruction::FCmp) &&
- WidenCmp->getPredicate() != cast<CmpInst>(UI)->getPredicate())
- return true;
- }
+ using namespace VPlanPatternMatch;
+ CmpPredicate Pred;
+ if (match(&R, m_Cmp(Pred, m_VPValue(), m_VPValue())) &&
+ cast<VPRecipeWithIRFlags>(R).getPredicate() !=
+ cast<CmpInst>(UI)->getPredicate())
+ return true;
SeenInstrs.insert(UI);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6814dc5de6716..d6bc462a0dfab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -805,6 +805,9 @@ class VPIRFlags {
GEPNoWrapFlags getGEPNoWrapFlags() const { return GEPFlags; }
+ /// Returns true if the recipe has a comparison predicate.
+ bool hasPredicate() const { return OpType == OperationType::Cmp; }
+
/// Returns true if the recipe has fast-math flags.
bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 214d5228b535e..1ec6ae677374c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -397,16 +397,24 @@ m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_c_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
-/// ICmp_match is a variant of BinaryRecipe_match that also binds the comparison
-/// predicate.
-template <typename Op0_t, typename Op1_t> struct ICmp_match {
+/// Cmp_match is a variant of BinaryRecipe_match that also binds the comparison
+/// predicate. Opcodes must either be Instruction::ICmp or Instruction::FCmp, or
+/// both.
+template <typename Op0_t, typename Op1_t, unsigned... Opcodes>
+struct Cmp_match {
+ static_assert((sizeof...(Opcodes) == 1 || sizeof...(Opcodes) == 2) &&
+ "Expected one or two opcodes");
+ static_assert(
+ ((Opcodes == Instruction::ICmp || Opcodes == Instruction::FCmp) && ...) &&
+ "Expected a compare instruction opcode");
+
CmpPredicate *Predicate = nullptr;
Op0_t Op0;
Op1_t Op1;
- ICmp_match(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1)
+ Cmp_match(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1)
: Predicate(&Pred), Op0(Op0), Op1(Op1) {}
- ICmp_match(const Op0_t &Op0, const Op1_t &Op1) : Op0(Op0), Op1(Op1) {}
+ Cmp_match(const Op0_t &Op0, const Op1_t &Op1) : Op0(Op0), Op1(Op1) {}
bool match(const VPValue *V) const {
auto *DefR = V->getDefiningRecipe();
@@ -414,7 +422,7 @@ template <typename Op0_t, typename Op1_t> struct ICmp_match {
}
bool match(const VPRecipeBase *V) const {
- if (m_Binary<Instruction::ICmp>(Op0, Op1).match(V)) {
+ if ((m_Binary<Opcodes>(Op0, Op1).match(V) || ...)) {
if (Predicate)
*Predicate = cast<VPRecipeWithIRFlags>(V)->getPredicate();
return true;
@@ -423,38 +431,63 @@ template <typename Op0_t, typename Op1_t> struct ICmp_match {
}
};
-/// SpecificICmp_match is a variant of ICmp_match that matches the comparison
+/// SpecificCmp_match is a variant of Cmp_match that matches the comparison
/// predicate, instead of binding it.
-template <typename Op0_t, typename Op1_t> struct SpecificICmp_match {
+template <typename Op0_t, typename Op1_t, unsigned... Opcodes>
+struct SpecificCmp_match {
const CmpPredicate Predicate;
Op0_t Op0;
Op1_t Op1;
- SpecificICmp_match(CmpPredicate Pred, const Op0_t &LHS, const Op1_t &RHS)
+ SpecificCmp_match(CmpPredicate Pred, const Op0_t &LHS, const Op1_t &RHS)
: Predicate(Pred), Op0(LHS), Op1(RHS) {}
bool match(const VPValue *V) const {
CmpPredicate CurrentPred;
- return ICmp_match<Op0_t, Op1_t>(CurrentPred, Op0, Op1).match(V) &&
+ return Cmp_match<Op0_t, Op1_t, Opcodes...>(CurrentPred, Op0, Op1)
+ .match(V) &&
CmpPredicate::getMatching(CurrentPred, Predicate);
}
};
template <typename Op0_t, typename Op1_t>
-inline ICmp_match<Op0_t, Op1_t> m_ICmp(const Op0_t &Op0, const Op1_t &Op1) {
- return ICmp_match<Op0_t, Op1_t>(Op0, Op1);
+inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp> m_ICmp(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return Cmp_match<Op0_t, Op1_t, Instruction::ICmp>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline ICmp_match<Op0_t, Op1_t> m_ICmp(CmpPredicate &Pred, const Op0_t &Op0,
- const Op1_t &Op1) {
- return ICmp_match<Op0_t, Op1_t>(Pred, Op0, Op1);
+inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp>
+m_ICmp(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1) {
+ return Cmp_match<Op0_t, Op1_t, Instruction::ICmp>(Pred, Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline SpecificICmp_match<Op0_t, Op1_t>
+inline SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp>
m_SpecificICmp(CmpPredicate MatchPred, const Op0_t &Op0, const Op1_t &Op1) {
- return SpecificICmp_match<Op0_t, Op1_t>(MatchPred, Op0, Op1);
+ return SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp>(MatchPred, Op0,
+ Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
+m_Cmp(const Op0_t &Op0, const Op1_t &Op1) {
+ return Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(Op0,
+ Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
+m_Cmp(CmpPredicate &Pred, const Op0_t &Op0, const Op1_t &Op1) {
+ return Cmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(
+ Pred, Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>
+m_SpecificCmp(CmpPredicate MatchPred, const Op0_t &Op0, const Op1_t &Op1) {
+ return SpecificCmp_match<Op0_t, Op1_t, Instruction::ICmp, Instruction::FCmp>(
+ MatchPred, Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a92540f457c73..c4fdcccc6d62b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2930,6 +2930,9 @@ static void scalarizeInstruction(const Instruction *Instr,
RepRecipe->applyFlags(*Cloned);
RepRecipe->applyMetadata(*Cloned);
+ if (RepRecipe->hasPredicate())
+ cast<CmpInst>(Cloned)->setPredicate(RepRecipe->getPredicate());
+
if (auto DL = RepRecipe->getDebugLoc())
State.setDebugLocFrom(DL);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e0d63214f135a..e0bf241c73fdd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1108,33 +1108,31 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
// Try to fold Not into compares by adjusting the predicate in-place.
- if (auto *WideCmp = dyn_cast<VPWidenRecipe>(A)) {
- if ((WideCmp->getOpcode() == Instruction::ICmp ||
- WideCmp->getOpcode() == Instruction::FCmp) &&
- all_of(WideCmp->users(), [&WideCmp](VPUser *U) {
- return match(U, m_CombineOr(m_Not(m_Specific(WideCmp)),
- m_Select(m_Specific(WideCmp),
- m_VPValue(), m_VPValue())));
+ CmpPredicate Pred;
+ if (match(A, m_Cmp(Pred, m_VPValue(), m_VPValue()))) {
+ auto *Cmp = cast<VPRecipeWithIRFlags>(A);
+ if (all_of(Cmp->users(), [&Cmp](VPUser *U) {
+ return match(U, m_CombineOr(m_Not(m_Specific(Cmp)),
+ m_Select(m_Specific(Cmp), m_VPValue(),
+ m_VPValue())));
})) {
- WideCmp->setPredicate(
- CmpInst::getInversePredicate(WideCmp->getPredicate()));
- for (VPUser *U : to_vector(WideCmp->users())) {
+ Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
+ for (VPUser *U : to_vector(Cmp->users())) {
auto *R = cast<VPSingleDefRecipe>(U);
- if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X),
- m_VPValue(Y)))) {
+ if (match(R, m_Select(m_Specific(Cmp), m_VPValue(X), m_VPValue(Y)))) {
// select (cmp pred), x, y -> select (cmp inv_pred), y, x
R->setOperand(1, Y);
R->setOperand(2, X);
} else {
// not (cmp pred) -> cmp inv_pred
- assert(match(R, m_Not(m_Specific(WideCmp))) && "Unexpected user");
- R->replaceAllUsesWith(WideCmp);
+ assert(match(R, m_Not(m_Specific(Cmp))) && "Unexpected user");
+ R->replaceAllUsesWith(Cmp);
}
}
- // If WideCmp doesn't have a debug location, use the one from the
- // negation, to preserve the location.
- if (!WideCmp->getDebugLoc() && R.getDebugLoc())
- WideCmp->setDebugLoc(R.getDebugLoc());
+ // If Cmp doesn't have a debug location, use the one from the negation,
+ // to preserve the location.
+ if (!Cmp->getDebugLoc() && R.getDebugLoc())
+ Cmp->setDebugLoc(R.getDebugLoc());
}
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 11bb4d234f3f3..e5697e121e9e9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -973,18 +973,16 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
-; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP19:.*]] ]
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
+; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP18:.*]] ]
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP18]] ]
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP18]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
-; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
-; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = xor i1 [[TMP7]], true
-; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP8]], true
-; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP9]], i1 false
-; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP10]], i1 false
+; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ule double [[TMP5]], 0.000000e+00
+; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ule double [[TMP6]], 0.000000e+00
+; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP7]], i1 false
+; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP8]], i1 false
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], double 1.000000e+00, double 0.000000e+00
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP12]], double 1.000000e+00, double 0.000000e+00
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[PREDPHI]]
@@ -993,11 +991,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
-; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
-; TFA_INTERLEAVE: [[BB18]]:
+; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB16:.*]], label %[[TMP18]]
+; TFA_INTERLEAVE: [[BB16]]:
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
-; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
-; TFA_INTERLEAVE: [[TMP19]]:
+; TFA_INTERLEAVE-NEXT: br label %[[TMP18]]
+; TFA_INTERLEAVE: [[TMP18]]:
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 5e48b1f72b111..e4922d3e4f627 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -134,18 +134,14 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
-; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
-; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
-; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
-; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
-; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
-; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
-; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
-; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
-; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
-; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
-; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
-; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
+; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
+; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
+; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
+; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
+; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
+; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
+; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -510,18 +506,14 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
-; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
-; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
-; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
-; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
-; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
-; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
-; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
-; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
-; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
-; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
-; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
-; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
+; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
+; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
+; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
+; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
+; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
+; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
+; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -700,18 +692,14 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
-; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
-; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
-; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
-; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
-; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
-; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
-; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
-; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast one float [[TMP8]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast one float [[TMP9]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast one float [[TMP10]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast one float [[TMP11]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
+; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
+; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
+; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -890,18 +878,14 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
-; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00
-; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
-; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
-; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
-; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
-; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
-; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
-; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
-; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp one float [[TMP8]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp one float [[TMP9]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp one float [[TMP10]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp one float [[TMP11]], 3.000000e+00
+; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
+; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
+; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
+; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1049,8 +1033,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3
-; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = xor i1 [[TMP0]], true
+; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A]], 3
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1058,10 +1041,10 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP4]]
-; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP4]]
-; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP4]]
-; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP4]]
+; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP0]]
+; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP0]]
+; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP0]]
+; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP0]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
More information about the llvm-commits
mailing list