[llvm] [VPlan] Merge `fcmp uno` feeding AnyOf. (PR #166823)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 03:39:01 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/166823
>From c83adad3980bf9fb80d1410dee98d71c7606de4f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 6 Nov 2025 12:42:04 +0000
Subject: [PATCH 1/3] [VPlan] Merge `fcmp uno` feeding AnyOf.
Fold
any-of (fcmp uno %A, %A), (fcmp uno %B, %B), ... ->
any-of (fcmp uno %A, %B), ...
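This pattern is generated to check if any vector lane is NaN. Because
`fcmp uno %A, %B` is true whenever either operand is NaN, a single compare
can test two values at once; combining the compares is beneficial on
architectures that have dedicated instructions.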
Alive2 Proof: https://alive2.llvm.org/ce/z/vA_aoM
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 4 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 23 +++++++++++++++++++
.../AArch64/fmax-without-fast-math-flags.ll | 7 ++----
.../AArch64/fmin-without-fast-math-flags.ll | 7 ++----
...fmax-without-fast-math-flags-interleave.ll | 7 ++----
5 files changed, 33 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b57c44872c1b6..8b2931637113f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -417,6 +417,10 @@ m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<VPInstruction::BranchOnCount>(Op0, Op1);
}
+inline VPInstruction_match<VPInstruction::AnyOf> m_AnyOf() {
+ return m_VPInstruction<VPInstruction::AnyOf>();
+}
+
template <typename Op0_t>
inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
m_AnyOf(const Op0_t &Op0) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 634df51a12965..0c04cd6174a7a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1221,6 +1221,29 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
}
}
+ // Fold any-of (fcmp uno %A, %A), (fcmp uno %B, %B), ... ->
+ // any-of (fcmp uno %A, %B), ...
+ if (match(Def, m_AnyOf()) && Def->getNumOperands() % 2 == 0) {
+ SmallVector<VPValue *, 4> NewOps;
+ unsigned NumOps = Def->getNumOperands();
+ for (unsigned I = 0; I < NumOps; I += 2) {
+ VPValue *A, *B;
+ if (!match(
+ Def->getOperand(I),
+ m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(A), m_Deferred(A))) ||
+ !match(Def->getOperand(I + 1),
+ m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(B), m_Deferred(B))))
+ break;
+
+ NewOps.push_back(Builder.createFCmp(CmpInst::FCMP_UNO, A, B));
+ }
+
+ if (NewOps.size() == NumOps / 2) {
+ VPValue *NewAnyOf = Builder.createNaryOp(VPInstruction::AnyOf, NewOps);
+ return Def->replaceAllUsesWith(NewAnyOf);
+ }
+ }
+
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(Def, m_DerivedIV(m_ZeroInt(), m_VPValue(A), m_One())) ||
match(Def, m_DerivedIV(m_ZeroInt(), m_ZeroInt(), m_VPValue()))) &&
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
index 7e58d9d6a8ec9..b65a7e999a780 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
@@ -59,11 +59,8 @@ define float @fmaxnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
index 1cc4c152649b4..193424d3eb70a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
@@ -59,11 +59,8 @@ define float @fminnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
index 01fab87209a35..ebd4ab54e9b74 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
@@ -59,11 +59,8 @@ define float @fmaxnum(ptr %src, i64 %n) {
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
-; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
>From bde709869a97aead5ce8c100e39b91c3b84d3e78 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 15 Nov 2025 16:03:47 +0000
Subject: [PATCH 2/3] !fixup look for unpaired ops
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 35 ++++++++++++-------
.../LoopVectorize/fcmp-uno-fold-interleave.ll | 31 +++++-----------
2 files changed, 30 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6710260597d0c..0661de3f99d08 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1222,25 +1222,34 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// Fold any-of (fcmp uno %A, %A), (fcmp uno %B, %B), ... ->
// any-of (fcmp uno %A, %B), ...
- if (match(Def, m_AnyOf()) && Def->getNumOperands() % 2 == 0) {
+ if (match(Def, m_AnyOf())) {
SmallVector<VPValue *, 4> NewOps;
- unsigned NumOps = Def->getNumOperands();
- for (unsigned I = 0; I < NumOps; I += 2) {
- VPValue *A, *B;
- if (!match(
- Def->getOperand(I),
- m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(A), m_Deferred(A))) ||
- !match(Def->getOperand(I + 1),
- m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(B), m_Deferred(B))))
- break;
-
- NewOps.push_back(Builder.createFCmp(CmpInst::FCMP_UNO, A, B));
+ VPRecipeBase *UnpairedCmp = nullptr;
+ for (VPValue *Op : Def->operands()) {
+ VPValue *X;
+ if (Op->getNumUsers() > 1 ||
+ !match(Op, m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(X),
+ m_Deferred(X)))) {
+ NewOps.push_back(Op);
+ continue;
+ }
+ if (UnpairedCmp) {
+ NewOps.push_back(Builder.createFCmp(CmpInst::FCMP_UNO,
+ UnpairedCmp->getOperand(0), X));
+ UnpairedCmp = nullptr;
+ } else {
+ UnpairedCmp = Op->getDefiningRecipe();
+ }
}
- if (NewOps.size() == NumOps / 2) {
+ if (UnpairedCmp)
+ NewOps.push_back(UnpairedCmp->getVPSingleValue());
+
+ if (NewOps.size() < Def->getNumOperands()) {
VPValue *NewAnyOf = Builder.createNaryOp(VPInstruction::AnyOf, NewOps);
return Def->replaceAllUsesWith(NewAnyOf);
}
+ }
// Fold (fcmp uno %X, %X) or (fcmp uno %Y, %Y) -> fcmp uno %X, %Y
// This is useful for fmax/fmin without fast-math flags, where we need to
diff --git a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll
index 7b8e1997a5011..22226a711bcf0 100644
--- a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll
@@ -28,14 +28,11 @@ define float @fmaxnum(ptr %src, i64 %n) {
; IC3-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD3]])
; IC3-NEXT: [[TMP5]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD4]])
; IC3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 12
-; IC3-NEXT: [[TMP6:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; IC3-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD3]]
; IC3-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
-; IC3-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP6]]
+; IC3-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; IC3-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP7]]
-; IC3-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP9]], [[TMP10]]
; IC3-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP8]]
-; IC3-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP11]], [[TMP12]]
+; IC3-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; IC3-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; IC3-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IC3-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
@@ -86,17 +83,11 @@ define float @fmaxnum(ptr %src, i64 %n) {
; IC4-NEXT: [[TMP6]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD5]])
; IC4-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD6]])
; IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; IC4-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; IC4-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
-; IC4-NEXT: [[TMP24:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
-; IC4-NEXT: [[TMP25:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
-; IC4-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP8]]
-; IC4-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP9]]
-; IC4-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
+; IC4-NEXT: [[TMP24:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; IC4-NEXT: [[TMP25:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD6]]
; IC4-NEXT: [[TMP26:%.*]] = freeze <4 x i1> [[TMP24]]
-; IC4-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP12]], [[TMP26]]
; IC4-NEXT: [[TMP28:%.*]] = freeze <4 x i1> [[TMP25]]
-; IC4-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP28]]
+; IC4-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP26]], [[TMP28]]
; IC4-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP29]])
; IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IC4-NEXT: [[TMP15:%.*]] = or i1 [[TMP13]], [[TMP14]]
@@ -153,18 +144,12 @@ define float @fmaxnum(ptr %src, i64 %n) {
; IC5-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI3]], <4 x float> [[WIDE_LOAD7]])
; IC5-NEXT: [[TMP9]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI4]], <4 x float> [[WIDE_LOAD8]])
; IC5-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 20
-; IC5-NEXT: [[TMP10:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; IC5-NEXT: [[TMP11:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
-; IC5-NEXT: [[TMP12:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
-; IC5-NEXT: [[TMP13:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD7]]
; IC5-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD8]]
-; IC5-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]]
-; IC5-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP11]]
-; IC5-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP16]]
+; IC5-NEXT: [[TMP12:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; IC5-NEXT: [[TMP13:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD7]]
; IC5-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP12]]
-; IC5-NEXT: [[TMP19:%.*]] = or <4 x i1> [[TMP17]], [[TMP18]]
; IC5-NEXT: [[TMP20:%.*]] = freeze <4 x i1> [[TMP13]]
-; IC5-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP19]], [[TMP20]]
+; IC5-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP18]], [[TMP20]]
; IC5-NEXT: [[TMP22:%.*]] = freeze <4 x i1> [[TMP14]]
; IC5-NEXT: [[TMP23:%.*]] = or <4 x i1> [[TMP21]], [[TMP22]]
; IC5-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
>From 0511fa8fd38b362af9148fc6b52be2348ece93e4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 18 Nov 2025 11:38:25 +0000
Subject: [PATCH 3/3] !fixup reorder code as suggested, thanks
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fca6554ad77c6..267f5f27202cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -540,7 +540,6 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::ExtractValue:
case Instruction::Freeze:
case Instruction::Load:
- case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
case VPInstruction::Broadcast:
case VPInstruction::BuildStructVector:
@@ -580,6 +579,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::GetElementPtr:
case Instruction::PHI:
case Instruction::Switch:
+ case VPInstruction::AnyOf:
case VPInstruction::SLPLoad:
case VPInstruction::SLPStore:
// Cannot determine the number of operands from the opcode.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f0c3ebd6ab6f1..2244106979fb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1247,14 +1247,12 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
!match(Op, m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(X),
m_Deferred(X)))) {
NewOps.push_back(Op);
- continue;
- }
- if (UnpairedCmp) {
+ } else if (!UnpairedCmp) {
+ UnpairedCmp = Op->getDefiningRecipe();
+ } else {
NewOps.push_back(Builder.createFCmp(CmpInst::FCMP_UNO,
UnpairedCmp->getOperand(0), X));
UnpairedCmp = nullptr;
- } else {
- UnpairedCmp = Op->getDefiningRecipe();
}
}
More information about the llvm-commits
mailing list