[llvm] [VPlan] Merge fcmp uno feeding Or. (PR #167251)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 9 13:24:17 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Fold
or (fcmp uno %A, %A), (fcmp uno %B, %B), ... ->
or (fcmp uno %A, %B), ...
This pattern is generated to check if any vector lane is NaN, and combining multiple compares is beneficial on architectures that have dedicated instructions.
Alive2 Proof: https://alive2.llvm.org/ce/z/vA_aoM
Combine suggested as part of #161735
---
Full diff: https://github.com/llvm/llvm-project/pull/167251.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 634df51a12965..74d4f6528d627 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1221,6 +1221,17 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
}
}
+ // Fold (fcmp uno %X, %X) or (fcmp uno %Y, %Y) -> fcmp uno %X, %Y
+ // This is useful for fmax/fmin without fast-math flags, where we need to
+ // check if any operand is NaN.
+ if (match(Def, m_BinaryOr(m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(X),
+ m_Deferred(X)),
+ m_SpecificCmp(CmpInst::FCMP_UNO, m_VPValue(Y),
+ m_Deferred(Y))))) {
+ VPValue *NewCmp = Builder.createFCmp(CmpInst::FCMP_UNO, X, Y);
+ return Def->replaceAllUsesWith(NewCmp);
+ }
+
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(Def, m_DerivedIV(m_ZeroInt(), m_VPValue(A), m_One())) ||
match(Def, m_DerivedIV(m_ZeroInt(), m_ZeroInt(), m_VPValue()))) &&
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
index 7e58d9d6a8ec9..f3d649b899686 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
@@ -142,14 +142,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-NEXT: [[TMP6]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD5]])
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD6]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
-; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
-; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
-; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD4]]
+; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP8]]
+; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
index 01fab87209a35..ca6e5bc2d0dcb 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll
@@ -141,14 +141,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-NEXT: [[TMP6]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD5]])
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD6]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
-; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
-; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
-; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD4]]
+; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP8]]
+; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
index e028bec138faf..a4f7631435bb3 100644
--- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll
@@ -697,10 +697,8 @@ define float @test_fmax_and_fmax(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK-NEXT: [[TMP2]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP7:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP11]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/167251
More information about the llvm-commits mailing list