[llvm] [SelectionDAG] Remove `NoNaNsFPMath` in `visitFCmp` (PR #163519)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 16 02:28:44 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-risc-v
Author: None (paperchalice)
<details>
<summary>Changes</summary>
Users should use the `nnan` fast-math flag instead.
The remaining uses are related to the intrinsic form of `fcmp`.
- Remove uses of `NoNaNsFPMath` when building the SelectionDAG for `fcmp`.
- Let `isKnownNeverNaN` return true if all users have the `nnan` flag.
- Regenerate/split some tests.
---
Patch is 394.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163519.diff
17 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+3)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+6)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+1-1)
- (modified) llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll (+138-113)
- (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+72-39)
- (modified) llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll (+1370-946)
- (modified) llvm/test/CodeGen/Mips/fcmp.ll (+3-3)
- (removed) llvm/test/CodeGen/PowerPC/change-no-infs.ll (-67)
- (modified) llvm/test/CodeGen/PowerPC/fsel.ll (+125-70)
- (modified) llvm/test/CodeGen/PowerPC/scalar-equal.ll (+35-77)
- (modified) llvm/test/CodeGen/PowerPC/scalar_cmp.ll (+575-919)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll (+224-226)
- (modified) llvm/test/CodeGen/X86/2006-05-22-FPSetEQ.ll (+10-6)
- (modified) llvm/test/CodeGen/X86/avx-minmax.ll (+9-9)
- (added) llvm/test/CodeGen/X86/sse-minmax-fast.ll (+735)
- (added) llvm/test/CodeGen/X86/sse-minmax-finite.ll (+735)
- (modified) llvm/test/CodeGen/X86/sse-minmax.ll (+405-927)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 358e060d2c6d3..393431e92a858 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6715,6 +6715,9 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
if (NewOpcode != ISD::DELETED_NODE) {
+ // Propagate fast-math flags from setcc.
+ SelectionDAG::FlagInserter FlagInserter(DAG, LHS->getFlags() &
+ RHS->getFlags());
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 90edaf3ef5471..8d0699769e8c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5869,6 +5869,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN,
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
+ // If all users of this operand is annotated with nnan, we can assume
+ // this operand is not NaN, since nnan also affects inputs.
+ if (llvm::all_of(Op->users(),
+ [](const SDNode *N) { return N->getFlags().hasNoNaNs(); }))
+ return true;
+
return isKnownNeverNaN(Op, DemandedElts, SNaN, Depth);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b5188fc10a..aa8b1c0601dc4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3711,7 +3711,7 @@ void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) {
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
- if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
+ if (FPMO->hasNoNaNs())
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
index ac0b8e89519dd..f03ceddc685d2 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFULLFP16
-; RUN: llc < %s -mtriple=aarch64 --enable-no-nans-fp-math | FileCheck %s --check-prefixes=CHECK,CHECK-NONANS
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FULLFP16
define <1 x float> @dup_v1i32_oeq(float %a, float %b) {
@@ -69,27 +68,13 @@ entry:
}
define <1 x float> @dup_v1i32_one(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_one:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NOFULLFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_one:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
-; CHECK-NONANS-NEXT: mvn v0.8b, v0.8b
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_one:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FULLFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_one:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s2, s0, s1
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%0 = fcmp one float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -98,6 +83,20 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_one_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_one_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmeq s0, s0, s1
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan one float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
define <1 x float> @dup_v1i32_ord(float %a, float %b) {
; CHECK-LABEL: dup_v1i32_ord:
; CHECK: // %bb.0: // %entry
@@ -115,26 +114,13 @@ entry:
}
define <1 x float> @dup_v1i32_ueq(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_ueq:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_ueq:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1
-; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_ueq:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s2, s0, s1
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
entry:
%0 = fcmp ueq float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -143,23 +129,25 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_ueq_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ueq_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmeq s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan ueq float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
define <1 x float> @dup_v1i32_ugt(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_ugt:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmge s0, s1, s0
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_ugt:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmgt s0, s0, s1
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_ugt:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmge s0, s1, s0
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_ugt:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s1, s0
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
entry:
%0 = fcmp ugt float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -168,23 +156,25 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_ugt_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ugt_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan ugt float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
define <1 x float> @dup_v1i32_uge(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_uge:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_uge:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmge s0, s0, s1
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_uge:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_uge:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
entry:
%0 = fcmp uge float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -193,23 +183,26 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_uge_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_uge_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s0, s1
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan uge float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
+
define <1 x float> @dup_v1i32_ult(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_ult:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmge s0, s0, s1
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_ult:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmgt s0, s1, s0
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_ult:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmge s0, s0, s1
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_ult:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s0, s1
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
entry:
%0 = fcmp ult float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -218,23 +211,25 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_ult_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ult_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s1, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan ult float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
define <1 x float> @dup_v1i32_ule(float %a, float %b) {
-; CHECK-NOFULLFP16-LABEL: dup_v1i32_ule:
-; CHECK-NOFULLFP16: // %bb.0: // %entry
-; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1
-; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-NOFULLFP16-NEXT: ret
-;
-; CHECK-NONANS-LABEL: dup_v1i32_ule:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcmge s0, s1, s0
-; CHECK-NONANS-NEXT: ret
-;
-; CHECK-FULLFP16-LABEL: dup_v1i32_ule:
-; CHECK-FULLFP16: // %bb.0: // %entry
-; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1
-; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b
-; CHECK-FULLFP16-NEXT: ret
+; CHECK-LABEL: dup_v1i32_ule:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmgt s0, s0, s1
+; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: ret
entry:
%0 = fcmp ule float %a, %b
%vcmpd.i = sext i1 %0 to i32
@@ -243,6 +238,19 @@ entry:
ret <1 x float> %1
}
+define <1 x float> @dup_v1i32_ule_nnan(float %a, float %b) {
+; CHECK-LABEL: dup_v1i32_ule_nnan:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmge s0, s1, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = fcmp nnan ule float %a, %b
+ %vcmpd.i = sext i1 %0 to i32
+ %vecinit.i = insertelement <1 x i32> poison, i32 %vcmpd.i, i64 0
+ %1 = bitcast <1 x i32> %vecinit.i to <1 x float>
+ ret <1 x float> %1
+}
+
define <1 x float> @dup_v1i32_une(float %a, float %b) {
; CHECK-LABEL: dup_v1i32_une:
; CHECK: // %bb.0: // %entry
@@ -326,13 +334,6 @@ define <8 x half> @dup_v8i16(half %a, half %b) {
; CHECK-NOFULLFP16-NEXT: fcmeq s0, s0, s1
; CHECK-NOFULLFP16-NEXT: ret
;
-; CHECK-NONANS-LABEL: dup_v8i16:
-; CHECK-NONANS: // %bb.0: // %entry
-; CHECK-NONANS-NEXT: fcvt s1, h1
-; CHECK-NONANS-NEXT: fcvt s0, h0
-; CHECK-NONANS-NEXT: fcmeq s0, s0, s1
-; CHECK-NONANS-NEXT: ret
-;
; CHECK-FULLFP16-LABEL: dup_v8i16:
; CHECK-FULLFP16: // %bb.0: // %entry
; CHECK-FULLFP16-NEXT: fcmp h0, h1
@@ -350,6 +351,30 @@ define <8 x half> @dup_v8i16(half %a, half %b) {
ret <8 x half> %1
}
+define <8 x half> @dup_v8i16_nnan(half %a, half %b) {
+; FIXME: Could be replaced with fcmeq + dup but the type of the former is
+; promoted to i32 during selection and then the optimization does not apply.
+; CHECK-NOFULLFP16-LABEL: dup_v8i16_nnan:
+; CHECK-NOFULLFP16: // %bb.0: // %entry
+; CHECK-NOFULLFP16-NEXT: fcvt s1, h1
+; CHECK-NOFULLFP16-NEXT: fcvt s0, h0
+; CHECK-NOFULLFP16-NEXT: fcmeq s0, s0, s1
+; CHECK-NOFULLFP16-NEXT: ret
+;
+; CHECK-FULLFP16-LABEL: dup_v8i16_nnan:
+; CHECK-FULLFP16: // %bb.0: // %entry
+; CHECK-FULLFP16-NEXT: fcmp h0, h1
+; CHECK-FULLFP16-NEXT: csetm w8, eq
+; CHECK-FULLFP16-NEXT: fmov s0, w8
+; CHECK-FULLFP16-NEXT: ret
+ entry:
+ %0 = fcmp nnan oeq half %a, %b
+ %vcmpd.i = sext i1 %0 to i16
+ %vecinit.i = insertelement <8 x i16> poison, i16 %vcmpd.i, i64 0
+ %1 = bitcast <8 x i16> %vecinit.i to <8 x half>
+ ret <8 x half> %1
+}
+
; Check that a mask is not generated for non-vectorized users.
define i32 @mask_i32(float %a, float %b) {
; CHECK-LABEL: mask_i32:
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 11b3b62ec1c8d..a82ead2406945 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -3249,36 +3249,51 @@ define <2 x i64> @fcmone2xdouble_fast(<2 x double> %A, <2 x double> %B) {
}
define <2 x i32> @fcmord2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmord2xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmord2xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.2s, v0.2s, v0.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmord2xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast ord <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmord4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmord4xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmord4xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmord4xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast ord <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmord2xdouble_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmord2xdouble_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.2d, v0.2d, v0.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmord2xdouble_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast ord <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -3286,39 +3301,57 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) {
define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) {
-; CHECK-LABEL: fcmuno2xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.2s, v0.2s, v0.2s
+; CHECK-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b
+; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: fcmuno4xfloat_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno4xfloat_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno4xfloat_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) {
-; CHECK-LABEL: fcmuno2xdouble_fast:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fcmuno2xdouble_fast:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.2d, v0.2d, v0.2d
+; CHECK-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmuno2xdouble_fast:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-GI-NEXT: ret
%tmp3 = fcmp fast uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 57a1e4cb795bf..094f39206b23f 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck %s -check-prefixes=GCN,GFX11,GFX11-TRUE16
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck %s -check-prefixes=GCN,GFX11,GFX11-FAKE16
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -enable-no-nans-fp-math < %s | FileCheck %s -check-prefixes=GCN,GFX11NONANS,GCN-TRUE16,GFX11NONANS-TRUE16
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -enable-no-nans-fp-math < %s | FileCheck %s -check-prefixes=GCN,GFX11NONANS,GCN-FAKE16,GFX11NONANS-FAKE16
; The tests check the following optimization of DAGCombiner:
; CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
@@ -855,93 +853,117 @@ define i1 @test57(float %arg1, float %arg2, float %arg3) #0 {
}
define i1 @test58(double %arg1, double %arg2, d...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/163519
More information about the llvm-commits
mailing list