[llvm] [SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform (PR #184590)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 03:15:32 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
minnum/maxnum don't have the correct sNaN semantics, so we must convert to minimumnum/maximumnum instead.
To avoid an NVPTX regression, make it handle fmaximumnum in one TableGen pattern.
This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#<!-- -->93575) appears to be blocked.
Fixes https://github.com/llvm/llvm-project/issues/176624.
---
Patch is 37.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184590.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+8-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+5-5)
- (modified) llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll (+4-2)
- (modified) llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll (+57-29)
- (modified) llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll (+4-3)
- (modified) llvm/test/CodeGen/ARM/vminmaxnm-safe.ll (+89-49)
- (modified) llvm/test/CodeGen/SystemZ/vec-max-05.ll (+13-3)
- (modified) llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll (+8-4)
- (modified) llvm/test/CodeGen/SystemZ/vec-min-05.ll (+13-3)
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll (+36-24)
- (modified) llvm/test/CodeGen/X86/sse-minmax.ll (+5-17)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/184590
More information about the llvm-commits
mailing list