[llvm] DAG: Assert fcmp uno runtime calls are boolean values (PR #142898)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 8 19:35:57 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/142898
From 0da7c19522a89bbb90b94d6eae682a6bf56be59c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 5 Jun 2025 13:18:51 +0900
Subject: [PATCH 1/3] DAG: Assert fcmp uno runtime calls are boolean values
This saves two instructions in the ARM soft-float case for fcmp ueq.

This code is written in a confusingly over-general way. The point of
getCmpLibcallCC is to express that the compiler-rt implementations of the
FP compares are different aliases of functions that may return -1 in some
cases. That does not apply to the unordered compare, whose runtime call
returns a plain boolean.

Also stop overriding the default value for the ARM unordered compare; it
was being set to the same value as the default, which is now assumed.
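As a rough illustration of the two conventions (a sketch only; the *_like
helpers are invented names, not the actual compiler-rt sources):

  #include <cmath>

  // Ordered-compare helpers like __eqsf2 only fix the result's relation
  // to zero (0 on equality, some nonzero value -- possibly -1 --
  // otherwise), which is why getCmpLibcallCC supplies a condition code
  // to test the result against.
  static int eqsf2_like(float a, float b) {
    if (std::isnan(a) || std::isnan(b))
      return 1; // unordered: any nonzero value is allowed
    return a == b ? 0 : (a < b ? -1 : 1);
  }

  // The unordered helper is different: it returns a plain 0-or-1
  // boolean, so no normalization against zero is needed.
  static int unordsf2_like(float a, float b) {
    return std::isnan(a) || std::isnan(b);
  }

  // fcmp ueq ("equal or unordered"): because the second call is already
  // a boolean, it can be OR'd in directly, saving the compare+set.
  static bool fcmp_ueq_like(float a, float b) {
    return (eqsf2_like(a, b) == 0) | unordsf2_like(a, b);
  }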
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 12 +++++++++++
llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++--
llvm/test/CodeGen/ARM/fpcmp_ueq.ll | 13 ++++++------
llvm/test/CodeGen/RISCV/double-fcmp-strict.ll | 20 +++++++++----------
llvm/test/CodeGen/RISCV/double-fcmp.ll | 10 +++++-----
llvm/test/CodeGen/RISCV/float-fcmp-strict.ll | 20 +++++++++----------
llvm/test/CodeGen/RISCV/float-fcmp.ll | 10 +++++-----
llvm/test/CodeGen/Thumb2/float-cmp.ll | 7 ++++++-
8 files changed, 57 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e8e820ac1f695..a3c4cb4ea0582 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -429,8 +429,20 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Update Chain.
Chain = Call.second;
} else {
+ assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
+ "unordered call should be simple boolean");
+
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ if (RetVT == SetCCVT &&
+ getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
+ // FIXME: Checking the type matches is a hack in case the calling
+ // convention lowering inserted some instructions after the
+ // CopyFromReg. Combines fail to look through the AssertZext.
+ NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
+ DAG.getValueType(MVT::i1));
+ }
+
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
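Roughly, what the new AssertZext buys (ad-hoc DAG notation, not actual
-debug output) is:

  before: (setcc (libcall __unordsf2 a, b), 0, setne)
  after:  (setcc (AssertZext (libcall __unordsf2 a, b), i1), 0, setne)

With the i1 zero-extension fact attached, the setcc against zero folds to
the call result itself, which is what deletes the cmp/movne and snez
normalization in the ARM and RISC-V tests below.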
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 574281d12e3cb..c2e5805b827d0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -613,7 +613,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
@@ -630,7 +630,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
diff --git a/llvm/test/CodeGen/ARM/fpcmp_ueq.ll b/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
index 698c7506cc593..f77720fd935f7 100644
--- a/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -9,12 +9,13 @@ entry:
}
; CHECK-ARMv4-LABEL: f7:
-; CHECK-ARMv4-DAG: bl ___eqsf2
-; CHECK-ARMv4-DAG: bl ___unordsf2
-; CHECK-ARMv4: cmp r0, #0
-; CHECK-ARMv4: movne r0, #1
-; CHECK-ARMv4: orrs r0, r0,
-; CHECK-ARMv4: moveq r0, #42
+; CHECK-ARMv4: bl ___eqsf2
+; CHECK-ARMv4-NEXT: rsbs r1, r0, #0
+; CHECK-ARMv4-NEXT: adc r6, r0, r1
+
+; CHECK-ARMv4: bl ___unordsf2
+; CHECK-ARMv4-NEXT: orrs r0, r0, r6
+; CHECK-ARMv4-NEXT: mov r0, #154
; CHECK-ARMv7-LABEL: f7:
; CHECK-ARMv7: vcmp.f32
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
index 949668f640dbd..a8a4554cc2f4b 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
@@ -471,15 +471,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1199,15 +1199,15 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll
index 1e609f8081ebf..542ec6ede56b8 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll
@@ -403,15 +403,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
index 0cbfc96bf485e..f0551d3405ad3 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
@@ -382,13 +382,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -991,13 +991,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll
index 265d553a3e5d9..393d76a31f1f6 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll
@@ -344,13 +344,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Thumb2/float-cmp.ll b/llvm/test/CodeGen/Thumb2/float-cmp.ll
index 73e0063a9278e..ed80544377204 100644
--- a/llvm/test/CodeGen/Thumb2/float-cmp.ll
+++ b/llvm/test/CodeGen/Thumb2/float-cmp.ll
@@ -200,8 +200,13 @@ define i1 @cmp_d_one(double %a, double %b) {
; CHECK-LABEL: cmp_d_one:
; NONE: bl __aeabi_dcmpeq
; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpun
+; SP: eor r8, r0, #1
+; SP: bl __aeabi_dcmpeq
+; SP-NEXT: clz r0, r0
+; SP-NEXT: lsrs r0, r0, #5
+; SP-NEXT: ands.w r0, r0, r8
+
; DP: vcmp.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
From 64e071b7f4959fe1001af6668bbef385963421cf Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 6 Jun 2025 09:34:23 +0900
Subject: [PATCH 2/3] DAG: Combine AssertZext with and AssertSext
https://github.com/llvm/llvm-project/pull/142898#discussion_r2130676278
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 ++++++++++++++++++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 6 +-----
llvm/test/CodeGen/RISCV/double-fcmp-strict.ll | 20 +++++++++----------
llvm/test/CodeGen/RISCV/double-fcmp.ll | 10 +++++-----
llvm/test/CodeGen/RISCV/float-fcmp-strict.ll | 20 +++++++++----------
llvm/test/CodeGen/RISCV/float-fcmp.ll | 10 +++++-----
.../test/CodeGen/X86/fp128-libcalls-strict.ll | 4 ----
llvm/test/CodeGen/X86/fpcmp-soft-fp.ll | 2 --
8 files changed, 50 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eff152812b7c1..14b5bb721127c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14933,6 +14933,25 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
}
}
+ // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
+ // than X, and the And doesn't change the lower iY bits, we can move the
+ // AssertZext in front of the And and drop the AssertSext.
+ if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
+ if (AssertVT.bitsLT(BigA_AssertVT) &&
+ Mask.countr_one() >= AssertVT.getScalarSizeInBits()) {
+ SDLoc DL(N);
+ SDValue NewAssert =
+ DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
+ N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
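In the same ad-hoc notation, the new combine rewrites, for example:

  (AssertZext (and (AssertSext x, i8), 0xff), i1)
    --> (and (AssertZext x, i1), 0xff)

Because the mask leaves the low 8 bits intact, the low bits of the AND
result are the low bits of x; the i1 zero-extension fact about the AND
result therefore holds for x itself, and it also subsumes what the
AssertSext promised.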
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a3c4cb4ea0582..4472a031c39f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -434,11 +434,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
- if (RetVT == SetCCVT &&
- getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
- // FIXME: Checking the type matches is a hack in case the calling
- // convention lowering inserted some instructions after the
- // CopyFromReg. Combines fail to look through the AssertZext.
+ if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
DAG.getValueType(MVT::i1));
}
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
index a8a4554cc2f4b..7c5332f719867 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
@@ -498,13 +498,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -1226,13 +1226,13 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll
index 542ec6ede56b8..f73e6865cf47d 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll
@@ -430,13 +430,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqdf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unorddf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
index f0551d3405ad3..fd3baa0575250 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
@@ -405,13 +405,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unordsf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqsf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -1014,13 +1014,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unordsf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqsf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll
index 393d76a31f1f6..2e9c39f331bbc 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll
@@ -367,13 +367,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: call __eqsf2
-; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: call __unordsf2
+; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
-; RV64I-NEXT: call __unordsf2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: or a0, a0, s2
+; RV64I-NEXT: call __eqsf2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index 3ac4415d075c9..a7eea04181f60 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -3443,8 +3443,6 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __unordtf2
; X86-NEXT: addl $32, %esp
-; X86-NEXT: testl %eax, %eax
-; X86-NEXT: setne %al
; X86-NEXT: orb %bl, %al
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
@@ -3526,8 +3524,6 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
-; WIN-X86-NEXT: testl %eax, %eax
-; WIN-X86-NEXT: setne %al
; WIN-X86-NEXT: orb %bl, %al
; WIN-X86-NEXT: jne LBB39_1
; WIN-X86-NEXT: # %bb.2:
diff --git a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
index e89acc6bb2684..480a47e57f073 100644
--- a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
+++ b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
@@ -99,8 +99,6 @@ entry:
; CHECK: calll __eqdf2
; CHECK: sete
; CHECK: calll __unorddf2
-; CHECK: setne
-; CHECK: or
; CHECK: retl
define i1 @test11(double %d) #0 {
From bde5b5ed3267e1da02d6af5dd9374e5981ea5032 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 9 Jun 2025 11:35:11 +0900
Subject: [PATCH 3/3] Comments
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 14b5bb721127c..5a4d01ccae5fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14934,7 +14934,7 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
}
// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
- // than X, and the And doesn't change the lower iY bits, we can move the
+ // than X, and the And doesn't change the lower iX bits, we can move the
// AssertZext in front of the And and drop the AssertSext.
if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext &&
@@ -14943,7 +14943,7 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
const APInt &Mask = N0.getConstantOperandAPInt(1);
if (AssertVT.bitsLT(BigA_AssertVT) &&
- Mask.countr_one() >= AssertVT.getScalarSizeInBits()) {
+ Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
SDLoc DL(N);
SDValue NewAssert =
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
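A plain-integer check (hypothetical values, not drawn from the patch or
its tests) of why the trailing-ones count must be measured against the
AssertSext width (BigA_AssertVT) rather than the AssertZext width
(AssertVT):

  #include <cassert>
  #include <cstdint>

  int main() {
    // x satisfies AssertSext i16 (sign-extended from bit 15) but not
    // AssertZext i1: its value is 256, not 0 or 1.
    uint32_t x = 0x100;
    uint32_t M = 0x1; // countr_one(M) == 1, enough for the *old* test

    // AssertZext i1 on the AND result is a true fact here...
    assert((x & M) <= 1);
    // ...but hoisting it onto x would assert something false:
    assert(!(x <= 1));

    // Requiring countr_one(M) >= 16 (the AssertSext width) excludes
    // this case: the AND would then preserve all 16 significant bits
    // of x, so the zero-extension fact really would transfer to x.
    return 0;
  }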