[llvm] [AArch64] Remove `UnsafeFPMath` (PR #150876)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 02:59:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (paperchalice)
<details>
<summary>Changes</summary>
We should always use fast math flags, remove these global flags incrementally.
See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797
---
Patch is 71.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150876.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+10-11)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+3-6)
- (modified) llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir (+29-29)
- (modified) llvm/test/CodeGen/AArch64/arm64-fml-combines.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/fcsel-zero.ll (+16-19)
- (modified) llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir (+8-14)
- (modified) llvm/test/CodeGen/AArch64/machine-combiner.ll (+524-363)
- (modified) llvm/test/CodeGen/AArch64/machine-combiner.mir (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sqrt-fastmath.ll (+26-27)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b49754ee7e1f..107ad1bc8f9fb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11325,7 +11325,7 @@ static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal,
SDValue AArch64TargetLowering::LowerSELECT_CC(
ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal,
- iterator_range<SDNode::user_iterator> Users, bool HasNoNaNs,
+ iterator_range<SDNode::user_iterator> Users, SDNodeFlags Flags,
const SDLoc &DL, SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
@@ -11523,7 +11523,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
return true;
}
})) {
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || HasNoNaNs;
+ bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Flags.hasNoNaNs();
SDValue VectorCmp =
emitFloatCompareMask(LHS, RHS, TVal, FVal, CC, NoNaNs, DL, DAG);
if (VectorCmp)
@@ -11537,7 +11537,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (Flags.hasNoSignedZeros()) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
@@ -11616,10 +11616,9 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
- bool HasNoNans = Op->getFlags().hasNoNaNs();
+ SDNodeFlags Flags = Op->getFlags();
SDLoc DL(Op);
- return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL,
- DAG);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), Flags, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
@@ -11627,7 +11626,6 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
- bool HasNoNans = Op->getFlags().hasNoNaNs();
SDLoc DL(Op);
EVT Ty = Op.getValueType();
@@ -11694,8 +11692,8 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
DAG.getUNDEF(MVT::f32), FVal);
}
- SDValue Res =
- LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL, DAG);
+ SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(),
+ Op->getFlags(), DL, DAG);
if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
@@ -12292,7 +12290,8 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
- SDNodeFlags Flags = SDNodeFlags::AllowReassociation;
+ SDNodeFlags Flags =
+ SDNodeFlags::AllowReassociation | SDNodeFlags::NoSignedZeros;
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
@@ -16674,7 +16673,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath));
+ I->getFastMathFlags().allowContract()));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 95d0e3be70d16..ea63edd86210e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -662,7 +662,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal,
iterator_range<SDNode::user_iterator> Users,
- bool HasNoNans, const SDLoc &dl,
+ SDNodeFlags Flags, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8685d7a04ac9c..59d4fd26f6f91 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6574,10 +6574,8 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
// the target options or if FADD/FSUB has the contract fast-math flag.
- return Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ return Options.AllowFPOpFusion == FPOpFusion::Fast ||
Inst.getFlag(MachineInstr::FmContract);
- return true;
}
return false;
}
@@ -6680,9 +6678,8 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
case AArch64::FMUL_ZZZ_H:
case AArch64::FMUL_ZZZ_S:
case AArch64::FMUL_ZZZ_D:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
- (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
- Inst.getFlag(MachineInstr::MIFlag::FmNsz));
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// == Integer types ==
// -- Base instructions --
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
index cf4f321ebae18..491d693ec527a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
@@ -1,8 +1,8 @@
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
#
name: f1_2s
registers:
@@ -16,18 +16,18 @@ body: |
%2:fpr64 = COPY $d2
%1:fpr64 = COPY $d1
%0:fpr64 = COPY $d0
- %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr
+ %3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr
$d0 = COPY %4
RET_ReallyLR implicit $d0
...
# UNPROFITABLE-LABEL: name: f1_2s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_4s
@@ -42,18 +42,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_4s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_4s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2
# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_2d
@@ -68,18 +68,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_2d
-# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2d
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2
# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_both_fmul_2s
@@ -97,15 +97,15 @@ body: |
%2:fpr64 = COPY $q2
%1:fpr64 = COPY $q1
%0:fpr64 = COPY $q0
- %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr
- %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr
+ %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr
+ %6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2s
-# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_4s
@@ -123,15 +123,15 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_4s
-# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_2d
@@ -149,14 +149,14 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2d
-# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr
diff --git a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
index ce3581030646d..60c48bfce3f85 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s
define void @foo_2d(ptr %src) {
@@ -130,9 +130,9 @@ for.end: ; preds = %for.body
; CHECK: fnmadd h0, h0, h1, h2
define half @test0(half %a, half %b, half %c) {
entry:
- %0 = fmul half %a, %b
- %mul = fsub half -0.000000e+00, %0
- %sub1 = fsub half %mul, %c
+ %0 = fmul contract half %a, %b
+ %mul = fsub contract half -0.000000e+00, %0
+ %sub1 = fsub contract half %mul, %c
ret half %sub1
}
@@ -140,9 +140,9 @@ entry:
; CHECK: fnmadd s0, s0, s1, s2
define float @test1(float %a, float %b, float %c) {
entry:
- %0 = fmul float %a, %b
- %mul = fsub float -0.000000e+00, %0
- %sub1 = fsub float %mul, %c
+ %0 = fmul contract float %a, %b
+ %mul = fsub contract float -0.000000e+00, %0
+ %sub1 = fsub contract float %mul, %c
ret float %sub1
}
@@ -150,9 +150,9 @@ entry:
; CHECK: fnmadd d0, d0, d1, d2
define double @test2(double %a, double %b, double %c) {
entry:
- %0 = fmul double %a, %b
- %mul = fsub double -0.000000e+00, %0
- %sub1 = fsub double %mul, %c
+ %0 = fmul contract double %a, %b
+ %mul = fsub contract double -0.000000e+00, %0
+ %sub1 = fsub contract double %mul, %c
ret double %sub1
}
diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
index 3fbcd106d08ab..3db588b6c0276 100644
--- a/llvm/test/CodeGen/AArch64/fcsel-zero.ll
+++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s
-define float @foeq(float %a, float %b) #0 {
- %t = fcmp oeq float %a, 0.0
+define float @foeq(float %a, float %b) {
+ %t = fcmp nsz oeq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: foeq
@@ -11,8 +11,8 @@ define float @foeq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
}
-define float @fueq(float %a, float %b) #0 {
- %t = fcmp ueq float %a, 0.0
+define float @fueq(float %a, float %b) {
+ %t = fcmp nsz ueq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: fueq
@@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs
}
-define float @fone(float %a, float %b) #0 {
- %t = fcmp one float %a, 0.0
+define float @fone(float %a, float %b) {
+ %t = fcmp nsz one float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fone
@@ -31,8 +31,8 @@ define float @fone(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt
}
-define float @fune(float %a, float %b) #0 {
- %t = fcmp une float %a, 0.0
+define float @fune(float %a, float %b) {
+ %t = fcmp nsz une float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fune
@@ -40,8 +40,8 @@ define float @fune(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne
}
-define double @doeq(double %a, double %b) #0 {
- %t = fcmp oeq double %a, 0.0
+define double @doeq(double %a, double %b) {
+ %t = fcmp nsz oeq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: doeq
@@ -49,8 +49,8 @@ define double @doeq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
}
-define double @dueq(double %a, double %b) #0 {
- %t = fcmp ueq double %a, 0.0
+define double @dueq(double %a, double %b) {
+ %t = fcmp nsz ueq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: dueq
@@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs
}
-define double @done(double %a, double %b) #0 {
- %t = fcmp one double %a, 0.0
+define double @done(double %a, double %b) {
+ %t = fcmp nsz one double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: done
@@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt
}
-define double @dune(double %a, double %b) #0 {
- %t = fcmp une double %a, 0.0
+define double @dune(double %a, double %b) {
+ %t = fcmp nsz une double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: dune
; CHECK: fcmp [[R:d[0-9]+]], #0.0
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
-
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
index 525f6dd05c6c6..184c9ef7a6ea8 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
@@ -1,14 +1,11 @@
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s
# fadd without the reassoc flags can be reassociate only when unsafe fp math is
# enabled.
# CHECK-LABEL: name: fadd_no_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
---
name: fadd_no_reassoc
alignment: 4
@@ -49,10 +46,9 @@ body: |
# the reassoc flag is ignored.
# CHECK-LABEL: name: fadd_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150876
More information about the llvm-commits
mailing list