[llvm] [PowerPC] Remove `UnsafeFPMath` uses (PR #154901)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 01:14:45 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-powerpc
Author: None (paperchalice)
Changes:
Remove `UnsafeFPMath` uses in the PowerPC backend. Global flags like this block improvements such as https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797, so their uses are being removed incrementally.
FP operations that may raise exceptions are replaced with constrained intrinsics.
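As a minimal IR-level sketch of the before/after (function names here are illustrative; the real coverage is in the updated tests below), the fast `fctid`-based lowering is now driven by a constrained intrinsic whose `fpexcept.ignore` argument marks the call as exception-free, rather than by the global flag:

```llvm
declare i64 @llvm.lrint.i64.f64(double)
declare i64 @llvm.experimental.constrained.lrint.i64.f64(double, metadata, metadata)

; Previously this only selected to fctid/mffprd when -enable-unsafe-fp-math
; was passed to llc; without the flag it lowers to a libcall to lrint.
define i64 @lrint_plain(double %d) {
entry:
  %r = tail call i64 @llvm.lrint.i64.f64(double %d)
  ret i64 %r
}

; The constrained form carries the "FP exceptions may be ignored" property on
; the call itself, so the backend can emit the fast sequence with no global
; flag. LangRef requires strictfp on functions using constrained intrinsics.
define i64 @lrint_constrained(double %d) strictfp {
entry:
  %r = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp
  ret i64 %r
}
```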
---
Patch is 140.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154901.diff
6 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+30-27)
- (modified) llvm/lib/Target/PowerPC/PPCInstrVSX.td (+14-14)
- (modified) llvm/test/CodeGen/PowerPC/i64_fp_round.ll (+26-13)
- (modified) llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll (+316-129)
- (modified) llvm/test/CodeGen/PowerPC/vector-llrint.ll (-1413)
- (modified) llvm/test/CodeGen/PowerPC/vector-lrint.ll (-1588)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 74ae8502dccea..5013b76a33136 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -446,14 +446,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
- Subtarget.hasFRE()))
+ if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
- Subtarget.hasFRES()))
+ !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
@@ -569,16 +566,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::LRINT, MVT::f64, Legal);
- setOperationAction(ISD::LRINT, MVT::f32, Legal);
- setOperationAction(ISD::LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::LROUND, MVT::f64, Legal);
- setOperationAction(ISD::LROUND, MVT::f32, Legal);
- setOperationAction(ISD::LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::LLROUND, MVT::f32, Legal);
- }
+
+ setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom);
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
// The nearbyint variants are not allowed to raise the inexact exception
- // so we can only code-gen them with unsafe math.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- }
+ // so we can only code-gen them with fpexcept.ignore.
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -8925,9 +8919,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
//
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
- if (Op.getValueType() == MVT::f32 &&
- !Subtarget.hasFPCVT() &&
- !DAG.getTarget().Options.UnsafeFPMath) {
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
+ !Op->getFlags().hasNoFPExcept()) {
// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
@@ -12694,6 +12687,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerADDSUBO_CARRY(Op, DAG);
+
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FNEARBYINT:
+ if (Op->getFlags().hasNoFPExcept())
+ return Op;
+ return SDValue();
}
}
@@ -18504,11 +18506,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
+ bool AllowContract = I->getFastMathFlags().allowContract() &&
+ User->getFastMathFlags().allowContract();
- return !(
- isFMAFasterThanFMulAndFAdd(*F, Ty) &&
- isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+ return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
}
case Instruction::Load: {
// Don't break "store (load float*)" pattern, this pattern will be combined
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..598fc57ff65f2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2800,14 +2800,14 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
// these need to be defined after the any_frint versions so ISEL will correctly
// add the chain to the strict versions.
-def : Pat<(f32 (fnearbyint f32:$S)),
+def : Pat<(f32 (strict_fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f64 (fnearbyint f64:$S)),
+def : Pat<(f64 (strict_fnearbyint f64:$S)),
(f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (fnearbyint v2f64:$S)),
+def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)),
(v2f64 (XVRDPIC $S))>;
-def : Pat<(v4f32 (fnearbyint v4f32:$S)),
+def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)),
(v4f32 (XVRSPIC $S))>;
// Materialize a zero-vector of long long
@@ -3592,25 +3592,25 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
+def : Pat<(i64 (strict_lrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
+def : Pat<(i64 (strict_lrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
+def : Pat<(i64 (strict_llrint f64:$S)),
(i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
+def : Pat<(i64 (strict_llrint f32:$S)),
(i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
+def : Pat<(i64 (strict_lround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
+def : Pat<(i64 (strict_lround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i32 (lround f64:$S)),
+def : Pat<(i32 (strict_lround f64:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>;
-def : Pat<(i32 (lround f32:$S)),
+def : Pat<(i32 (strict_lround f32:$S)),
(i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
+def : Pat<(i64 (strict_llround f64:$S)),
(i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
+def : Pat<(i64 (strict_llround f32:$S)),
(i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
diff --git a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
index f7df003fcc3f8..ad22591478814 100644
--- a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -4,10 +4,9 @@
; for minor code generation differences.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL
-; Also check that with -enable-unsafe-fp-math we do not get that extra
+; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
-; CHECK-UNSAFE-NOT: isel
+
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -15,9 +14,8 @@ define float @test(i64 %x) nounwind readnone {
; Verify that we get the code sequence needed to avoid double-rounding.
; Note that only parts of the sequence are checked for here, to allow
; for minor code generation differences.
-; Also check that with -enable-unsafe-fp-math we do not get that extra
+; Also check that with fpexcept.ignore we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: clrldi 4, 3, 53
@@ -51,18 +49,33 @@ define float @test(i64 %x) nounwind readnone {
; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
; CHECK-NO-ISEL-NEXT: frsp 1, 0
; CHECK-NO-ISEL-NEXT: blr
-;
-; CHECK-UNSAFE-LABEL: test:
-; CHECK-UNSAFE: # %bb.0: # %entry
-; CHECK-UNSAFE-NEXT: std 3, -8(1)
-; CHECK-UNSAFE-NEXT: lfd 0, -8(1)
-; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0
-; CHECK-UNSAFE-NEXT: frsp 1, 0
-; CHECK-UNSAFE-NEXT: blr
entry:
%conv = sitofp i64 %x to float
ret float %conv
}
+define float @test_constrained(i64 %x) nounwind readnone {
+; Also check that with fpexcept.ignore we do not get that extra
+; code sequence. Simply verify that there is no "isel" present.
+; CHECK-LABEL: test_constrained:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std 3, -8(1)
+; CHECK-NEXT: lfd 0, -8(1)
+; CHECK-NEXT: xscvsxddp 0, 0
+; CHECK-NEXT: frsp 1, 0
+; CHECK-NEXT: blr
+;
+; CHECK-NO-ISEL-LABEL: test_constrained:
+; CHECK-NO-ISEL: # %bb.0: # %entry
+; CHECK-NO-ISEL-NEXT: std 3, -8(1)
+; CHECK-NO-ISEL-NEXT: lfd 0, -8(1)
+; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0
+; CHECK-NO-ISEL-NEXT: frsp 1, 0
+; CHECK-NO-ISEL-NEXT: blr
+entry:
+ %conv = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret float %conv
+}
+declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
index 2be370f638d5b..af48bf22a7669 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
@@ -5,9 +5,6 @@
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \
; RUN: FileCheck %s
-; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
-; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
-; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
; BE-LABEL: test_lrint:
; BE: # %bb.0: # %entry
@@ -36,17 +33,36 @@ define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lrint:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lrint.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_lrint(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lrint:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lrint
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lrint.i64.f64(double)
define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
@@ -77,17 +93,36 @@ define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lrintf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lrint.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_lrintf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_lrintf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lrintf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lrintf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lrint.i64.f32(float)
define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
@@ -118,17 +153,36 @@ define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_llrint:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.llrint.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_llrint(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_llrint:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl llrint
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_llrint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.i64.f64(double)
define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
@@ -159,17 +213,36 @@ define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_llrintf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: fctid f0, f1
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.llrint.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_llrintf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_llrintf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl llrintf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_llrintf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fctid f0, f1
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.llrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.llrint.i64.f32(float)
define dso_local i64 @test_lround(double %d) local_unnamed_addr {
@@ -200,18 +273,37 @@ define dso_local i64 @test_lround(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lround:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctid f0, f0
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lround.i64.f64(double %d)
ret i64 %0
}
+define dso_local i64 @test_constrained_lround(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lround:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lround
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lround:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctid f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore")
+ ret i64 %0
+}
+
declare i64 @llvm.lround.i64.f64(double)
define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr {
@@ -242,18 +334,37 @@ define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lroundi32f64:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctiw f0, f0
-; FAST-NEXT: mffprwz r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i32 @llvm.lround.i32.f64(double %d)
ret i32 %0
}
+define dso_local i32 @test_constrained_lroundi32f64(double %d) local_unnamed_addr {
+; BE-LABEL: test_constrained_lroundi32f64:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lround
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lroundi32f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctiw f0, f0
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore")
+ ret i32 %0
+}
+
declare i32 @llvm.lround.i32.f64(double)
define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
@@ -284,18 +395,37 @@ define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
-;
-; FAST-LABEL: test_lroundf:
-; FAST: # %bb.0: # %entry
-; FAST-NEXT: xsrdpi f0, f1
-; FAST-NEXT: fctid f0, f0
-; FAST-NEXT: mffprd r3, f0
-; FAST-NEXT: blr
entry:
%0 = tail call i64 @llvm.lround.i64.f32(float %f)
ret i64 %0
}
+define dso_local i64 @test_constrained_lroundf(float %f) local_unnamed_addr {
+; BE-LABEL: test_constrained_lroundf:
+; BE: # %bb.0: # %entry
+; BE-NEXT: mflr r0
+; BE-NEXT: stdu r1, -112(r1)
+; BE-NEXT: std r0, 128(r1)
+; BE-NEXT: .cfi_def_cfa_offset 112
+; BE-NEXT: .cfi_offset lr, 16
+; BE-NEXT: bl lroundf
+; BE-NEXT: nop
+; BE-NEXT: addi r1, r1, 112
+; BE-NEXT: ld r0, 16(r1)
+; BE-NEXT: mtlr r0
+; BE-NEXT: blr
+;
+; CHECK-LABEL: test_constrained_lroundf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xsrdpi f0, f1
+; CHECK-NEXT: fctid f0, f0
+; CHECK-NEXT: mffprd r3, f0
+; CH...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/154901