[llvm] [SDAG] Add missing ppc_fp128 ExpandFloatRes legalization for modf (PR #127895)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 13:18:26 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
Should fix: https://lab.llvm.org/buildbot/#/builders/72/builds/8380
(`test_modf_ppcf128` is the test case that needed the additional legalization)
---
Full diff: https://github.com/llvm/llvm-project/pull/127895.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+18)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+4)
- (added) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+330)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4b79bd28e2750..0244c170a2123 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1569,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ case ISD::FMODF: ExpandFloatRes_FMODF(N); break;
// clang-format on
}
@@ -1619,6 +1620,23 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
GetPairElements(Tmp.first, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) {
+ ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)),
+ /*CallRetResNo=*/0);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented");
+ SmallVector<SDValue> Results;
+ DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+ for (auto [ResNo, Res] : enumerate(Results)) {
+ SDValue Lo, Hi;
+ GetPairElements(Res, Lo, Hi);
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+ }
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 69c687a797485..cac969f7e2185 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -668,6 +668,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
+
// clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -714,6 +717,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMODF(SDNode *N);
// clang-format on
// Float Operand Expansion.
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
new file mode 100644
index 0000000000000..69e3b22c7352c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f2, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f1, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset f30, -16
+; CHECK-NEXT: .cfi_offset f31, -8
+; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: xscvdphp f0, f2
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f31, f0
+; CHECK-NEXT: xscvdphp f0, f1
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: clrlwi r3, r3, 16
+; CHECK-NEXT: mtfprwz f0, r3
+; CHECK-NEXT: xscvhpdp f1, f0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: fmr f30, f1
+; CHECK-NEXT: fmr f1, f31
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f3, 40(r1)
+; CHECK-NEXT: fmr f2, f1
+; CHECK-NEXT: fmr f1, f30
+; CHECK-NEXT: lfs f4, 44(r1)
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r4, r1, 44
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfs f2, 44(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { float, float } @llvm.modf.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
+; CHECK-NEXT: .cfi_offset r29, -24
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: .cfi_offset v30, -64
+; CHECK-NEXT: .cfi_offset v31, -48
+; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: addi r30, r1, 36
+; CHECK-NEXT: std r28, 80(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: mr r4, r30
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd vs0, v31
+; CHECK-NEXT: addi r29, r1, 40
+; CHECK-NEXT: xscvdpspn v30, f1
+; CHECK-NEXT: mr r4, r29
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: addi r28, r1, 44
+; CHECK-NEXT: mr r4, r28
+; CHECK-NEXT: xxmrghw v30, vs0, v30
+; CHECK-NEXT: xxsldwi vs0, v31, v31, 1
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NEXT: xscvdpspn v2, f1
+; CHECK-NEXT: lfiwzx f1, 0, r30
+; CHECK-NEXT: lfiwzx f2, 0, r29
+; CHECK-NEXT: lxsiwzx v3, 0, r28
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxmrghw v4, vs2, vs1
+; CHECK-NEXT: xxperm v2, v30, vs0
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxperm v3, v4, vs0
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -112(r1)
+; CHECK-NEXT: std r0, 128(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r29, -24
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: .cfi_offset v30, -64
+; CHECK-NEXT: .cfi_offset v31, -48
+; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: addi r30, r1, 40
+; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: mr r4, r30
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd vs0, v31
+; CHECK-NEXT: addi r29, r1, 44
+; CHECK-NEXT: xscvdpspn v30, f1
+; CHECK-NEXT: mr r4, r29
+; CHECK-NEXT: xscvspdpn f1, vs0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscvdpspn vs0, f1
+; CHECK-NEXT: lfiwzx f1, 0, r29
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghw v2, vs0, v30
+; CHECK-NEXT: lfiwzx f0, 0, r30
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: xxmrghw v3, vs1, vs0
+; CHECK-NEXT: addi r1, r1, 112
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_modf_f64(double %a) {
+; CHECK-LABEL: test_modf_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f2, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_modf_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -80(r1)
+; CHECK-NEXT: std r0, 96(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset v30, -32
+; CHECK-NEXT: .cfi_offset v31, -16
+; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v2
+; CHECK-NEXT: addi r4, r1, 32
+; CHECK-NEXT: xscpsgndp f1, v31, v31
+; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: xscpsgndp v30, f1, f1
+; CHECK-NEXT: xxswapd vs1, v31
+; CHECK-NEXT: addi r4, r1, 40
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxmrghd v2, v30, vs1
+; CHECK-NEXT: lfd f0, 32(r1)
+; CHECK-NEXT: lfd f1, 40(r1)
+; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxmrghd v3, vs0, vs1
+; CHECK-NEXT: addi r1, r1, 80
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f3, 32(r1)
+; CHECK-NEXT: lfd f4, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ ret { ppc_fp128, ppc_fp128 } %result
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/127895
More information about the llvm-commits
mailing list