[llvm] [SDAG] Add missing float type legalizations for FMODF (PR #128055)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 11:39:36 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-backend-aarch64
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This patch adds the missing `SoftenFloatRes_FMODF` legalization for MODF and updates `ExpandFloatRes_FMODF` so it does not rely on the DAG root being available for the expansion.
The fix for `ExpandFloatRes_FMODF` is slightly more tricky, so as a summary, this is why it's needed:
Consider a two-result node like `FMODF` or `FFREXP` that is expanded to a library call whose function prototype looks like `float(float, float*)` -- that is, it returns one float by value from the call and another via an output pointer. The first result of the node maps to the value returned by value and the second result maps to the value returned via the output pointer.
If only the second result is used after the expansion, we hit an issue on targets like x87:
```
// Before expansion:
t0, t1 = fmodf x
return t1 // t0 is unused
```
Expanded result:
```
ptr = alloca
ch0 = call modf ptr
t0, ch1 = copy_from_reg, ch0 // t0 unused
t1, ch2 = ldr ptr, ch1
return t1
```
So far things are alright, but the DAGCombiner optimizes this to:
```
ptr = alloca
ch0 = call modf ptr
// copy_from_reg optimized out
t1, ch1 = ldr ptr, ch0
return t1
```
On most targets this is fine. The optimized-out `copy_from_reg` is unused and is a NOP. However, x87 uses a floating-point stack, and if the `copy_from_reg` is optimized out, the pop needed to remove the unused result is not emitted.
The prior solution for this was to attach the chain from the `copy_from_reg` to the root, which did work; however, the root is not
always available (it's set to null during type legalization). So the alternate solution in this patch is to add a `fake_use` node in the chain, e.g.:
```
ptr = alloca
ch0 = call modf ptr
t0, ch1 = copy_from_reg, ch0 // t0 unused
ch2 = fake_use ch1, undef
t1, ch3 = ldr ptr, ch2
return t1
```
This prevents optimizations from removing the `copy_from_reg` from the chain.
---
Patch is 44.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128055.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+32-12)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+3-1)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+14-16)
- (modified) llvm/test/CodeGen/AArch64/llvm.modf.ll (+1)
- (modified) llvm/test/CodeGen/ARM/llvm.frexp.ll (+16-2)
- (added) llvm/test/CodeGen/ARM/llvm.modf.ll (+507)
- (modified) llvm/test/CodeGen/PowerPC/llvm.frexp.ll (+5)
- (modified) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+108)
- (modified) llvm/test/CodeGen/RISCV/llvm.frexp.ll (+42-16)
- (modified) llvm/test/CodeGen/X86/llvm.frexp.f80.ll (+1)
- (modified) llvm/test/CodeGen/X86/llvm.frexp.ll (+13)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 0244c170a2123..b6fff948f89d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -132,6 +132,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
+ case ISD::FMODF: R = SoftenFloatRes_FMODF(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -791,27 +792,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
return ReturnVal;
}
-SDValue
-DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
- RTLIB::Libcall LC) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
EVT VT = N->getValueType(0);
+ assert(VT == N->getValueType(1) &&
+ "expected both return values to have the same type");
+
if (!TLI.getLibcallName(LC))
return SDValue();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue FirstResultSlot = DAG.CreateStackTemporary(NVT);
- SDValue SecondResultSlot = DAG.CreateStackTemporary(NVT);
SDLoc DL(N);
- TargetLowering::MakeLibCallOptions CallOptions;
- std::array Ops{GetSoftenedFloat(N->getOperand(0)), FirstResultSlot,
- SecondResultSlot};
- std::array OpsVT{VT, FirstResultSlot.getValueType(),
- SecondResultSlot.getValueType()};
+ SmallVector<SDValue, 3> Ops = {GetSoftenedFloat(N->getOperand(0))};
+ SmallVector<EVT, 3> OpsVT = {VT};
+
+ std::array<SDValue, 2> StackSlots;
+ for (auto [ResNum, _] : enumerate(N->values())) {
+ if (ResNum == CallRetResNo)
+ continue;
+ SDValue StackSlot = DAG.CreateStackTemporary(NVT);
+ Ops.push_back(StackSlot);
+ OpsVT.push_back(StackSlot.getValueType());
+ StackSlots[ResNum] = StackSlot;
+ }
+ TargetLowering::MakeLibCallOptions CallOptions;
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
// but since both returns have the same type it should be okay.
CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
@@ -825,8 +834,14 @@ DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo);
};
- SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(FirstResultSlot));
- SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(SecondResultSlot));
+
+ for (auto [ResNum, SlackSlot] : enumerate(StackSlots)) {
+ if (CallRetResNo == ResNum) {
+ SetSoftenedFloat(SDValue(N, ResNum), ReturnVal);
+ continue;
+ }
+ SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(SlackSlot));
+ }
return SDValue();
}
@@ -836,6 +851,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
N, RTLIB::getSINCOS(N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMODF(SDNode *N) {
+ return SoftenFloatRes_UnaryWithTwoFPResults(
+ N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cac969f7e2185..5cbec5cf409cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -562,7 +562,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
// Convert Float Results to Integer.
void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
- SDValue SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
@@ -608,6 +609,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_ExpOp(SDNode *N);
SDValue SoftenFloatRes_FFREXP(SDNode *N);
SDValue SoftenFloatRes_FSINCOS(SDNode *N);
+ SDValue SoftenFloatRes_FMODF(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0a3210a10d394..6dadb0a46528b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2616,6 +2616,20 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
+ if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
+ // FIXME: This is needed for x87, which uses a floating-point stack. If (for
+ // example) the node to be expanded has two results one floating-point which
+ // is returned by the call, and one integer result, returned via an output
+ // pointer. If only the integer result is used then the `CopyFromReg` for
+ // the FP result may be optimized out. This prevents an FP stack pop from
+ // being emitted for it. The `FAKE_USE` node prevents optimizations from
+ // removing the `CopyFromReg` from the chain, and ensures the FP pop will be
+ // emitted. Note: We use an undef pointer as the argument to prevent keeping
+ // any real values live longer than we need to.
+ CallChain = getNode(ISD::FAKE_USE, DL, MVT::Other, CallChain,
+ getUNDEF(TLI->getPointerTy(getDataLayout())));
+ }
+
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
if (ResNo == CallRetResNo) {
Results.push_back(Call);
@@ -2635,22 +2649,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
Results.push_back(LoadResult);
}
- if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
- // FIXME: Find a way to avoid updating the root. This is needed for x86,
- // which uses a floating-point stack. If (for example) the node to be
- // expanded has two results one floating-point which is returned by the
- // call, and one integer result, returned via an output pointer. If only the
- // integer result is used then the `CopyFromReg` for the FP result may be
- // optimized out. This prevents an FP stack pop from being emitted for it.
- // Setting the root like this ensures there will be a use of the
- // `CopyFromReg` chain, and ensures the FP pop will be emitted.
- SDValue NewRoot =
- getNode(ISD::TokenFactor, DL, MVT::Other, getRoot(), CallChain);
- setRoot(NewRoot);
- // Ensure the new root is reachable from the results.
- Results[0] = getMergeValues({Results[0], NewRoot}, DL);
- }
-
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll
index 41fe796daca86..ad746c6aef4fe 100644
--- a/llvm/test/CodeGen/AArch64/llvm.modf.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll
@@ -45,6 +45,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: bl modff
+; CHECK-NEXT: // fake_use: $x0
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll
index e79ddbe93336e..179467120687d 100644
--- a/llvm/test/CodeGen/ARM/llvm.frexp.ll
+++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll
@@ -41,6 +41,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -132,6 +133,8 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r5:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -190,6 +193,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -265,6 +269,8 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) {
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r4:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -376,8 +382,13 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: ldrd r1, r0, [sp, #8]
-; CHECK-NEXT: ldrd r3, r2, [sp], #16
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldrd r2, r1, [sp, #4]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldr r3, [sp], #16
; CHECK-NEXT: pop {r4, r5, r6, pc}
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1
@@ -419,6 +430,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r2, sp, #4
; CHECK-NEXT: bl frexp
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -498,6 +510,8 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl frexp
; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r7:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #12
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
new file mode 100644
index 0000000000000..39996cd55772a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.modf.ll
@@ -0,0 +1,507 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_f16:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r1, r0
+; THUMB-NEXT: mov r0, r4
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r4, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_fractional_part:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_integral_part:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #14]
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #12]
+; CHECK-NEXT: add r0, sp, #12
+; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: strh.w r0, [sp, #10]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #8]
+; CHECK-NEXT: add r0, sp, #8
+; CHECK-NEXT: vmovl.u16 q9, d8
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d18[0]
+; CHECK-NEXT: vmov.32 r1, d18[1]
+; CHECK-NEXT: vmov.32 r2, d16[0]
+; CHECK-NEXT: vmov.32 r3, d16[1]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f16:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r6, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r5, r1
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: uxth r0, r5
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r5, r0
+; THUMB-NEXT: ldr r0, [sp]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r6, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r3, r0
+; THUMB-NEXT: mov r0, r4
+; THUMB-NEXT: mov r1, r5
+; THUMB-NEXT: mov r2, r6
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r4, r5, r6, pc}
+ %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r1, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { float, float } @llvm.modf.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vldr d9, [sp, #40]
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: vmov d8, r2, r3
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vldmia sp, {s0, s1}
+; CHECK-NEXT: add.w r1, r4, #16
+; CHECK-NEXT: vst1.32 {d0}, [r1:64]!
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: vmov s1, r5
+; CHECK-NEXT: vmov s0, r6
+; CHECK-NEXT: vst1.32 {d0}, [r4:64]!
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB-LABEL: test_modf_v3f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB-NEXT: sub sp, #12
+; THUMB-NEXT: mov r7, r3
+; THUMB-NEXT: mov r5, r2
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: ldr r0, [sp, #32]
+; THUMB-NEXT: add r1, sp, #8
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r6, r0
+; THUMB-NEXT: ldr r0, [sp, #8]
+; THUMB-NEXT: str r0, [r4, #24]
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: mov r0, r7
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r7, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: str r0, [r4, #20]
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: mov r0, r5
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r1, [sp]
+; THUMB-NEXT: str r1, [r4, #16]
+; THUMB-NEXT: stm r4!, {r0, r7}
+; THUMB-NEXT: str r6, [r4]
+; THUMB-NEXT: add sp, #12
+; THUMB-NEXT: pop {r4, r5, r6, r7, pc}
+ %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: vldr s1, [sp]
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: vldr s0, [sp, #4]
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r4, r1
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r5, r0
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: mo...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/128055
More information about the llvm-commits
mailing list