[llvm] [SDAG] Add missing float type legalizations for FMODF (PR #128055)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 07:35:46 PST 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/128055
>From bdbf1d07dcdc0f5f69de3d83780ba46483d25d4e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 20 Feb 2025 09:16:42 +0000
Subject: [PATCH 01/14] [SDAG] Fix llvm.modf for ppc_fp128 (attempt two)
Apparently `DAG.getRoot()` can return null, so we need to check that
case. Hopefully fixes: https://lab.llvm.org/buildbot/#/builders/72/builds/8406
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +++-
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 22 +++++++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e61df7047d4a..e7f155fd23c89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2644,8 +2644,10 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// optimized out. This prevents an FP stack pop from being emitted for it.
// Setting the root like this ensures there will be a use of the
// `CopyFromReg` chain, and ensures the FP pop will be emitted.
+ SDValue OldRoot = getRoot();
SDValue NewRoot =
- getNode(ISD::TokenFactor, DL, MVT::Other, getRoot(), CallChain);
+ OldRoot ? getNode(ISD::TokenFactor, DL, MVT::Other, OldRoot, CallChain)
+ : CallChain;
setRoot(NewRoot);
// Ensure the new root is reachable from the results.
Results[0] = getMergeValues({Results[0], NewRoot}, DL);
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 69e3b22c7352c..a3f8a9907a46a 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -328,3 +328,25 @@ define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
%result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
ret { ppc_fp128, ppc_fp128 } %result
}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_intergral(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_intergral:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f1, 32(r1)
+; CHECK-NEXT: lfd f2, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+ ret ppc_fp128 %result.1
+}
>From 2cb8a2a9bb80d7376d80a89952d8aaad1cefeb4c Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 20 Feb 2025 12:19:46 +0000
Subject: [PATCH 02/14] Fixups
---
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 17 +++-
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 83 +++++++++++++++++++
2 files changed, 98 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index b6abad830c371..3a2d69d8a8eca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -435,8 +435,21 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- // If the root changed (e.g. it was a dead load) update the root.
- DAG.setRoot(Dummy.getValue());
+ // Get the value of the original root after type legalization.
+ SDValue Root = Dummy.getValue();
+
+ // Get the current root value, if it's not null combine it with the original
+ // root to prevent it being removed as a dead node.
+ if (SDValue LegalRoot = DAG.getRoot()) {
+ Root = DAG.getNode(ISD::TokenFactor, SDLoc(LegalRoot), MVT::Other, Root,
+ LegalRoot);
+ // The token_factor should not need any legalization (as both inputs have
+ // already been legalized).
+ Root->setNodeId(Processed);
+ }
+
+ // Restore the root.
+ DAG.setRoot(Root);
// Remove dead nodes. This is important to do for cleanliness but also before
// the checking loop below. Implicit folding by the DAG.getNode operators and
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index a3f8a9907a46a..1b137c786cc91 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -350,3 +350,86 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral(ppc_fp128 %a) {
%result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
ret ppc_fp128 %result.1
}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+ ret ppc_fp128 %result.1
+}
+
+define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_tail_call:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f3, 32(r1)
+; CHECK-NEXT: lfd f4, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ ret { ppc_fp128, ppc_fp128 } %result
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_intergral_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_intergral_tail_call:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: lfd f1, 32(r1)
+; CHECK-NEXT: lfd f2, 40(r1)
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+ ret ppc_fp128 %result.1
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional_tail_call:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: std r0, 64(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: addi r5, r1, 32
+; CHECK-NEXT: bl modfl
+; CHECK-NEXT: nop
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+ %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+ %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+ ret ppc_fp128 %result.1
+}
>From 7fbe2dd356cabbcca19bc383bc27257d737d71b8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 20 Feb 2025 14:20:20 +0000
Subject: [PATCH 03/14] Add missing soften for modf
---
.../SelectionDAG/LegalizeFloatTypes.cpp | 44 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +-
llvm/test/CodeGen/ARM/llvm.modf.ll | 504 ++++++++++++++++++
3 files changed, 539 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/llvm.modf.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 9fbcb5bc31537..21ab3b1dc3094 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -132,6 +132,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
+ case ISD::FMODF: R = SoftenFloatRes_FMODF(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -791,27 +792,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
return ReturnVal;
}
-SDValue
-DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
- RTLIB::Libcall LC) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
EVT VT = N->getValueType(0);
+ assert(VT == N->getValueType(1) &&
+ "expected both return values to have the same type");
+
if (!TLI.getLibcallName(LC))
return SDValue();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue FirstResultSlot = DAG.CreateStackTemporary(NVT);
- SDValue SecondResultSlot = DAG.CreateStackTemporary(NVT);
SDLoc DL(N);
- TargetLowering::MakeLibCallOptions CallOptions;
- std::array Ops{GetSoftenedFloat(N->getOperand(0)), FirstResultSlot,
- SecondResultSlot};
- std::array OpsVT{VT, FirstResultSlot.getValueType(),
- SecondResultSlot.getValueType()};
+ SmallVector<SDValue, 3> Ops = {GetSoftenedFloat(N->getOperand(0))};
+ SmallVector<EVT, 3> OpsVT = {VT};
+
+ std::array<SDValue, 2> StackSlots;
+ for (auto [ResNum, _] : enumerate(N->values())) {
+ if (ResNum == CallRetResNo)
+ continue;
+ SDValue StackSlot = DAG.CreateStackTemporary(NVT);
+ Ops.push_back(StackSlot);
+ OpsVT.push_back(StackSlot.getValueType());
+ StackSlots[ResNum] = StackSlot;
+ }
+ TargetLowering::MakeLibCallOptions CallOptions;
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
// but since both returns have the same type it should be okay.
CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
@@ -825,8 +834,14 @@ DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo);
};
- SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(FirstResultSlot));
- SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(SecondResultSlot));
+
+ for (auto [ResNum, SlackSlot] : enumerate(StackSlots)) {
+ if (CallRetResNo == ResNum) {
+ SetSoftenedFloat(SDValue(N, ResNum), ReturnVal);
+ continue;
+ }
+ SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(SlackSlot));
+ }
return SDValue();
}
@@ -836,6 +851,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
N, RTLIB::getSINCOS(N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMODF(SDNode *N) {
+ return SoftenFloatRes_UnaryWithTwoFPResults(
+ N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 74d7210743372..50247cebb91b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -562,7 +562,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
// Convert Float Results to Integer.
void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
- SDValue SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_UnaryWithTwoFPResults(
+ SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
@@ -608,6 +609,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_ExpOp(SDNode *N);
SDValue SoftenFloatRes_FFREXP(SDNode *N);
SDValue SoftenFloatRes_FSINCOS(SDNode *N);
+ SDValue SoftenFloatRes_FMODF(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
new file mode 100644
index 0000000000000..66f6c9b9383a7
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.modf.ll
@@ -0,0 +1,504 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_f16:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r1, r0
+; THUMB-NEXT: mov r0, r4
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r4, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_fractional_part:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f16_only_use_integral_part:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #14]
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #12]
+; CHECK-NEXT: add r0, sp, #12
+; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: strh.w r0, [sp, #10]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #8]
+; CHECK-NEXT: add r0, sp, #8
+; CHECK-NEXT: vmovl.u16 q9, d8
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d18[0]
+; CHECK-NEXT: vmov.32 r1, d18[1]
+; CHECK-NEXT: vmov.32 r2, d16[0]
+; CHECK-NEXT: vmov.32 r3, d16[1]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f16:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r6, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r5, r1
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: uxth r0, r5
+; THUMB-NEXT: bl __gnu_h2f_ieee
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r5, r0
+; THUMB-NEXT: ldr r0, [sp]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r6, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: bl __gnu_f2h_ieee
+; THUMB-NEXT: mov r3, r0
+; THUMB-NEXT: mov r0, r4
+; THUMB-NEXT: mov r1, r5
+; THUMB-NEXT: mov r2, r6
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r4, r5, r6, pc}
+ %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r1, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { float, float } @llvm.modf.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vldr d9, [sp, #40]
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: vmov d8, r2, r3
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vldmia sp, {s0, s1}
+; CHECK-NEXT: add.w r1, r4, #16
+; CHECK-NEXT: vst1.32 {d0}, [r1:64]!
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: vmov s1, r5
+; CHECK-NEXT: vmov s0, r6
+; CHECK-NEXT: vst1.32 {d0}, [r4:64]!
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB-LABEL: test_modf_v3f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB-NEXT: sub sp, #12
+; THUMB-NEXT: mov r7, r3
+; THUMB-NEXT: mov r5, r2
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: ldr r0, [sp, #32]
+; THUMB-NEXT: add r1, sp, #8
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r6, r0
+; THUMB-NEXT: ldr r0, [sp, #8]
+; THUMB-NEXT: str r0, [r4, #24]
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: mov r0, r7
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r7, r0
+; THUMB-NEXT: ldr r0, [sp, #4]
+; THUMB-NEXT: str r0, [r4, #20]
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: mov r0, r5
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: ldr r1, [sp]
+; THUMB-NEXT: str r1, [r4, #16]
+; THUMB-NEXT: stm r4!, {r0, r7}
+; THUMB-NEXT: str r6, [r4]
+; THUMB-NEXT: add sp, #12
+; THUMB-NEXT: pop {r4, r5, r6, r7, pc}
+ %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: vldr s1, [sp]
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: vldr s0, [sp, #4]
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f32:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r4, r1
+; THUMB-NEXT: mov r1, sp
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r5, r0
+; THUMB-NEXT: add r1, sp, #4
+; THUMB-NEXT: mov r0, r4
+; THUMB-NEXT: bl modff
+; THUMB-NEXT: mov r1, r0
+; THUMB-NEXT: ldr r2, [sp]
+; THUMB-NEXT: ldr r3, [sp, #4]
+; THUMB-NEXT: mov r0, r5
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r4, r5, r7, pc}
+ %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_modf_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: mov r3, r2
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldrd r12, r3, [sp, #40]
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vldr d16, [sp, #8]
+; CHECK-NEXT: vst1.64 {d8, d9}, [r4]!
+; CHECK-NEXT: vst1.64 {d16, d17}, [r4]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, pc}
+;
+; THUMB-LABEL: test_modf_v2f64:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB-NEXT: sub sp, #28
+; THUMB-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; THUMB-NEXT: mov r7, r2
+; THUMB-NEXT: mov r4, r0
+; THUMB-NEXT: ldr r0, [sp, #48]
+; THUMB-NEXT: ldr r1, [sp, #52]
+; THUMB-NEXT: add r2, sp, #16
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: mov r6, r0
+; THUMB-NEXT: mov r5, r1
+; THUMB-NEXT: ldr r0, [sp, #20]
+; THUMB-NEXT: str r0, [r4, #28]
+; THUMB-NEXT: ldr r0, [sp, #16]
+; THUMB-NEXT: str r0, [r4, #24]
+; THUMB-NEXT: add r2, sp, #8
+; THUMB-NEXT: mov r0, r7
+; THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: ldr r2, [sp, #12]
+; THUMB-NEXT: str r2, [r4, #20]
+; THUMB-NEXT: ldr r2, [sp, #8]
+; THUMB-NEXT: str r2, [r4, #16]
+; THUMB-NEXT: str r5, [r4, #12]
+; THUMB-NEXT: stm r4!, {r0, r1, r6}
+; THUMB-NEXT: add sp, #28
+; THUMB-NEXT: pop {r4, r5, r6, r7, pc}
+ %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define { double, double } @test_modf_f64(double %a) {
+; CHECK-LABEL: test_modf_f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldrd r2, r3, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: ldr r2, [sp]
+; THUMB-NEXT: ldr r3, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ ret { double, double } %result
+}
+
+define double @test_modf_f64_only_use_intergral(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_intergral:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldrd r0, r1, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_intergral:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: ldr r0, [sp]
+; THUMB-NEXT: ldr r1, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 1
+ ret double %result.1
+}
+
+define double @test_modf_f64_only_use_fractional(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_fractional:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_fractional:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 0
+ ret double %result.1
+}
+
+define { double, double } @test_modf_f64_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_tail_call:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldrd r2, r3, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_tail_call:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: ldr r2, [sp]
+; THUMB-NEXT: ldr r3, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = tail call { double, double } @llvm.modf.f64(double %a)
+ ret { double, double } %result
+}
+
+define double @test_modf_f64_only_use_intergral_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_intergral_tail_call:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldrd r0, r1, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_intergral_tail_call:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: ldr r0, [sp]
+; THUMB-NEXT: ldr r1, [sp, #4]
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = tail call { double, double } @llvm.modf.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 1
+ ret double %result.1
+}
+
+define double @test_modf_f64_only_use_fractional_tail_call(double %a) {
+; CHECK-LABEL: test_modf_f64_only_use_fractional_tail_call:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+;
+; THUMB-LABEL: test_modf_f64_only_use_fractional_tail_call:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: push {r7, lr}
+; THUMB-NEXT: sub sp, #8
+; THUMB-NEXT: mov r2, sp
+; THUMB-NEXT: bl modf
+; THUMB-NEXT: add sp, #8
+; THUMB-NEXT: pop {r7, pc}
+ %result = tail call { double, double } @llvm.modf.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 0
+ ret double %result.1
+}
>From 699a242521d019642f309f770fbe34d73e0fcda6 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 20 Feb 2025 19:21:19 +0000
Subject: [PATCH 04/14] Alternate fake_use missing pop fix
---
.../CodeGen/SelectionDAG/LegalizeTypes.cpp | 17 +-----
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 32 +++++-----
llvm/test/CodeGen/AArch64/llvm.modf.ll | 1 +
llvm/test/CodeGen/ARM/llvm.frexp.ll | 18 +++++-
llvm/test/CodeGen/ARM/llvm.modf.ll | 3 +
llvm/test/CodeGen/PowerPC/llvm.frexp.ll | 5 ++
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 3 +
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 58 ++++++++++++++-----
llvm/test/CodeGen/X86/llvm.frexp.f80.ll | 1 +
llvm/test/CodeGen/X86/llvm.frexp.ll | 13 +++++
10 files changed, 100 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 3a2d69d8a8eca..b6abad830c371 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -435,21 +435,8 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- // Get the value of the original root after type legalization.
- SDValue Root = Dummy.getValue();
-
- // Get the current root value, if it's not null combine it with the original
- // root to prevent it being removed as a dead node.
- if (SDValue LegalRoot = DAG.getRoot()) {
- Root = DAG.getNode(ISD::TokenFactor, SDLoc(LegalRoot), MVT::Other, Root,
- LegalRoot);
- // The token_factor should not need any legalization (as both inputs have
- // already been legalized).
- Root->setNodeId(Processed);
- }
-
- // Restore the root.
- DAG.setRoot(Root);
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
// Remove dead nodes. This is important to do for cleanliness but also before
// the checking loop below. Implicit folding by the DAG.getNode operators and
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e7f155fd23c89..6c29635e4642e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2616,6 +2616,20 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
+ if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
+ // FIXME: This is needed for x87, which uses a floating-point stack. Suppose
+ // (for example) the node to be expanded has two results: a floating-point
+ // result returned by the call, and an integer result returned via an output
+ // pointer. If only the integer result is used, then the `CopyFromReg` for
+ // the FP result may be optimized out, which would prevent an FP stack pop
+ // from being emitted for it. The `FAKE_USE` node stops optimizations from
+ // removing the `CopyFromReg` from the chain, and ensures the FP pop will be
+ // emitted. Note: We use an undef pointer as the argument to prevent keeping
+ // any real values live longer than we need to.
+ CallChain = getNode(ISD::FAKE_USE, DL, MVT::Other, CallChain,
+ getUNDEF(TLI->getPointerTy(getDataLayout())));
+ }
+
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
if (ResNo == CallRetResNo) {
Results.push_back(Call);
@@ -2635,24 +2649,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
Results.push_back(LoadResult);
}
- if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
- // FIXME: Find a way to avoid updating the root. This is needed for x86,
- // which uses a floating-point stack. If (for example) the node to be
- // expanded has two results one floating-point which is returned by the
- // call, and one integer result, returned via an output pointer. If only the
- // integer result is used then the `CopyFromReg` for the FP result may be
- // optimized out. This prevents an FP stack pop from being emitted for it.
- // Setting the root like this ensures there will be a use of the
- // `CopyFromReg` chain, and ensures the FP pop will be emitted.
- SDValue OldRoot = getRoot();
- SDValue NewRoot =
- OldRoot ? getNode(ISD::TokenFactor, DL, MVT::Other, OldRoot, CallChain)
- : CallChain;
- setRoot(NewRoot);
- // Ensure the new root is reachable from the results.
- Results[0] = getMergeValues({Results[0], NewRoot}, DL);
- }
-
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll
index 41fe796daca86..ad746c6aef4fe 100644
--- a/llvm/test/CodeGen/AArch64/llvm.modf.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll
@@ -45,6 +45,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: bl modff
+; CHECK-NEXT: // fake_use: $x0
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll
index e79ddbe93336e..179467120687d 100644
--- a/llvm/test/CodeGen/ARM/llvm.frexp.ll
+++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll
@@ -41,6 +41,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -132,6 +133,8 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r5:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -190,6 +193,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -265,6 +269,8 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) {
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r4:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -376,8 +382,13 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: ldrd r1, r0, [sp, #8]
-; CHECK-NEXT: ldrd r3, r2, [sp], #16
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldrd r2, r1, [sp, #4]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: ldr r3, [sp], #16
; CHECK-NEXT: pop {r4, r5, r6, pc}
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1
@@ -419,6 +430,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r2, sp, #4
; CHECK-NEXT: bl frexp
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -498,6 +510,8 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl frexp
; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
+; CHECK-NEXT: @ fake_use: $r0
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r7:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #12
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
index 66f6c9b9383a7..39996cd55772a 100644
--- a/llvm/test/CodeGen/ARM/llvm.modf.ll
+++ b/llvm/test/CodeGen/ARM/llvm.modf.ll
@@ -75,6 +75,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl modff
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: add sp, #8
@@ -390,6 +391,7 @@ define double @test_modf_f64_only_use_intergral(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: bl modf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldrd r0, r1, [sp], #8
; CHECK-NEXT: pop {r7, pc}
;
@@ -462,6 +464,7 @@ define double @test_modf_f64_only_use_intergral_tail_call(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: bl modf
+; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldrd r0, r1, [sp], #8
; CHECK-NEXT: pop {r7, pc}
;
diff --git a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
index 2c522c4d10cda..8b7cfff2a048b 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
@@ -68,6 +68,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -207,6 +208,8 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfiwzx f0, 0, r30
; CHECK-NEXT: lfiwzx f1, 0, r29
; CHECK-NEXT: xxmrghw v2, vs1, vs0
@@ -273,6 +276,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -352,6 +356,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: bl frexp
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 1b137c786cc91..f4526fec560b5 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -68,6 +68,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfs f1, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -340,6 +341,7 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral(ppc_fp128 %a) {
; CHECK-NEXT: addi r5, r1, 32
; CHECK-NEXT: bl modfl
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfd f1, 32(r1)
; CHECK-NEXT: lfd f2, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
@@ -403,6 +405,7 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral_tail_call(ppc_fp128 %a) {
; CHECK-NEXT: addi r5, r1, 32
; CHECK-NEXT: bl modfl
; CHECK-NEXT: nop
+; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfd f1, 32(r1)
; CHECK-NEXT: lfd f2, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 4a77b4d32cdda..62b04f42fd07e 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -225,6 +225,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV32IFD-NEXT: call __extendhfsf2
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -237,6 +238,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexpf
+; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -249,6 +251,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: call __extendhfsf2
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -261,6 +264,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: call __extendhfsf2
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -458,6 +462,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -469,6 +474,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexpf
+; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -480,6 +486,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -491,6 +498,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -1086,22 +1094,24 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fmv.s fs0, fa2
-; RV32IFD-NEXT: fmv.s fs1, fa1
-; RV32IFD-NEXT: fmv.s fs2, fa0
+; RV32IFD-NEXT: fmv.s fs0, fa3
+; RV32IFD-NEXT: fmv.s fs1, fa2
+; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: addi a0, a0, 12
-; RV32IFD-NEXT: fmv.s fa0, fa3
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 8
+; RV32IFD-NEXT: addi a0, s0, 12
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 4
+; RV32IFD-NEXT: addi a0, s0, 8
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: addi a0, s0, 4
; RV32IFD-NEXT: fmv.s fa0, fs2
-; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: # fake_use: $x10
+; RV32IFD-NEXT: # fake_use: $x10
+; RV32IFD-NEXT: # fake_use: $x10
+; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
@@ -1133,6 +1143,10 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IFD-NEXT: addi a0, sp, 32
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call frexpf
+; RV64IFD-NEXT: # fake_use: $x10
+; RV64IFD-NEXT: # fake_use: $x10
+; RV64IFD-NEXT: # fake_use: $x10
+; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 8(sp)
; RV64IFD-NEXT: ld a1, 16(sp)
; RV64IFD-NEXT: ld a2, 24(sp)
@@ -1157,22 +1171,26 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: mv s0, a3
-; RV32IZFINXZDINX-NEXT: mv s1, a2
-; RV32IZFINXZDINX-NEXT: mv s2, a1
+; RV32IZFINXZDINX-NEXT: mv s0, a4
+; RV32IZFINXZDINX-NEXT: mv s1, a3
+; RV32IZFINXZDINX-NEXT: mv s2, a2
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: addi a1, a0, 12
-; RV32IZFINXZDINX-NEXT: mv a0, a4
+; RV32IZFINXZDINX-NEXT: mv a0, a1
+; RV32IZFINXZDINX-NEXT: mv a1, s3
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
+; RV32IZFINXZDINX-NEXT: addi a1, s3, 12
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
+; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
; RV32IZFINXZDINX-NEXT: mv a0, s2
-; RV32IZFINXZDINX-NEXT: mv a1, s3
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1206,6 +1224,10 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IZFINXZDINX-NEXT: addi a1, sp, 32
; RV64IZFINXZDINX-NEXT: mv a0, s0
; RV64IZFINXZDINX-NEXT: call frexpf
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 8(sp)
; RV64IZFINXZDINX-NEXT: ld a1, 16(sp)
; RV64IZFINXZDINX-NEXT: ld a2, 24(sp)
@@ -1459,6 +1481,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexp
+; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -1470,6 +1493,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexp
+; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -1481,6 +1505,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: addi a2, sp, 8
; RV32IZFINXZDINX-NEXT: call frexp
+; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -1492,6 +1517,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexp
+; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
index 92d8c53c8182e..f73770856623a 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
@@ -66,6 +66,7 @@ define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) {
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpl@PLT
; X64-NEXT: fstp %st(0)
+; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index 8436c1052552e..fdb883853468e 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -152,6 +152,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
+; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
@@ -242,6 +243,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
+; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
@@ -256,6 +258,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
+; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
@@ -476,6 +479,10 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
+; X64-NEXT: # fake_use: $rax
+; X64-NEXT: # fake_use: $rax
+; X64-NEXT: # fake_use: $rax
+; X64-NEXT: # fake_use: $rax
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -513,8 +520,12 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
+; WIN32-NEXT: # fake_use: $eax
+; WIN32-NEXT: # fake_use: $eax
+; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
+; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: popl %esi
@@ -584,6 +595,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexp@PLT
+; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
@@ -598,6 +610,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
+; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
>From b5a3a58deb4c4cd0b9d43491c1a70403008f6a31 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 20 Feb 2025 19:50:52 +0000
Subject: [PATCH 05/14] Update comment to avoid undef bot false positive?
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6c29635e4642e..9f9ea0f47a001 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2624,7 +2624,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// the FP result may be optimized out. This prevents an FP stack pop from
// being emitted for it. The `FAKE_USE` node prevents optimizations from
// removing the `CopyFromReg` from the chain, and ensures the FP pop will be
- // emitted. Note: We use an undef pointer as the argument to prevent keeping
+ // emitted. Note: We use an UNDEF pointer as the argument to prevent keeping
// any real values live longer than we need to.
CallChain = getNode(ISD::FAKE_USE, DL, MVT::Other, CallChain,
getUNDEF(TLI->getPointerTy(getDataLayout())));
>From 7d013a8b33a5d636064a4b6b90b1eb9d44f251e0 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 11:00:09 +0000
Subject: [PATCH 06/14] Add and use new CHAIN_BARRIER node
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 5 ++++
llvm/include/llvm/CodeGen/SelectionDAGISel.h | 1 +
llvm/include/llvm/Support/TargetOpcodes.def | 3 ++
llvm/include/llvm/Target/Target.td | 13 +++++++++
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 +++----
.../SelectionDAG/SelectionDAGDumper.cpp | 2 ++
.../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 7 +++++
.../GlobalISel/legalizer-info-validation.mir | 4 +--
llvm/test/CodeGen/AArch64/llvm.modf.ll | 1 -
llvm/test/CodeGen/ARM/llvm.frexp.ll | 18 ++----------
llvm/test/CodeGen/ARM/llvm.modf.ll | 3 --
llvm/test/CodeGen/PowerPC/llvm.frexp.ll | 5 ----
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 3 --
.../GlobalISel/legalizer-info-validation.mir | 6 ++--
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 28 -------------------
llvm/test/CodeGen/X86/llvm.frexp.f80.ll | 1 -
llvm/test/CodeGen/X86/llvm.frexp.ll | 13 ---------
17 files changed, 42 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 59f31f8443947..27332e1cec503 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1400,6 +1400,11 @@ enum NodeType {
/// debugging purposes.
FAKE_USE,
+ /// OutChain = CHAIN_BARRIER(InChain) marks that optimizations should not
+ /// rewrite any users of a chain that contains a CHAIN_BARRIER to use a new
+ /// chain from a point earlier than the CHAIN_BARRIER.
+ CHAIN_BARRIER,
+
/// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
/// beginning and end of GC transition sequence, and carry arbitrary
/// information that target might need for lowering. The first operand is
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..eabaeab721991 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -472,6 +472,7 @@ class SelectionDAGISel {
void Select_WRITE_REGISTER(SDNode *Op);
void Select_UNDEF(SDNode *N);
void Select_FAKE_USE(SDNode *N);
+ void Select_CHAIN_BARRIER(SDNode *N);
void CannotYetSelect(SDNode *N);
void Select_FREEZE(SDNode *N);
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 5ef3707b81fe9..9334f560db036 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -225,6 +225,9 @@ HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
/// Represents a use of the operand but generates no code.
HANDLE_TARGET_OPCODE(FAKE_USE)
+/// Represents a point in a chain optimizations cannot remove.
+HANDLE_TARGET_OPCODE(CHAIN_BARRIER)
+
// This is a fence with the singlethread scope. It represents a compiler memory
// barrier, but does not correspond to any generated instruction.
HANDLE_TARGET_OPCODE(MEMBARRIER)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index e8b460aaf803b..24c8abdcd888f 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1440,6 +1440,19 @@ def FAKE_USE : StandardPseudoInstruction {
let hasSideEffects = 0;
let isMeta = true;
}
+
+
+def CHAIN_BARRIER : StandardPseudoInstruction {
+ // An instruction that takes a chain to mark that optimizations should not
+ // rewrite any users of a chain that contains a CHAIN_BARRIER to use a new
+ // chain from a point earlier than the CHAIN_BARRIER.
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "CHAIN_BARRIER";
+ let hasSideEffects = 0;
+ let isMeta = true;
+}
+
def PATCHABLE_OP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9f9ea0f47a001..1192dbc1d6abf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2622,12 +2622,10 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// is returned by the call, and one integer result, returned via an output
// pointer. If only the integer result is used then the `CopyFromReg` for
// the FP result may be optimized out. This prevents an FP stack pop from
- // being emitted for it. The `FAKE_USE` node prevents optimizations from
- // removing the `CopyFromReg` from the chain, and ensures the FP pop will be
- // emitted. Note: We use an UNDEF pointer as the argument to prevent keeping
- // any real values live longer than we need to.
- CallChain = getNode(ISD::FAKE_USE, DL, MVT::Other, CallChain,
- getUNDEF(TLI->getPointerTy(getDataLayout())));
+ // being emitted for it. The `CHAIN_BARRIER` node prevents optimizations
+ // from removing the `CopyFromReg` from the chain, and ensures the FP pop
+ // will be emitted.
+ CallChain = getNode(ISD::CHAIN_BARRIER, DL, MVT::Other, CallChain);
}
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 8457bee3f665b..22fbe84c41606 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -462,6 +462,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::FAKE_USE:
return "fake_use";
+ case ISD::CHAIN_BARRIER:
+ return "chain_barrier";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 61e5aa270bc11..6bd5cfa02611c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2506,6 +2506,10 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0),
N->getOperand(1), N->getOperand(0));
}
+void SelectionDAGISel::Select_CHAIN_BARRIER(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CHAIN_BARRIER, N->getValueType(0),
+ N->getOperand(0));
+}
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
@@ -3281,6 +3285,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FAKE_USE:
Select_FAKE_USE(NodeToMatch);
return;
+ case ISD::CHAIN_BARRIER:
+ Select_CHAIN_BARRIER(NodeToMatch);
+ return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 0260e65520774..141c94224a615 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -70,11 +70,11 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
-# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
-# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
+# DEBUG-NEXT:G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
diff --git a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll
index ad746c6aef4fe..41fe796daca86 100644
--- a/llvm/test/CodeGen/AArch64/llvm.modf.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll
@@ -45,7 +45,6 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: bl modff
-; CHECK-NEXT: // fake_use: $x0
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/ARM/llvm.frexp.ll b/llvm/test/CodeGen/ARM/llvm.frexp.ll
index 179467120687d..e79ddbe93336e 100644
--- a/llvm/test/CodeGen/ARM/llvm.frexp.ll
+++ b/llvm/test/CodeGen/ARM/llvm.frexp.ll
@@ -41,7 +41,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -133,8 +132,6 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r5:32]
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -193,7 +190,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -269,8 +265,6 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) {
; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: vld1.32 {d16[0]}, [r4:32]
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #8
@@ -382,13 +376,8 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl frexpf
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: ldr r0, [sp, #12]
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: ldrd r2, r1, [sp, #4]
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: ldr r3, [sp], #16
+; CHECK-NEXT: ldrd r1, r0, [sp, #8]
+; CHECK-NEXT: ldrd r3, r2, [sp], #16
; CHECK-NEXT: pop {r4, r5, r6, pc}
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1
@@ -430,7 +419,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: add r2, sp, #4
; CHECK-NEXT: bl frexp
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r7, pc}
@@ -510,8 +498,6 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl frexp
; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
-; CHECK-NEXT: @ fake_use: $r0
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: vld1.32 {d16[1]}, [r7:32]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: add sp, #12
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
index 39996cd55772a..66f6c9b9383a7 100644
--- a/llvm/test/CodeGen/ARM/llvm.modf.ll
+++ b/llvm/test/CodeGen/ARM/llvm.modf.ll
@@ -75,7 +75,6 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: add r1, sp, #4
; CHECK-NEXT: bl modff
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldr r0, [sp, #4]
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: add sp, #8
@@ -391,7 +390,6 @@ define double @test_modf_f64_only_use_intergral(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: bl modf
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldrd r0, r1, [sp], #8
; CHECK-NEXT: pop {r7, pc}
;
@@ -464,7 +462,6 @@ define double @test_modf_f64_only_use_intergral_tail_call(double %a) {
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r2, sp
; CHECK-NEXT: bl modf
-; CHECK-NEXT: @ fake_use: $r0
; CHECK-NEXT: ldrd r0, r1, [sp], #8
; CHECK-NEXT: pop {r7, pc}
;
diff --git a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
index 8b7cfff2a048b..2c522c4d10cda 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.frexp.ll
@@ -68,7 +68,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -208,8 +207,6 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
; CHECK-NEXT: mr r4, r29
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfiwzx f0, 0, r30
; CHECK-NEXT: lfiwzx f1, 0, r29
; CHECK-NEXT: xxmrghw v2, vs1, vs0
@@ -276,7 +273,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: bl frexpf
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -356,7 +352,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; CHECK-NEXT: addi r4, r1, 44
; CHECK-NEXT: bl frexp
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lwz r3, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index f4526fec560b5..1b137c786cc91 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -68,7 +68,6 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
; CHECK-NEXT: xscvhpdp f1, f0
; CHECK-NEXT: bl modff
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfs f1, 44(r1)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
@@ -341,7 +340,6 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral(ppc_fp128 %a) {
; CHECK-NEXT: addi r5, r1, 32
; CHECK-NEXT: bl modfl
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfd f1, 32(r1)
; CHECK-NEXT: lfd f2, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
@@ -405,7 +403,6 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral_tail_call(ppc_fp128 %a) {
; CHECK-NEXT: addi r5, r1, 32
; CHECK-NEXT: bl modfl
; CHECK-NEXT: nop
-; CHECK-NEXT: # fake_use: $x3
; CHECK-NEXT: lfd f1, 32(r1)
; CHECK-NEXT: lfd f2, 40(r1)
; CHECK-NEXT: addi r1, r1, 48
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index dbc13840a0265..259fe8cdf3df0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -72,11 +72,11 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
-# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
+# DEBUG-NEXT:G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
@@ -579,7 +579,7 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index
-# DEBUG-NEXT: .. opcode 212 is aliased to 213
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 62b04f42fd07e..4bb855e415fcb 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -225,7 +225,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV32IFD-NEXT: call __extendhfsf2
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -238,7 +237,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -251,7 +249,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: call __extendhfsf2
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -264,7 +261,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: call __extendhfsf2
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -462,7 +458,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -474,7 +469,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -486,7 +480,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -498,7 +491,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -1108,10 +1100,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-NEXT: addi a0, s0, 4
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: # fake_use: $x10
-; RV32IFD-NEXT: # fake_use: $x10
-; RV32IFD-NEXT: # fake_use: $x10
-; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
@@ -1143,10 +1131,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IFD-NEXT: addi a0, sp, 32
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: # fake_use: $x10
-; RV64IFD-NEXT: # fake_use: $x10
-; RV64IFD-NEXT: # fake_use: $x10
-; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 8(sp)
; RV64IFD-NEXT: ld a1, 16(sp)
; RV64IFD-NEXT: ld a2, 24(sp)
@@ -1187,10 +1171,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1224,10 +1204,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IZFINXZDINX-NEXT: addi a1, sp, 32
; RV64IZFINXZDINX-NEXT: mv a0, s0
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 8(sp)
; RV64IZFINXZDINX-NEXT: ld a1, 16(sp)
; RV64IZFINXZDINX-NEXT: ld a2, 24(sp)
@@ -1481,7 +1457,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexp
-; RV32IFD-NEXT: # fake_use: $x10
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
@@ -1493,7 +1468,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: mv a0, sp
; RV64IFD-NEXT: call frexp
-; RV64IFD-NEXT: # fake_use: $x10
; RV64IFD-NEXT: ld a0, 0(sp)
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: addi sp, sp, 16
@@ -1505,7 +1479,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: addi a2, sp, 8
; RV32IZFINXZDINX-NEXT: call frexp
-; RV32IZFINXZDINX-NEXT: # fake_use: $x10
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
@@ -1517,7 +1490,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) nounwind {
; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexp
-; RV64IZFINXZDINX-NEXT: # fake_use: $x10
; RV64IZFINXZDINX-NEXT: ld a0, 0(sp)
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
index f73770856623a..92d8c53c8182e 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.f80.ll
@@ -66,7 +66,6 @@ define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) {
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpl@PLT
; X64-NEXT: fstp %st(0)
-; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: addq $24, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index fdb883853468e..8436c1052552e 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -152,7 +152,6 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
@@ -243,7 +242,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
-; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
@@ -258,7 +256,6 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
@@ -479,10 +476,6 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexpf@PLT
-; X64-NEXT: # fake_use: $rax
-; X64-NEXT: # fake_use: $rax
-; X64-NEXT: # fake_use: $rax
-; X64-NEXT: # fake_use: $rax
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -520,12 +513,8 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
-; WIN32-NEXT: # fake_use: $eax
-; WIN32-NEXT: # fake_use: $eax
-; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: popl %esi
@@ -595,7 +584,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq frexp@PLT
-; X64-NEXT: # fake_use: $rax
; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
@@ -610,7 +598,6 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: # fake_use: $eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: retl
>From 19192c40ebb7295aa674960a4da12461d302b7b2 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 11:18:47 +0000
Subject: [PATCH 07/14] Remove FIXME
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1192dbc1d6abf..702ed5b29cb61 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2617,7 +2617,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
- // FIXME: This is needed for x87, which uses a floating-point stack. If (for
+ // This is needed for x87, which uses a floating-point stack. If (for
// example) the node to be expanded has two results one floating-point which
// is returned by the call, and one integer result, returned via an output
// pointer. If only the integer result is used then the `CopyFromReg` for
>From 8b711a9989b3265cddef22f6c995855c3abbf648 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 12:21:55 +0000
Subject: [PATCH 08/14] Remove target opcode
---
llvm/include/llvm/Support/TargetOpcodes.def | 3 ---
llvm/include/llvm/Target/Target.td | 11 -----------
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 6 ++++--
.../AArch64/GlobalISel/legalizer-info-validation.mir | 4 ++--
.../RISCV/GlobalISel/legalizer-info-validation.mir | 6 +++---
5 files changed, 9 insertions(+), 21 deletions(-)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 9334f560db036..5ef3707b81fe9 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -225,9 +225,6 @@ HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
/// Represents a use of the operand but generates no code.
HANDLE_TARGET_OPCODE(FAKE_USE)
-/// Represents a point in a chain optimizations cannot remove.
-HANDLE_TARGET_OPCODE(CHAIN_BARRIER)
-
// This is a fence with the singlethread scope. It represents a compiler memory
// barrier, but does not correspond to any generated instruction.
HANDLE_TARGET_OPCODE(MEMBARRIER)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 24c8abdcd888f..d062c4237ecee 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1442,17 +1442,6 @@ def FAKE_USE : StandardPseudoInstruction {
}
-def CHAIN_BARRIER : StandardPseudoInstruction {
- // An instruction that takes a chain to mark that optimizations should not
- // optimize any users of a chain that contains a CHAIN_BARRIER use a new chain
- // from a point earlier than the CHAIN_BARRIER.
- let OutOperandList = (outs);
- let InOperandList = (ins variable_ops);
- let AsmString = "CHAIN_BARRIER";
- let hasSideEffects = 0;
- let isMeta = true;
-}
-
def PATCHABLE_OP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6bd5cfa02611c..b81462ba14fa2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2506,9 +2506,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::FAKE_USE, N->getValueType(0),
N->getOperand(1), N->getOperand(0));
}
+
void SelectionDAGISel::Select_CHAIN_BARRIER(SDNode *N) {
- CurDAG->SelectNodeTo(N, TargetOpcode::CHAIN_BARRIER, N->getValueType(0),
- N->getOperand(0));
+ // CHAIN_BARRIER exists only for SDAG. Remove it before lowering to MIs.
+ CurDAG->ReplaceAllUsesWith(SDValue(N, 0), N->getOperand(0));
+ CurDAG->RemoveDeadNode(N);
}
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 141c94224a615..0260e65520774 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -70,11 +70,11 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
-# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
-# DEBUG-NEXT:G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index 259fe8cdf3df0..dbc13840a0265 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -72,11 +72,11 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
-# DEBUG-NEXT:G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
#
@@ -579,7 +579,7 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. opcode 212 is aliased to 213
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index
>From 4a7230ef05398842d24a2a0d116e103949d8373f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 12:23:10 +0000
Subject: [PATCH 09/14] Rm newlines
---
llvm/include/llvm/Target/Target.td | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index d062c4237ecee..e8b460aaf803b 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1440,8 +1440,6 @@ def FAKE_USE : StandardPseudoInstruction {
let hasSideEffects = 0;
let isMeta = true;
}
-
-
def PATCHABLE_OP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
>From b8c9a286c11c619d0200cd222dd14030718f1f31 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 12:40:57 +0000
Subject: [PATCH 10/14] Update tests
---
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 30 ++++++++++++++-------------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 4bb855e415fcb..4a77b4d32cdda 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -1086,19 +1086,21 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fmv.s fs0, fa3
-; RV32IFD-NEXT: fmv.s fs1, fa2
-; RV32IFD-NEXT: fmv.s fs2, fa1
+; RV32IFD-NEXT: fmv.s fs0, fa2
+; RV32IFD-NEXT: fmv.s fs1, fa1
+; RV32IFD-NEXT: fmv.s fs2, fa0
; RV32IFD-NEXT: mv s0, a0
+; RV32IFD-NEXT: addi a0, a0, 12
+; RV32IFD-NEXT: fmv.s fa0, fa3
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 12
+; RV32IFD-NEXT: addi a0, s0, 8
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 8
+; RV32IFD-NEXT: addi a0, s0, 4
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 4
; RV32IFD-NEXT: fmv.s fa0, fs2
+; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -1155,21 +1157,21 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: mv s0, a4
-; RV32IZFINXZDINX-NEXT: mv s1, a3
-; RV32IZFINXZDINX-NEXT: mv s2, a2
+; RV32IZFINXZDINX-NEXT: mv s0, a3
+; RV32IZFINXZDINX-NEXT: mv s1, a2
+; RV32IZFINXZDINX-NEXT: mv s2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: mv a0, a1
-; RV32IZFINXZDINX-NEXT: mv a1, s3
+; RV32IZFINXZDINX-NEXT: addi a1, a0, 12
+; RV32IZFINXZDINX-NEXT: mv a0, a4
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 12
+; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
+; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
; RV32IZFINXZDINX-NEXT: mv a0, s2
+; RV32IZFINXZDINX-NEXT: mv a1, s3
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
>From 0cfeb18e53e1cab0d763d27b87cc0e3382a792b4 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 21 Feb 2025 13:42:13 +0000
Subject: [PATCH 11/14] Verify CHAIN_BARRIER nodes
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 702ed5b29cb61..8728c0f054283 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1182,6 +1182,14 @@ static void VerifySDNode(SDNode *N, const TargetLowering *TLI) {
}
break;
}
+ case ISD::CHAIN_BARRIER: {
+ assert(N->getNumValues() == 1 && "Expected single result!");
+ assert(N->getNumOperands() == 1 && "Expected single operand!");
+ assert(N->getValueType(0) == MVT::Other &&
+ N->getOperand(0).getValueType() == MVT::Other &&
+ "Expected result and operand to be chains!");
+ break;
+ }
}
}
#endif // NDEBUG
>From 51b94f8441f1f558c342ac63bcc0899a059c1c09 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 26 Feb 2025 11:06:50 +0000
Subject: [PATCH 12/14] Fixups
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 ++--
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 27332e1cec503..362102c1d5f6a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1401,8 +1401,8 @@ enum NodeType {
FAKE_USE,
/// OutChain = CHAIN_BARRIER(InChain) marks that optimizations should not
- /// optimize any users of a chain that contains a CHAIN_BARRIER use a new
- /// chain from a point earlier than the CHAIN_BARRIER.
+ /// optimize any users of a chain that contains a CHAIN_BARRIER to use a chain
+ /// from a point earlier than the CHAIN_BARRIER.
CHAIN_BARRIER,
/// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 21ab3b1dc3094..c2107a73301bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -811,7 +811,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
SmallVector<EVT, 3> OpsVT = {VT};
std::array<SDValue, 2> StackSlots;
- for (auto [ResNum, _] : enumerate(N->values())) {
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ++ResNum) {
if (ResNum == CallRetResNo)
continue;
SDValue StackSlot = DAG.CreateStackTemporary(NVT);
>From 65722fc17768756904638056c314b8f47eb1bccb Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 28 Feb 2025 15:04:49 +0000
Subject: [PATCH 13/14] Use X86ISD::POP_FROM_X87_REG rather than
ISD::CopyFromReg in call lowering
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 5 -----
llvm/include/llvm/CodeGen/SelectionDAGISel.h | 1 -
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ------------------
.../SelectionDAG/SelectionDAGDumper.cpp | 2 --
.../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 9 --------
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 11 ++++++++++
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
llvm/lib/Target/X86/X86ISelLowering.h | 9 ++++++++
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 21 +++++++++++++++----
llvm/lib/Target/X86/X86InstrFragments.td | 2 ++
10 files changed, 40 insertions(+), 41 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 362102c1d5f6a..59f31f8443947 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1400,11 +1400,6 @@ enum NodeType {
/// debugging purposes.
FAKE_USE,
- /// OutChain = CHAIN_BARRIER(InChain) marks that optimizations should not
- /// optimize any users of a chain that contains a CHAIN_BARRIER to use a chain
- /// from a point earlier than the CHAIN_BARRIER.
- CHAIN_BARRIER,
-
/// GC_TRANSITION_START/GC_TRANSITION_END - These operators mark the
/// beginning and end of GC transition sequence, and carry arbitrary
/// information that target might need for lowering. The first operand is
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index eabaeab721991..e9452a6dc6233 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -472,7 +472,6 @@ class SelectionDAGISel {
void Select_WRITE_REGISTER(SDNode *Op);
void Select_UNDEF(SDNode *N);
void Select_FAKE_USE(SDNode *N);
- void Select_CHAIN_BARRIER(SDNode *N);
void CannotYetSelect(SDNode *N);
void Select_FREEZE(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8728c0f054283..11d0a4e8f56f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1182,14 +1182,6 @@ static void VerifySDNode(SDNode *N, const TargetLowering *TLI) {
}
break;
}
- case ISD::CHAIN_BARRIER: {
- assert(N->getNumValues() == 1 && "Expected single result!");
- assert(N->getNumOperands() == 1 && "Expected single operand!");
- assert(N->getValueType(0) == MVT::Other &&
- N->getOperand(0).getValueType() == MVT::Other &&
- "Expected result and operand to be chains!");
- break;
- }
}
}
#endif // NDEBUG
@@ -2624,18 +2616,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
- if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
- // This is needed for x87, which uses a floating-point stack. If (for
- // example) the node to be expanded has two results one floating-point which
- // is returned by the call, and one integer result, returned via an output
- // pointer. If only the integer result is used then the `CopyFromReg` for
- // the FP result may be optimized out. This prevents an FP stack pop from
- // being emitted for it. The `CHAIN_BARRIER` node prevents optimizations
- // from removing the `CopyFromReg` from the chain, and ensures the FP pop
- // will be emitted.
- CallChain = getNode(ISD::CHAIN_BARRIER, DL, MVT::Other, CallChain);
- }
-
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
if (ResNo == CallRetResNo) {
Results.push_back(Call);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 22fbe84c41606..8457bee3f665b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -462,8 +462,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::FAKE_USE:
return "fake_use";
- case ISD::CHAIN_BARRIER:
- return "chain_barrier";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b81462ba14fa2..61e5aa270bc11 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2507,12 +2507,6 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
N->getOperand(1), N->getOperand(0));
}
-void SelectionDAGISel::Select_CHAIN_BARRIER(SDNode *N) {
- // CHAIN_BARRIER exists only for SDAG. Remove it before lowering to MIs.
- CurDAG->ReplaceAllUsesWith(SDValue(N, 0), N->getOperand(0));
- CurDAG->RemoveDeadNode(N);
-}
-
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
// If FREEZE instruction is added later, the code below must be changed as
@@ -3287,9 +3281,6 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FAKE_USE:
Select_FAKE_USE(NodeToMatch);
return;
- case ISD::CHAIN_BARRIER:
- Select_CHAIN_BARRIER(NodeToMatch);
- return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 84bcdae520885..a7a0e84ba2b60 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -6717,6 +6717,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Res);
return;
}
+ case X86ISD::POP_FROM_X87_REG: {
+ SDValue Chain = Node->getOperand(0);
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ SDValue Glue;
+ if (Node->getNumValues() == 3)
+ Glue = Node->getOperand(2);
+ SDValue Copy =
+ CurDAG->getCopyFromReg(Chain, dl, Reg, Node->getValueType(0), Glue);
+ ReplaceNode(Node, Copy.getNode());
+ return;
+ }
}
SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12636f22d8409..127dab8afe5f9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35113,6 +35113,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVTTP2SIS_SAE)
NODE_NAME_CASE(CVTTP2UIS)
NODE_NAME_CASE(MCVTTP2UIS)
+ NODE_NAME_CASE(POP_FROM_X87_REG)
}
return nullptr;
#undef NODE_NAME_CASE
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index fe79fefeed631..4a2b35e9efe7c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -81,6 +81,15 @@ namespace llvm {
// marker instruction.
CALL_RVMARKER,
+ /// The same as ISD::CopyFromReg except that this node makes it explicit
+ /// that it may lower to an x87 FPU stack pop. Optimizations should be more
+ /// cautious when handling this node than a normal CopyFromReg to avoid
+ /// removing a required FPU stack pop. A key requirement is optimizations
+ /// should not optimize any users of a chain that contains a
+ /// POP_FROM_X87_REG to use a chain from a point earlier than the
+ /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
+ POP_FROM_X87_REG,
+
/// X86 compare and logical compare instructions.
CMP,
FCMP,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index ee4bb758102f4..80b4aeeda1e00 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -1095,6 +1095,15 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
return DAG.getBitcast(ValVT, ValReturned);
}
+static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain,
+ const SDLoc &dl, Register Reg, EVT VT,
+ SDValue Glue) {
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
+ SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
+ return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
+ ArrayRef(Ops, Glue.getNode() ? 3 : 2));
+}
+
/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1145,8 +1154,8 @@ SDValue X86TargetLowering::LowerCallResult(
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
bool RoundAfterCopy = false;
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
- isScalarFPTypeInSSEReg(VA.getValVT())) {
+ bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
+ if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
if (!Subtarget.hasX87())
report_fatal_error("X87 register return with X87 disabled");
CopyVT = MVT::f80;
@@ -1160,8 +1169,12 @@ SDValue X86TargetLowering::LowerCallResult(
Val =
getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
} else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
- .getValue(1);
+ Chain =
+ X87Result
+ ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
+ .getValue(1)
+ : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
+ .getValue(1);
Val = Chain.getValue(0);
InGlue = Chain.getValue(2);
}
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index ddbc7c55a6113..3606d2c0955dd 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -210,6 +210,8 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def X86pop_from_x87_reg : SDNode<"X86ISD::POP_FROM_X87_REG", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
>From e69f52fc4a97d6439e5dace72ce3ac3744ad0fd4 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 28 Feb 2025 15:33:57 +0000
Subject: [PATCH 14/14] Remove extra legalization
---
.../SelectionDAG/LegalizeFloatTypes.cpp | 80 +--
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 +-
llvm/test/CodeGen/ARM/llvm.modf.ll | 504 ------------------
3 files changed, 15 insertions(+), 572 deletions(-)
delete mode 100644 llvm/test/CodeGen/ARM/llvm.modf.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index c2107a73301bc..71f100bfa0343 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -132,7 +132,6 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
- case ISD::FMODF: R = SoftenFloatRes_FMODF(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -792,35 +791,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
return ReturnVal;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
- SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
+SDValue
+DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N,
+ RTLIB::Libcall LC) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
EVT VT = N->getValueType(0);
- assert(VT == N->getValueType(1) &&
- "expected both return values to have the same type");
-
if (!TLI.getLibcallName(LC))
return SDValue();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue FirstResultSlot = DAG.CreateStackTemporary(NVT);
+ SDValue SecondResultSlot = DAG.CreateStackTemporary(NVT);
SDLoc DL(N);
- SmallVector<SDValue, 3> Ops = {GetSoftenedFloat(N->getOperand(0))};
- SmallVector<EVT, 3> OpsVT = {VT};
-
- std::array<SDValue, 2> StackSlots;
- for (unsigned ResNum = 0; ResNum < N->getNumValues(); ++ResNum) {
- if (ResNum == CallRetResNo)
- continue;
- SDValue StackSlot = DAG.CreateStackTemporary(NVT);
- Ops.push_back(StackSlot);
- OpsVT.push_back(StackSlot.getValueType());
- StackSlots[ResNum] = StackSlot;
- }
-
TargetLowering::MakeLibCallOptions CallOptions;
+ std::array Ops{GetSoftenedFloat(N->getOperand(0)), FirstResultSlot,
+ SecondResultSlot};
+ std::array OpsVT{VT, FirstResultSlot.getValueType(),
+ SecondResultSlot.getValueType()};
+
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
// but since both returns have the same type it should be okay.
CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
@@ -834,26 +825,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo);
};
-
- for (auto [ResNum, SlackSlot] : enumerate(StackSlots)) {
- if (CallRetResNo == ResNum) {
- SetSoftenedFloat(SDValue(N, ResNum), ReturnVal);
- continue;
- }
- SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(SlackSlot));
- }
+ SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(FirstResultSlot));
+ SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(SecondResultSlot));
return SDValue();
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
return SoftenFloatRes_UnaryWithTwoFPResults(
- N, RTLIB::getSINCOS(N->getValueType(0)));
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatRes_FMODF(SDNode *N) {
- return SoftenFloatRes_UnaryWithTwoFPResults(
- N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0);
+ N, RTLIB::getFSINCOS(N->getValueType(0)));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -1589,9 +1569,6 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
- case ISD::FMODF: ExpandFloatRes_FMODF(N); break;
- case ISD::FSINCOS: ExpandFloatRes_FSINCOS(N); break;
- case ISD::FSINCOSPI: ExpandFloatRes_FSINCOSPI(N); break;
// clang-format on
}
@@ -1642,32 +1619,6 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
GetPairElements(Tmp.first, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) {
- ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)),
- /*CallRetResNo=*/0);
-}
-
-void DAGTypeLegalizer::ExpandFloatRes_FSINCOS(SDNode *N) {
- ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getSINCOS(N->getValueType(0)));
-}
-
-void DAGTypeLegalizer::ExpandFloatRes_FSINCOSPI(SDNode *N) {
- ExpandFloatRes_UnaryWithTwoFPResults(N,
- RTLIB::getSINCOSPI(N->getValueType(0)));
-}
-
-void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
- SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
- assert(!N->isStrictFPOpcode() && "strictfp not implemented");
- SmallVector<SDValue> Results;
- DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
- for (auto [ResNo, Res] : enumerate(Results)) {
- SDValue Lo, Hi;
- GetPairElements(Res, Lo, Hi);
- SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
- }
-}
-
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -2815,11 +2766,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
- case ISD::FMODF:
case ISD::FSINCOS:
- case ISD::FSINCOSPI:
R = PromoteFloatRes_UnaryWithTwoFPResults(N);
break;
+
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::STRICT_FP_ROUND:
R = PromoteFloatRes_STRICT_FP_ROUND(N);
@@ -3278,9 +3228,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
- case ISD::FMODF:
case ISD::FSINCOS:
- case ISD::FSINCOSPI:
R = SoftPromoteHalfRes_UnaryWithTwoFPResults(N);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 50247cebb91b1..63efd77455b51 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -562,8 +562,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
// Convert Float Results to Integer.
void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
- SDValue SoftenFloatRes_UnaryWithTwoFPResults(
- SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
+ SDValue SoftenFloatRes_UnaryWithTwoFPResults(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
diff --git a/llvm/test/CodeGen/ARM/llvm.modf.ll b/llvm/test/CodeGen/ARM/llvm.modf.ll
deleted file mode 100644
index 66f6c9b9383a7..0000000000000
--- a/llvm/test/CodeGen/ARM/llvm.modf.ll
+++ /dev/null
@@ -1,504 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
-; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
-
-define { half, half } @test_modf_f16(half %a) {
-; CHECK-LABEL: test_modf_f16:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: ldr r0, [sp, #4]
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: mov r1, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r4, pc}
-;
-; THUMB-LABEL: test_modf_f16:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r4, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: uxth r0, r0
-; THUMB-NEXT: bl __gnu_h2f_ieee
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r4, r0
-; THUMB-NEXT: ldr r0, [sp, #4]
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r1, r0
-; THUMB-NEXT: mov r0, r4
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r4, pc}
- %result = call { half, half } @llvm.modf.f16(half %a)
- ret { half, half } %result
-}
-
-define half @test_modf_f16_only_use_fractional_part(half %a) {
-; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f16_only_use_fractional_part:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: uxth r0, r0
-; THUMB-NEXT: bl __gnu_h2f_ieee
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { half, half } @llvm.modf.f16(half %a)
- %result.0 = extractvalue { half, half } %result, 0
- ret half %result.0
-}
-
-define half @test_modf_f16_only_use_integral_part(half %a) {
-; CHECK-LABEL: test_modf_f16_only_use_integral_part:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: ldr r0, [sp, #4]
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f16_only_use_integral_part:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: uxth r0, r0
-; THUMB-NEXT: bl __gnu_h2f_ieee
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: ldr r0, [sp, #4]
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { half, half } @llvm.modf.f16(half %a)
- %result.1 = extractvalue { half, half } %result, 1
- ret half %result.1
-}
-
-define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
-; CHECK-LABEL: test_modf_v2f16:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vpush {d8}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: strh.w r0, [sp, #14]
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: strh.w r0, [sp, #12]
-; CHECK-NEXT: add r0, sp, #12
-; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
-; CHECK-NEXT: ldr r0, [sp, #4]
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: ldr r1, [sp]
-; CHECK-NEXT: strh.w r0, [sp, #10]
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: strh.w r0, [sp, #8]
-; CHECK-NEXT: add r0, sp, #8
-; CHECK-NEXT: vmovl.u16 q9, d8
-; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
-; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vmov.32 r0, d18[0]
-; CHECK-NEXT: vmov.32 r1, d18[1]
-; CHECK-NEXT: vmov.32 r2, d16[0]
-; CHECK-NEXT: vmov.32 r3, d16[1]
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, pc}
-;
-; THUMB-LABEL: test_modf_v2f16:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r4, r5, r6, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r5, r1
-; THUMB-NEXT: uxth r0, r0
-; THUMB-NEXT: bl __gnu_h2f_ieee
-; THUMB-NEXT: mov r1, sp
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r4, r0
-; THUMB-NEXT: uxth r0, r5
-; THUMB-NEXT: bl __gnu_h2f_ieee
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r5, r0
-; THUMB-NEXT: ldr r0, [sp]
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r6, r0
-; THUMB-NEXT: ldr r0, [sp, #4]
-; THUMB-NEXT: bl __gnu_f2h_ieee
-; THUMB-NEXT: mov r3, r0
-; THUMB-NEXT: mov r0, r4
-; THUMB-NEXT: mov r1, r5
-; THUMB-NEXT: mov r2, r6
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r4, r5, r6, pc}
- %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
- ret { <2 x half>, <2 x half> } %result
-}
-
-define { float, float } @test_modf_f32(float %a) {
-; CHECK-LABEL: test_modf_f32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: ldr r1, [sp, #4]
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f32:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: ldr r1, [sp, #4]
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { float, float } @llvm.modf.f32(float %a)
- ret { float, float } %result
-}
-
-define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
-; CHECK-LABEL: test_modf_v3f32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vldr d9, [sp, #40]
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: mov r5, r3
-; CHECK-NEXT: vmov d8, r2, r3
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: vldmia sp, {s0, s1}
-; CHECK-NEXT: add.w r1, r4, #16
-; CHECK-NEXT: vst1.32 {d0}, [r1:64]!
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: vmov s1, r5
-; CHECK-NEXT: vmov s0, r6
-; CHECK-NEXT: vst1.32 {d0}, [r4:64]!
-; CHECK-NEXT: str r0, [r4]
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, r5, r6, pc}
-;
-; THUMB-LABEL: test_modf_v3f32:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r4, r5, r6, r7, lr}
-; THUMB-NEXT: sub sp, #12
-; THUMB-NEXT: mov r7, r3
-; THUMB-NEXT: mov r5, r2
-; THUMB-NEXT: mov r4, r0
-; THUMB-NEXT: ldr r0, [sp, #32]
-; THUMB-NEXT: add r1, sp, #8
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: mov r6, r0
-; THUMB-NEXT: ldr r0, [sp, #8]
-; THUMB-NEXT: str r0, [r4, #24]
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: mov r0, r7
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: mov r7, r0
-; THUMB-NEXT: ldr r0, [sp, #4]
-; THUMB-NEXT: str r0, [r4, #20]
-; THUMB-NEXT: mov r1, sp
-; THUMB-NEXT: mov r0, r5
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: ldr r1, [sp]
-; THUMB-NEXT: str r1, [r4, #16]
-; THUMB-NEXT: stm r4!, {r0, r7}
-; THUMB-NEXT: str r6, [r4]
-; THUMB-NEXT: add sp, #12
-; THUMB-NEXT: pop {r4, r5, r6, r7, pc}
- %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
- ret { <3 x float>, <3 x float> } %result
-}
-
-define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
-; CHECK-LABEL: test_modf_v2f32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vpush {d8}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vmov d8, r0, r1
-; CHECK-NEXT: mov r1, sp
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl modff
-; CHECK-NEXT: vldr s1, [sp]
-; CHECK-NEXT: mov r1, r4
-; CHECK-NEXT: vldr s0, [sp, #4]
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r4, pc}
-;
-; THUMB-LABEL: test_modf_v2f32:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r4, r5, r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r4, r1
-; THUMB-NEXT: mov r1, sp
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: mov r5, r0
-; THUMB-NEXT: add r1, sp, #4
-; THUMB-NEXT: mov r0, r4
-; THUMB-NEXT: bl modff
-; THUMB-NEXT: mov r1, r0
-; THUMB-NEXT: ldr r2, [sp]
-; THUMB-NEXT: ldr r3, [sp, #4]
-; THUMB-NEXT: mov r0, r5
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r4, r5, r7, pc}
- %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
- ret { <2 x float>, <2 x float> } %result
-}
-
-define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
-; CHECK-LABEL: test_modf_v2f64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: mov r1, r3
-; CHECK-NEXT: mov r3, r2
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r3
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: ldrd r12, r3, [sp, #40]
-; CHECK-NEXT: vmov d8, r0, r1
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: mov r1, r3
-; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: vmov d9, r0, r1
-; CHECK-NEXT: vldr d17, [sp]
-; CHECK-NEXT: vldr d16, [sp, #8]
-; CHECK-NEXT: vst1.64 {d8, d9}, [r4]!
-; CHECK-NEXT: vst1.64 {d16, d17}, [r4]
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r4, pc}
-;
-; THUMB-LABEL: test_modf_v2f64:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r4, r5, r6, r7, lr}
-; THUMB-NEXT: sub sp, #28
-; THUMB-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; THUMB-NEXT: mov r7, r2
-; THUMB-NEXT: mov r4, r0
-; THUMB-NEXT: ldr r0, [sp, #48]
-; THUMB-NEXT: ldr r1, [sp, #52]
-; THUMB-NEXT: add r2, sp, #16
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: mov r6, r0
-; THUMB-NEXT: mov r5, r1
-; THUMB-NEXT: ldr r0, [sp, #20]
-; THUMB-NEXT: str r0, [r4, #28]
-; THUMB-NEXT: ldr r0, [sp, #16]
-; THUMB-NEXT: str r0, [r4, #24]
-; THUMB-NEXT: add r2, sp, #8
-; THUMB-NEXT: mov r0, r7
-; THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: ldr r2, [sp, #12]
-; THUMB-NEXT: str r2, [r4, #20]
-; THUMB-NEXT: ldr r2, [sp, #8]
-; THUMB-NEXT: str r2, [r4, #16]
-; THUMB-NEXT: str r5, [r4, #12]
-; THUMB-NEXT: stm r4!, {r0, r1, r6}
-; THUMB-NEXT: add sp, #28
-; THUMB-NEXT: pop {r4, r5, r6, r7, pc}
- %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
- ret { <2 x double>, <2 x double> } %result
-}
-
-define { double, double } @test_modf_f64(double %a) {
-; CHECK-LABEL: test_modf_f64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: ldrd r2, r3, [sp], #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: ldr r2, [sp]
-; THUMB-NEXT: ldr r3, [sp, #4]
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { double, double } @llvm.modf.f64(double %a)
- ret { double, double } %result
-}
-
-define double @test_modf_f64_only_use_intergral(double %a) {
-; CHECK-LABEL: test_modf_f64_only_use_intergral:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: ldrd r0, r1, [sp], #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64_only_use_intergral:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: ldr r0, [sp]
-; THUMB-NEXT: ldr r1, [sp, #4]
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { double, double } @llvm.modf.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 1
- ret double %result.1
-}
-
-define double @test_modf_f64_only_use_fractional(double %a) {
-; CHECK-LABEL: test_modf_f64_only_use_fractional:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64_only_use_fractional:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = call { double, double } @llvm.modf.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 0
- ret double %result.1
-}
-
-define { double, double } @test_modf_f64_tail_call(double %a) {
-; CHECK-LABEL: test_modf_f64_tail_call:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: ldrd r2, r3, [sp], #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64_tail_call:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: ldr r2, [sp]
-; THUMB-NEXT: ldr r3, [sp, #4]
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = tail call { double, double } @llvm.modf.f64(double %a)
- ret { double, double } %result
-}
-
-define double @test_modf_f64_only_use_intergral_tail_call(double %a) {
-; CHECK-LABEL: test_modf_f64_only_use_intergral_tail_call:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: ldrd r0, r1, [sp], #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64_only_use_intergral_tail_call:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: ldr r0, [sp]
-; THUMB-NEXT: ldr r1, [sp, #4]
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = tail call { double, double } @llvm.modf.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 1
- ret double %result.1
-}
-
-define double @test_modf_f64_only_use_fractional_tail_call(double %a) {
-; CHECK-LABEL: test_modf_f64_only_use_fractional_tail_call:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl modf
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
-;
-; THUMB-LABEL: test_modf_f64_only_use_fractional_tail_call:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: push {r7, lr}
-; THUMB-NEXT: sub sp, #8
-; THUMB-NEXT: mov r2, sp
-; THUMB-NEXT: bl modf
-; THUMB-NEXT: add sp, #8
-; THUMB-NEXT: pop {r7, pc}
- %result = tail call { double, double } @llvm.modf.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 0
- ret double %result.1
-}
More information about the llvm-commits
mailing list