[llvm] [SDAG] Limit sincos/frexp stack slot folding to stores chained to entry (PR #115906)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 12 09:06:47 PST 2024
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/115906
When the chain is not the entry node, there is a risk that the stores are within a (CALLSEQ_START, CALLSEQ_END) pair, which, when the node is expanded, will lead to nested call sequences.
It should be possible to check for this and allow more cases, but for now, let's limit this to cases where it's definitely safe.
Fixes #115323
From fa3dfa7dd80bdd984c3be4b65871958e026dc6c8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 12 Nov 2024 16:31:29 +0000
Subject: [PATCH] [SDAG] Limit sincos/frexp stack slot folding to stores
chained to entry
When the chain is not the entry node there is a risk the stores are
within a (CALLSEQ_START, CALLSEQ_END), which when the node is expanded
will lead to nested call sequences.
It should be possible to check for this and allow more cases, but for
now, let's limit this to cases where it's definitely safe.
Fixes #115323
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +-
llvm/test/CodeGen/PowerPC/f128-arith.ll | 32 ++-
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 192 ++++++++++--------
llvm/test/CodeGen/X86/llvm.frexp.ll | 45 ++--
llvm/test/CodeGen/X86/sincos-stack-args.ll | 35 ++++
5 files changed, 208 insertions(+), 107 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/sincos-stack-args.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 901e63c47fac17..3a8ec3c6105bc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2509,7 +2509,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// Find users of the node that store the results (and share input chains). The
// destination pointers can be used instead of creating stack allocations.
- SDValue StoresInChain{};
+ // FIXME: This should allow stores with the same chains (not just the entry
+ // chain), but there's a risk the store is within a (CALLSEQ_START,
+ // CALLSEQ_END) pair, which after this expansion will lead to nested call
+ // sequences.
+ SDValue InChain = getEntryNode();
SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
for (SDNode *User : Node->uses()) {
if (!ISD::isNormalStore(User))
@@ -2522,11 +2526,9 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
ST->getAddressSpace() != 0 ||
ST->getAlign() <
getDataLayout().getABITypeAlign(StoreType->getScalarType()) ||
- (StoresInChain && ST->getChain() != StoresInChain) ||
- Node->isPredecessorOf(ST->getChain().getNode()))
+ ST->getChain() != InChain)
continue;
ResultStores[ResNo] = ST;
- StoresInChain = ST->getChain();
}
TargetLowering::ArgListTy Args;
@@ -2568,7 +2570,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
Type *RetType = CallRetResNo.has_value()
? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
: Type::getVoidTy(Ctx);
- SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);
diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index decc4a38f7ccd4..35e5d61947ead7 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -1365,33 +1365,45 @@ define dso_local fp128 @qpFREXP(ptr %a, ptr %b) {
; CHECK-LABEL: qpFREXP:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: addi r5, r1, 44
+; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: mr r5, r4
; CHECK-NEXT: bl frexpf128
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 32
+; CHECK-NEXT: lwz r3, 44(r1)
+; CHECK-NEXT: stw r3, 0(r30)
+; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFREXP:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: stdu r1, -32(r1)
-; CHECK-P8-NEXT: std r0, 48(r1)
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
+; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
+; CHECK-P8-NEXT: .cfi_offset r30, -16
+; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT: stdu r1, -64(r1)
+; CHECK-P8-NEXT: std r0, 80(r1)
+; CHECK-P8-NEXT: addi r5, r1, 44
+; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT: mr r5, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl frexpf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: addi r1, r1, 32
+; CHECK-P8-NEXT: lwz r3, 44(r1)
+; CHECK-P8-NEXT: stw r3, 0(r30)
+; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
+; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 3f615d23d3eaf6..e85a7118f5ff83 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -543,42 +543,50 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -48
-; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs3, 8(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -64
+; RV32IFD-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa3
; RV32IFD-NEXT: fmv.s fs1, fa2
; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: addi a0, a0, 16
+; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs3, fa0
-; RV32IFD-NEXT: addi a0, s0, 20
+; RV32IFD-NEXT: addi a0, sp, 12
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs2, fa0
-; RV32IFD-NEXT: addi a0, s0, 24
+; RV32IFD-NEXT: addi a0, sp, 16
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs1, fa0
-; RV32IFD-NEXT: addi a0, s0, 28
+; RV32IFD-NEXT: addi a0, sp, 20
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a3, 20(sp)
+; RV32IFD-NEXT: sw a0, 16(s0)
+; RV32IFD-NEXT: sw a1, 20(s0)
+; RV32IFD-NEXT: sw a2, 24(s0)
+; RV32IFD-NEXT: sw a3, 28(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
; RV32IFD-NEXT: fsw fs2, 4(s0)
; RV32IFD-NEXT: fsw fs1, 8(s0)
; RV32IFD-NEXT: fsw fa0, 12(s0)
-; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs3, 8(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 48
+; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 64
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32:
@@ -631,44 +639,52 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
-; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
+; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s0, a4
; RV32IZFINXZDINX-NEXT: mv s1, a3
; RV32IZFINXZDINX-NEXT: mv s2, a2
; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: addi a1, a0, 16
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s4, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 20
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s2, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 24
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s1, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 28
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 20(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 16(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 20(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 24(s3)
+; RV32IZFINXZDINX-NEXT: sw a4, 28(s3)
; RV32IZFINXZDINX-NEXT: sw s4, 0(s3)
; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
@@ -1080,34 +1096,41 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -32
-; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fmv.s fs0, fa2
-; RV32IFD-NEXT: fmv.s fs1, fa1
-; RV32IFD-NEXT: fmv.s fs2, fa0
+; RV32IFD-NEXT: addi sp, sp, -48
+; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fmv.s fs0, fa3
+; RV32IFD-NEXT: fmv.s fs1, fa2
+; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: addi a0, a0, 12
-; RV32IFD-NEXT: fmv.s fa0, fa3
+; RV32IFD-NEXT: mv a0, sp
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 8
-; RV32IFD-NEXT: fmv.s fa0, fs0
+; RV32IFD-NEXT: addi a0, sp, 4
+; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 4
+; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: fmv.s fa0, fs2
-; RV32IFD-NEXT: mv a0, s0
+; RV32IFD-NEXT: addi a0, sp, 12
+; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: lw a0, 0(sp)
+; RV32IFD-NEXT: lw a1, 4(sp)
+; RV32IFD-NEXT: lw a2, 8(sp)
+; RV32IFD-NEXT: lw a3, 12(sp)
+; RV32IFD-NEXT: sw a0, 0(s0)
+; RV32IFD-NEXT: sw a1, 4(s0)
+; RV32IFD-NEXT: sw a2, 8(s0)
+; RV32IFD-NEXT: sw a3, 12(s0)
+; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 48
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
@@ -1151,34 +1174,43 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
-; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: mv s0, a3
-; RV32IZFINXZDINX-NEXT: mv s1, a2
-; RV32IZFINXZDINX-NEXT: mv s2, a1
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
+; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: mv s0, a4
+; RV32IZFINXZDINX-NEXT: mv s1, a3
+; RV32IZFINXZDINX-NEXT: mv s2, a2
+; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: addi a1, a0, 12
-; RV32IZFINXZDINX-NEXT: mv a0, a4
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
+; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
-; RV32IZFINXZDINX-NEXT: mv a0, s0
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
+; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: mv a0, s2
-; RV32IZFINXZDINX-NEXT: mv a1, s3
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 24
+; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: lw a0, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 24(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(s3)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 8(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 12(s3)
+; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index 96de34519556d0..cd560ad627de4c 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -325,27 +325,28 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32:
; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $44, %esp
+; WIN32-NEXT: subl $60, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: leal 24(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
-; WIN32-NEXT: leal 20(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
-; WIN32-NEXT: leal 16(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
-; WIN32-NEXT: leal 28(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
@@ -360,13 +361,22 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl %edi, 28(%esi)
+; WIN32-NEXT: movl %edx, 24(%esi)
+; WIN32-NEXT: movl %ecx, 20(%esi)
+; WIN32-NEXT: movl %eax, 16(%esi)
; WIN32-NEXT: fstps 12(%esi)
; WIN32-NEXT: fstps 8(%esi)
; WIN32-NEXT: fstps 4(%esi)
; WIN32-NEXT: fstps (%esi)
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: addl $44, %esp
+; WIN32-NEXT: addl $60, %esp
; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
ret { <4 x float>, <4 x i32> } %result
@@ -489,35 +499,46 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $12, %esp
+; WIN32-NEXT: subl $28, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: leal 8(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 4(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 12(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl %edi, 12(%esi)
+; WIN32-NEXT: movl %edx, 8(%esi)
+; WIN32-NEXT: movl %ecx, 4(%esi)
+; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: addl $12, %esp
+; WIN32-NEXT: addl $28, %esp
; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1
diff --git a/llvm/test/CodeGen/X86/sincos-stack-args.ll b/llvm/test/CodeGen/X86/sincos-stack-args.ll
new file mode 100644
index 00000000000000..9fb3a6769fda11
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sincos-stack-args.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+; Test for issue https://github.com/llvm/llvm-project/issues/115323
+
+declare double @g(double, double)
+
+define double @f(double %a) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subl $44, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: fldl 48(%esp)
+; CHECK-NEXT: leal 24(%esp), %eax
+; CHECK-NEXT: movl %eax, 12(%esp)
+; CHECK-NEXT: leal 32(%esp), %eax
+; CHECK-NEXT: movl %eax, 8(%esp)
+; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: calll sincos
+; CHECK-NEXT: fldl 32(%esp)
+; CHECK-NEXT: fldl 24(%esp)
+; CHECK-NEXT: faddl {{\.?LCPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fstpl 8(%esp)
+; CHECK-NEXT: fstpl (%esp)
+; CHECK-NEXT: calll g at PLT
+; CHECK-NEXT: addl $44, %esp
+; CHECK-NEXT: .cfi_def_cfa_offset 4
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call double @llvm.sin.f64(double %a)
+ %1 = tail call double @llvm.cos.f64(double %a)
+ %add = fadd double %1, 3.140000e+00
+ %call = tail call double @g(double %add, double %0)
+ ret double %call
+}
More information about the llvm-commits mailing list