[llvm] [X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA (PR #116508)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 16 13:03:54 PST 2024
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/116508
>From a51c0f464f897b72032a8aecda3b79e1b498ccf0 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 16 Nov 2024 23:57:54 +0300
Subject: [PATCH] [X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA
These nodes should appear between CALLSEQ_START / CALLSEQ_END.
Previously, they could be scheduled after CALLSEQ_END because
the nodes didn't update the chain.
The change in the test is due to the X86 call frame optimizer pass bailing
out for a particular call when CALLSEQ_START / CALLSEQ_END are not in
the same basic block. This happens because SEG_ALLOCA is expanded into
a sequence of basic blocks early. The pass didn't bail out before because
the closing CALLSEQ_END was scheduled before SEG_ALLOCA, in the same
basic block as CALLSEQ_START.
While here, simplify creation of these nodes: allocating a virtual
register and copying `Size` into it were unnecessary.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++-------------
.../CodeGen/X86/segmented-stacks-dynamic.ll | 16 +++++++--------
2 files changed, 14 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c984c5b6da8735..009a94daf5854f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25270,13 +25270,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Align StackAlign = TFI.getStackAlign();
if (hasInlineStackProbe(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- Register Vreg = MRI.createVirtualRegister(AddrRegClass);
- Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
- DAG.getRegister(Vreg, SPTy));
+ Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, {SPTy, MVT::Other},
+ {Chain, Size});
+ Chain = Result.getValue(1);
} else {
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
@@ -25288,8 +25284,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getSignedConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
if (Is64Bit) {
// The 64 bit implementation of segmented stacks needs to clobber both r10
// r11. This makes it impossible to use it along with nested parameters.
@@ -25301,11 +25295,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
}
- const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- Register Vreg = MRI.createVirtualRegister(AddrRegClass);
- Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
- DAG.getRegister(Vreg, SPTy));
+ Result =
+ DAG.getNode(X86ISD::SEG_ALLOCA, dl, {SPTy, MVT::Other}, {Chain, Size});
+ Chain = Result.getValue(1);
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);
diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
index 4fb061e1bb764e..e8359cb088dc38 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -24,9 +24,9 @@ define i32 @test_basic(i32 %l) #0 {
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl %esp, %eax
; X86-NEXT: leal 15(,%esi,4), %ecx
; X86-NEXT: andl $-16, %ecx
-; X86-NEXT: movl %esp, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmpl %eax, %gs:48
; X86-NEXT: jg .LBB0_4
@@ -39,17 +39,17 @@ define i32 @test_basic(i32 %l) #0 {
; X86-NEXT: calll __morestack_allocate_stack_space
; X86-NEXT: addl $16, %esp
; X86-NEXT: .LBB0_5:
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll dummy_use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %esi, %esi
; X86-NEXT: je .LBB0_6
; X86-NEXT: # %bb.8: # %false
; X86-NEXT: decl %esi
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: pushl %esi
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl %esi, (%esp)
; X86-NEXT: calll test_basic@PLT
; X86-NEXT: jmp .LBB0_7
; X86-NEXT: .LBB0_6: # %true
@@ -83,10 +83,10 @@ define i32 @test_basic(i32 %l) #0 {
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_offset %rbx, -24
; X64-NEXT: movl %edi, %ebx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: movl %ebx, %eax
; X64-NEXT: leaq 15(,%rax,4), %rax
; X64-NEXT: andq $-16, %rax
-; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: subq %rax, %rdi
; X64-NEXT: cmpq %rdi, %fs:112
; X64-NEXT: jg .LBB0_4
More information about the llvm-commits
mailing list