[llvm] 6f53ae6 - [X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA (#116508)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 19 02:30:01 PST 2024


Author: Sergei Barannikov
Date: 2024-11-19T13:29:58+03:00
New Revision: 6f53ae6e613f5f7011c1752b94038114f4e1cc94

URL: https://github.com/llvm/llvm-project/commit/6f53ae6e613f5f7011c1752b94038114f4e1cc94
DIFF: https://github.com/llvm/llvm-project/commit/6f53ae6e613f5f7011c1752b94038114f4e1cc94.diff

LOG: [X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA (#116508)

These nodes should appear between CALLSEQ_START / CALLSEQ_END.
Previously, they could be scheduled after CALLSEQ_END because the nodes
didn't update the chain.

The change in a test is due to X86 call frame optimizer pass bailing out
for a particular call when CALLSEQ_START / CALLSEQ_END are not in the
same basic block. This happens because SEG_ALLOCA is expanded into a
sequence of basic blocks early. It didn't bail out before because the
closing CALLSEQ_END was scheduled before SEG_ALLOCA, in the same basic
block as CALLSEQ_START.

While here, simplify creation of these nodes: allocating a virtual
register and copying `Size` into it were unnecessary.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c984c5b6da8735..009a94daf5854f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25270,13 +25270,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
     const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
     const Align StackAlign = TFI.getStackAlign();
     if (hasInlineStackProbe(MF)) {
-      MachineRegisterInfo &MRI = MF.getRegInfo();
-
-      const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
-      Register Vreg = MRI.createVirtualRegister(AddrRegClass);
-      Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
-      Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
-                           DAG.getRegister(Vreg, SPTy));
+      Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, {SPTy, MVT::Other},
+                           {Chain, Size});
+      Chain = Result.getValue(1);
     } else {
       SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
       Chain = SP.getValue(1);
@@ -25288,8 +25284,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
           DAG.getSignedConstant(~(Alignment->value() - 1ULL), dl, VT));
     Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
   } else if (SplitStack) {
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-
     if (Is64Bit) {
       // The 64 bit implementation of segmented stacks needs to clobber both r10
       // r11. This makes it impossible to use it along with nested parameters.
@@ -25301,11 +25295,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
       }
     }
 
-    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
-    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
-    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
-    Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
-                                DAG.getRegister(Vreg, SPTy));
+    Result =
+        DAG.getNode(X86ISD::SEG_ALLOCA, dl, {SPTy, MVT::Other}, {Chain, Size});
+    Chain = Result.getValue(1);
   } else {
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
     Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);

diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
index 4fb061e1bb764e..e8359cb088dc38 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -24,9 +24,9 @@ define i32 @test_basic(i32 %l) #0 {
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_offset %esi, -12
 ; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal 15(,%esi,4), %ecx
 ; X86-NEXT:    andl $-16, %ecx
-; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    cmpl %eax, %gs:48
 ; X86-NEXT:    jg .LBB0_4
@@ -39,17 +39,17 @@ define i32 @test_basic(i32 %l) #0 {
 ; X86-NEXT:    calll __morestack_allocate_stack_space
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:  .LBB0_5:
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    pushl %eax
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll dummy_use@PLT
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    testl %esi, %esi
 ; X86-NEXT:    je .LBB0_6
 ; X86-NEXT:  # %bb.8: # %false
 ; X86-NEXT:    decl %esi
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl %esi, (%esp)
; X86-NEXT:    calll test_basic@PLT
 ; X86-NEXT:    jmp .LBB0_7
 ; X86-NEXT:  .LBB0_6: # %true
@@ -83,10 +83,10 @@ define i32 @test_basic(i32 %l) #0 {
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    .cfi_offset %rbx, -24
 ; X64-NEXT:    movl %edi, %ebx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movq %rsp, %rdi
+; X64-NEXT:    movl %ebx, %eax
 ; X64-NEXT:    leaq 15(,%rax,4), %rax
 ; X64-NEXT:    andq $-16, %rax
-; X64-NEXT:    movq %rsp, %rdi
 ; X64-NEXT:    subq %rax, %rdi
 ; X64-NEXT:    cmpq %rdi, %fs:112
 ; X64-NEXT:    jg .LBB0_4


        


More information about the llvm-commits mailing list