[llvm] cb46ffd - [X86] Use BuildStackAdjustment in stack probes

Josh Stone via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 23 09:34:31 PDT 2022


Author: Josh Stone
Date: 2022-09-23T09:30:32-07:00
New Revision: cb46ffdbf44c4c9097531091dca92acf54844150

URL: https://github.com/llvm/llvm-project/commit/cb46ffdbf44c4c9097531091dca92acf54844150
DIFF: https://github.com/llvm/llvm-project/commit/cb46ffdbf44c4c9097531091dca92acf54844150.diff

LOG: [X86] Use BuildStackAdjustment in stack probes

This has the advantage of dealing with live EFLAGS, using LEA instead of
SUB if needed to avoid clobbering. That also respects feature "lea-sp".

We could allow unrolled stack probing from blocks with live EFLAGS, if
canUseAsPrologue learns when emitStackProbeInlineGenericBlock will be used.

Differential Revision: https://reviews.llvm.org/D134495

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/test/CodeGen/X86/probe-stack-eflags.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index fb78f9612e76..b6fa3d5d4e54 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -660,7 +660,6 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
-  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
   const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
   const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
 
@@ -671,16 +670,14 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   // If the offset is so small it fits within a page, there's nothing to do.
   if (StackProbeSize < Offset + AlignOffset) {
 
-    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                           .addReg(StackPtr)
-                           .addImm(StackProbeSize - AlignOffset)
-                           .setMIFlag(MachineInstr::FrameSetup);
+    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
+    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);
     if (!HasFP && NeedsDwarfCFI) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createAdjustCfaOffset(
-                   nullptr, StackProbeSize - AlignOffset));
+      BuildCFI(
+          MBB, MBBI, DL,
+          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
     }
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 
     addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                      .setMIFlag(MachineInstr::FrameSetup),
@@ -695,11 +692,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
   // natural probes but it implies much more logic and there was very few
   // interesting natural probes to interleave.
   while (CurrentOffset + StackProbeSize < Offset) {
-    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                           .addReg(StackPtr)
-                           .addImm(StackProbeSize)
-                           .setMIFlag(MachineInstr::FrameSetup);
-    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);
 
     if (!HasFP && NeedsDwarfCFI) {
       BuildCFI(
@@ -717,13 +711,20 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
 
   // No need to probe the tail, it is smaller than a Page.
   uint64_t ChunkSize = Offset - CurrentOffset;
-  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
-                         .addReg(StackPtr)
-                         .addImm(ChunkSize)
-                         .setMIFlag(MachineInstr::FrameSetup);
+  if (ChunkSize == SlotSize) {
+    // Use push for slot sized adjustments as a size optimization,
+    // like emitSPUpdate does when not probing.
+    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
+    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
+    BuildMI(MBB, MBBI, DL, TII.get(Opc))
+        .addReg(Reg, RegState::Undef)
+        .setMIFlag(MachineInstr::FrameSetup);
+  } else {
+    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
   // No need to adjust Dwarf CFA offset here, the last position of the stack has
   // been defined
-  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
 }
 
 void X86FrameLowering::emitStackProbeInlineGenericLoop(
@@ -732,6 +733,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
     uint64_t AlignOffset) const {
   assert(Offset && "null offset");
 
+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Inline stack probe loop will clobber live EFLAGS.");
+
   const bool NeedsDwarfCFI = needsDwarfCFI(MF);
   const bool HasFP = hasFP(MF);
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -742,12 +746,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   if (AlignOffset) {
     if (AlignOffset < StackProbeSize) {
       // Perform a first smaller allocation followed by a probe.
-      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
-      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
-                             .addReg(StackPtr)
-                             .addImm(AlignOffset)
-                             .setMIFlag(MachineInstr::FrameSetup);
-      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
+          .setMIFlag(MachineInstr::FrameSetup);
 
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                        .setMIFlag(MachineInstr::FrameSetup),
@@ -806,13 +806,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   }
 
   // allocate a page
-  {
-    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
-    BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
-        .addReg(StackPtr)
-        .addImm(StackProbeSize)
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
+  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
+                       /*InEpilogue=*/false)
+      .setMIFlag(MachineInstr::FrameSetup);
 
   // touch the page
   addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
@@ -841,13 +837,11 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
   MBB.addSuccessor(testMBB);
 
   // handle tail
-  const unsigned TailOffset = Offset % StackProbeSize;
+  const uint64_t TailOffset = Offset % StackProbeSize;
   MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
   if (TailOffset) {
-    const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
-    BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
-        .addReg(StackPtr)
-        .addImm(TailOffset)
+    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
+                         /*InEpilogue=*/false)
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
@@ -879,6 +873,9 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
   const TargetInstrInfo &TII = *STI.getInstrInfo();
   const BasicBlock *LLVM_BB = MBB.getBasicBlock();
 
+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Inline stack probe loop will clobber live EFLAGS.");
+
   // RAX contains the number of bytes of desired stack adjustment.
   // The handling here assumes this value has already been updated so as to
   // maintain stack alignment.
@@ -1115,6 +1112,9 @@ void X86FrameLowering::emitStackProbeCall(
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and indirect thunks not yet implemented.");
 
+  assert(!MBB.isLiveIn(X86::EFLAGS) &&
+         "Stack probe calls will clobber live EFLAGS.");
+
   unsigned CallOp;
   if (Is64Bit)
     CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
@@ -3510,6 +3510,7 @@ bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
     return true;
 
   // If stack probes have to loop inline or call, that will clobber EFLAGS.
+  // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   const X86TargetLowering &TLI = *STI.getTargetLowering();
   if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))

diff  --git a/llvm/test/CodeGen/X86/probe-stack-eflags.ll b/llvm/test/CodeGen/X86/probe-stack-eflags.ll
index c311c36eb9a4..28432ef1e3b9 100644
--- a/llvm/test/CodeGen/X86/probe-stack-eflags.ll
+++ b/llvm/test/CodeGen/X86/probe-stack-eflags.ll
@@ -13,7 +13,7 @@ define i32 @f(i32 %a, i32 %b) #0 {
 ; CHECK:       # %bb.0: # %bb13.i
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $8, %rsp
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %bb16.i


        


More information about the llvm-commits mailing list