[llvm] r261349 - [AArch64][ShrinkWrap] Fix bug in prolog clobbering live reg when shrink wrapping.

Geoff Berry via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 19 10:27:33 PST 2016


Author: gberry
Date: Fri Feb 19 12:27:32 2016
New Revision: 261349

URL: http://llvm.org/viewvc/llvm-project?rev=261349&view=rev
Log:
[AArch64][ShrinkWrap] Fix bug in prolog clobbering live reg when shrink wrapping.

Summary: See bug https://llvm.org/bugs/show_bug.cgi?id=26642

Reviewers: qcolombet, t.p.northover

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D17350

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=261349&r1=261348&r2=261349&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Fri Feb 19 12:27:32 2016
@@ -250,6 +250,63 @@ void AArch64FrameLowering::emitCalleeSav
   }
 }
 
+// Find a scratch register that we can use at the start of the prologue to
+// re-align the stack pointer.  We avoid using callee-save registers since they
+// may appear to be free when this is called from canUseAsPrologue (during
+// shrink wrapping), but then no longer be free when this is called from
+// emitPrologue.
+//
+// FIXME: This is a bit conservative, since in the above case we could use one
+// of the callee-save registers as a scratch temp to re-align the stack pointer,
+// but we would then have to make sure that we were in fact saving at least one
+// callee-save register in the prologue, which is additional complexity that
+// doesn't seem worth the benefit.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+  MachineFunction *MF = MBB->getParent();
+
+  // If MBB is an entry block, use X9 as the scratch register.
+  if (&MF->front() == MBB)
+    return AArch64::X9;
+
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  // Prefer X9 since it was historically used for the prologue scratch reg.
+  if (!RS.isRegUsed(AArch64::X9))
+    return AArch64::X9;
+
+  // Find a free non-callee-save register.
+  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
+  BitVector CalleeSaveRegs(RegInfo->getNumRegs());
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    CalleeSaveRegs.set(CSRegs[i]);
+
+  BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
+  for (int AvailReg = Available.find_first(); AvailReg != -1;
+       AvailReg = Available.find_next(AvailReg))
+    if (!CalleeSaveRegs.test(AvailReg))
+      return AvailReg;
+
+  return AArch64::NoRegister;
+}
+
+bool AArch64FrameLowering::canUseAsPrologue(
+    const MachineBasicBlock &MBB) const {
+  const MachineFunction *MF = MBB.getParent();
+  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+  // Don't need a scratch register if we're not going to re-align the stack.
+  if (!RegInfo->needsStackRealignment(*MF))
+    return true;
+  // Otherwise, we can use any block as long as it has a scratch register
+  // available.
+  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
+}
+
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -331,8 +388,8 @@ void AArch64FrameLowering::emitPrologue(
   const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
   unsigned scratchSPReg = AArch64::SP;
   if (NumBytes && NeedsRealignment) {
-    // Use the first callee-saved register as a scratch register.
-    scratchSPReg = AArch64::X9;
+    scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
+    assert(scratchSPReg != AArch64::NoRegister);
   }
 
   // If we're a leaf function, try using the red zone.
@@ -926,19 +983,14 @@ void AArch64FrameLowering::determineCall
   if (RegInfo->hasBasePointer(MF))
     BasePointerReg = RegInfo->getBaseRegister();
 
-  unsigned StackAlignReg = AArch64::NoRegister;
-  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
-    StackAlignReg = AArch64::X9;
-
   bool ExtraCSSpill = false;
   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
 
-    // Add the stack re-align scratch register and base pointer register to
-    // SavedRegs set only if they are callee-save.
-    if (Reg == BasePointerReg || Reg == StackAlignReg)
+    // Add the base pointer register to SavedRegs if it is callee-save.
+    if (Reg == BasePointerReg)
       SavedRegs.set(Reg);
 
     bool RegUsed = SavedRegs.test(Reg);

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=261349&r1=261348&r2=261349&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h Fri Feb 19 12:27:32 2016
@@ -37,6 +37,8 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+
   int getFrameIndexReference(const MachineFunction &MF, int FI,
                              unsigned &FrameReg) const override;
   int resolveFrameIndexReference(const MachineFunction &MF, int FI,

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll?rev=261349&r1=261348&r2=261349&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll Fri Feb 19 12:27:32 2016
@@ -630,3 +630,92 @@ loop2b:
 end:
   ret void
 }
+
+; Re-aligned stack pointer.  See bug 26642.  Avoid clobbering live
+; values in the prologue when re-aligning the stack pointer.
+; CHECK-LABEL: stack_realign:
+; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
+; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
+; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
+; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; ENABLE-NOT: sub x[[LSL1]], sp, #16
+; ENABLE-NOT: sub x[[LSL2]], sp, #16
+; DISABLE: sub x{{[0-9]+}}, sp, #16
+; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
+; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
+; CHECK-DAG: str w[[LSL1]],
+; CHECK-DAG: str w[[LSL2]],
+
+define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
+  %tmp = alloca i32, align 32
+  %shl1 = shl i32 %a, %b
+  %shl2 = shl i32 %b, %a
+  %tmp2 = icmp slt i32 %a, %b
+  br i1 %tmp2, label %true, label %false
+
+true:
+  store i32 %a, i32* %tmp, align 4
+  %tmp4 = load i32, i32* %tmp
+  br label %false
+
+false:
+  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+  store i32 %shl1, i32* %ptr1
+  store i32 %shl2, i32* %ptr2
+  ret i32 %tmp.0
+}
+
+; Re-aligned stack pointer with all caller-save regs live.  See bug
+; 26642.  In this case we currently avoid shrink wrapping because
+; ensuring we have a scratch register to re-align the stack pointer is
+; too complicated.  Output should be the same for both enabled and
+; disabled shrink wrapping.
+; CHECK-LABEL: stack_realign2:
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
+; CHECK: add x29, sp, #{{[0-9]+}}
+; CHECK: lsl {{w[0-9]+}}, w0, w1
+
+define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
+  %tmp = alloca i32, align 32
+  %tmp1 = shl i32 %a, %b
+  %tmp2 = shl i32 %b, %a
+  %tmp3 = lshr i32 %a, %b
+  %tmp4 = lshr i32 %b, %a
+  %tmp5 = add i32 %b, %a
+  %tmp6 = sub i32 %b, %a
+  %tmp7 = add i32 %tmp1, %tmp2
+  %tmp8 = sub i32 %tmp2, %tmp3
+  %tmp9 = add i32 %tmp3, %tmp4
+  %tmp10 = add i32 %tmp4, %tmp5
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %true, label %false
+
+true:
+  store i32 %a, i32* %tmp, align 4
+  call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
+  br label %false
+
+false:
+  store i32 %tmp1, i32* %ptr1, align 4
+  store i32 %tmp2, i32* %ptr2, align 4
+  store i32 %tmp3, i32* %ptr3, align 4
+  store i32 %tmp4, i32* %ptr4, align 4
+  store i32 %tmp5, i32* %ptr5, align 4
+  store i32 %tmp6, i32* %ptr6, align 4
+  %idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1
+  store i32 %a, i32* %idx1, align 4
+  %idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2
+  store i32 %b, i32* %idx2, align 4
+  %idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3
+  store i32 %tmp7, i32* %idx3, align 4
+  %idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4
+  store i32 %tmp8, i32* %idx4, align 4
+  %idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5
+  store i32 %tmp9, i32* %idx5, align 4
+  %idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6
+  store i32 %tmp10, i32* %idx6, align 4
+
+  ret void
+}




More information about the llvm-commits mailing list