[llvm] r322124 - [PowerPC] Manually schedule the prologue and epilogue

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 9 13:57:49 PST 2018


Author: stefanp
Date: Tue Jan  9 13:57:49 2018
New Revision: 322124

URL: http://llvm.org/viewvc/llvm-project?rev=322124&view=rev
Log:
[PowerPC] Manually schedule the prologue and epilogue

This patch makes the following changes to the schedule of instructions in the
prologue and epilogue.

The stack pointer update is moved down in the prologue so that the callee saves
do not have to wait for the update to happen.
Saving the lr is moved down in the prologue to hide the latency of the mflr.
The stack pointer is moved up in the epilogue so that restoring of the lr can
happen sooner.
The mtlr is moved up in the epilogue so that it is away from the blr at the end
of the epilogue. The latency of the mtlr can now be hidden by the loads of the
callee saved registers.

This commit is almost identical to this one: r322036 except that two warnings
that broke build bots have been fixed.

The revision number is D41737 as before.

Modified:
    llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
    llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
    llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll
    llvm/trunk/test/CodeGen/PowerPC/vsxD-Form-spills.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp Tue Jan  9 13:57:49 2018
@@ -823,6 +823,39 @@ void PPCFrameLowering::emitPrologue(Mach
   assert((isPPC64 || !MustSaveCR) &&
          "Prologue CR saving supported only in 64-bit mode");
 
+  // Check if we can move the stack update instruction (stdu) down the prologue
+  //  past the callee saves. Hopefully this will avoid the situation where the
+  //  saves are waiting for the update on the store with update to complete.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  bool MovingStackUpdateDown = false;
+  // This optimization has a number of guards. At this point we are being very
+  //  cautious and we do not try to do this when we have a fast call or
+  //  we are using PIC base or we are using a frame pointer or a base pointer.
+  //  It would be possible to turn on this optimization under these conditions
+  //  as well but it would require further modifications to the prologue and
+  //  epilogue. For example, if we want to turn on this optimization for
+  //  functions that use frame pointers we would have to take into consideration
+  //  the fact that spills to the stack may be using r30 instead of r1.
+  // Aside from that we need to have a non-zero frame and we need to have a
+  //  non-large frame size. Notice that we did not use !isLargeFrame but we used
+  //  isInt<16>(FrameSize) instead. This is important because this guard has to
+  //  be identical to the one in the epilogue and in the epilogue the variable
+  //  is defined as bool isLargeFrame = !isInt<16>(FrameSize);
+  if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+      !HasBP && isInt<16>(FrameSize)) {
+    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+    for (unsigned i=0; i<Info.size(); i++) {
+      int FrIdx = Info[i].getFrameIdx();
+      if (FrIdx < 0) {
+        if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+          StackUpdateLoc++;
+          MovingStackUpdateDown = true;
+        }
+      }
+    }
+  }
+
   // If we need to spill the CR and the LR but we don't have two separate
   // registers available, we must spill them one at a time
   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -886,7 +919,7 @@ void PPCFrameLowering::emitPrologue(Mach
   }
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, StoreInst)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
       .addReg(ScratchReg, getKillRegState(true))
       .addImm(LROffset)
       .addReg(SPReg);
@@ -954,7 +987,7 @@ void PPCFrameLowering::emitPrologue(Mach
     HasSTUX = true;
 
   } else if (!isLargeFrame) {
-    BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
       .addReg(SPReg)
       .addImm(NegFrameSize)
       .addReg(SPReg);
@@ -1194,6 +1227,12 @@ void PPCFrameLowering::emitPrologue(Mach
       }
 
       int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+      // We have changed the object offset above but we do not want to change
+      //  the actual offsets in the CFI instruction so we have to undo the
+      //  offset change here.
+      if (MovingStackUpdateDown)
+        Offset -= NegFrameSize;
+
       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
           nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1339,6 +1378,23 @@ void PPCFrameLowering::emitEpilogue(Mach
   unsigned RBReg = SPReg;
   unsigned SPAdd = 0;
 
+  // Check if we can move the stack update instruction up the epilogue
+  //  past the callee saves. This will allow the move to LR instruction
+  //  to be executed before the restores of the callee saves which means
+  //  that the callee saves can hide the latency from the MTLR instruction.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+      !HasBP && !isLargeFrame) {
+    const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo();
+    for (unsigned i=0; i<Info.size(); i++) {
+      int FrIdx = Info[i].getFrameIdx();
+      if (FrIdx < 0) {
+        if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+          StackUpdateLoc--;
+      }
+    }
+  }
+
   if (FrameSize) {
     // In the prologue, the loaded (or persistent) stack pointer value is
     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1368,7 +1424,7 @@ void PPCFrameLowering::emitEpilogue(Mach
       }
     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
       if (HasRedZone) {
-        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
           .addReg(SPReg)
           .addImm(FrameSize);
       } else {
@@ -1392,7 +1448,7 @@ void PPCFrameLowering::emitEpilogue(Mach
             .addReg(FPReg);
         RBReg = FPReg;
       }
-      BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
         .addImm(0)
         .addReg(SPReg);
     }
@@ -1425,7 +1481,7 @@ void PPCFrameLowering::emitEpilogue(Mach
   // a base register anyway, because it may happen to be R0.
   bool LoadedLR = false;
   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
-    BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
       .addImm(LROffset+SPAdd)
       .addReg(RBReg);
     LoadedLR = true;
@@ -1497,7 +1553,7 @@ void PPCFrameLowering::emitEpilogue(Mach
         .addReg(TempReg, getKillRegState(i == e-1));
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
 
   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
   // call optimization

Modified: llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll Tue Jan  9 13:57:49 2018
@@ -15,12 +15,12 @@
 define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
 ; CHECK-LABEL: _ZN2CC3funEv:
 ; CHECK:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
-; CHECK-NEXT:    stdu 1, -48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
-; CHECK-NEXT:    std 30, 32(1)
+; CHECK-NEXT:    std 30, -16(1)
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -48(1)
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    ld 12, 0(30)
 ; CHECK-NEXT:    std 2, 24(1)
@@ -38,11 +38,11 @@ define noalias i8* @_ZN2CC3funEv(%class.
 ; CHECK-NEXT:    mr 3, 30
 ; CHECK-NEXT:    bl _ZN2CC3barEPi
 ; CHECK-NEXT:    nop
-; CHECK:    ld 30, 32(1)
-; CHECK-NEXT:    li 3, 0
+; CHECK:    li 3, 0
 ; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
+; CHECK:    ld 30, -16(1)
 ; CHECK-NEXT:    blr
 entry:
   %foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0

Modified: llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll Tue Jan  9 13:57:49 2018
@@ -16,12 +16,12 @@ entry:
 ; stfd 14, 416(1)
 
 ; After the fix by patch D34337:
+; CHECK-LE:std 15, -280(1)
+; CHECK-LE:stfd 14, -144(1)
 ; CHECK-LE: stdu 1, -528(1)
-; CHECK-LE:std 15, 248(1)
-; CHECK-LE:stfd 14, 384(1)
+; CHECK-BE:std 15, -280(1)
+; CHECK-BE:stfd 14, -144(1)
 ; CHECK-BE: stdu 1, -544(1)
-; CHECK-BE:std 15, 264(1)
-; CHECK-BE:stfd 14, 400(1)
 }
 
 define signext i32 @foo() {

Modified: llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll Tue Jan  9 13:57:49 2018
@@ -110,7 +110,7 @@ declare i32 @doSomething(i32, i32*)
 ;
 ; Epilogue code.
 ; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
 ;
 ; ENABLE: .[[ELSE_LABEL]]: # %if.else
 ; Shift second argument by one and store into returned register.
@@ -171,7 +171,7 @@ declare i32 @something(...)
 ; Next BB
 ; CHECK: %for.end
 ; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
 entry:
   br label %for.preheader
@@ -209,9 +209,9 @@ for.end:
 ; Make sure we save the link register 
 ; CHECK: mflr {{[0-9]+}}
 ;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
 ; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Loop preheader
@@ -240,7 +240,7 @@ for.end:
 ; DISABLE: .[[EPILOG_BB]]: # %if.end
 ; Epilog code
 ; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
 ; 
 ; ENABLE: .[[ELSE_LABEL]]: # %if.else
 ; Shift second argument by one and store into returned register.
@@ -291,9 +291,9 @@ declare void @somethingElse(...)
 ; Make sure we save the link register
 ; CHECK: mflr {{[0-9]+}}
 ;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
 ; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; CHECK: bl somethingElse
@@ -322,7 +322,7 @@ declare void @somethingElse(...)
 ;
 ; Epilogue code.
 ; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
 ;
 ; ENABLE: .[[ELSE_LABEL]]: # %if.else
 ; Shift second argument by one and store into returned register.

Modified: llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll Tue Jan  9 13:57:49 2018
@@ -6,7 +6,7 @@ define void @test_foo(i32* nocapture %x0
 entry:
 
 ; CHECK-LABEL: test_foo:
-; CHECK: stdu 1, {{-?[0-9]+}}(1)
+; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1)
 ; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3
 ; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4
 ; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5
@@ -15,14 +15,14 @@ entry:
 ; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8
 ; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9
 ; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10
-; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1)
 ; CHECK: bl __tls_get_addr
 ; CHECK-DAG: stw 3, 0([[BACKUP_3]])
 ; CHECK-DAG: stw 3, 0([[BACKUP_4]])

Modified: llvm/trunk/test/CodeGen/PowerPC/vsxD-Form-spills.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsxD-Form-spills.ll?rev=322124&r1=322123&r2=322124&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsxD-Form-spills.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsxD-Form-spills.ll Tue Jan  9 13:57:49 2018
@@ -4,35 +4,37 @@
 define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) {
 
 ; CHECK-LABEL: testSpill:
-; CHECK:    li 11, 80
-; CHECK:    li 12, 96
-; CHECK:    li 3, 48
-; CHECK:    li 10, 64
-; CHECK:    stxvd2x 62, 1, 11 # 16-byte Folded Spill
-; CHECK:    stxvd2x 63, 1, 12 # 16-byte Folded Spill
-; CHECK:    stxvd2x 60, 1, 3 # 16-byte Folded Spill
-; CHECK:    stxvd2x 61, 1, 10 # 16-byte Folded Spill
-; CHECK:    li 9, 96
-; CHECK:    li 10, 80
-; CHECK:    li 11, 64
-; CHECK:    li 12, 48
-; CHECK:    lxvd2x 63, 1, 9 # 16-byte Folded Reload
-; CHECK:    lxvd2x 62, 1, 10 # 16-byte Folded Reload
-; CHECK:    lxvd2x 61, 1, 11 # 16-byte Folded Reload
-; CHECK:    lxvd2x 60, 1, 12 # 16-byte Folded Reload
+; CHECK-DAG:    li [[REG64:[0-9]+]], -64
+; CHECK-DAG:    li [[REG48:[0-9]+]], -48
+; CHECK-DAG:    li [[REG32:[0-9]+]], -32
+; CHECK-DAG:    li [[REG16:[0-9]+]], -16
+; CHECK-NOT:    li
+; CHECK-DAG:    stxvd2x 60, 1, [[REG64]] # 16-byte Folded Spill
+; CHECK-DAG:    stxvd2x 61, 1, [[REG48]] # 16-byte Folded Spill
+; CHECK-DAG:    stxvd2x 62, 1, [[REG32]] # 16-byte Folded Spill
+; CHECK-DAG:    stxvd2x 63, 1, [[REG16]] # 16-byte Folded Spill
+; CHECK:    std 0, 16(1)
+; CHECK-DAG:    li [[REG16:[0-9]+]], -16
+; CHECK-DAG:    li [[REG32:[0-9]+]], -32
+; CHECK-DAG:    li [[REG48:[0-9]+]], -48
+; CHECK-DAG:    li [[REG64:[0-9]+]], -64
 ; CHECK:    mtlr 0
+; CHECK-DAG:    lxvd2x 63, 1, [[REG16]] # 16-byte Folded Reload
+; CHECK-DAG:    lxvd2x 62, 1, [[REG32]] # 16-byte Folded Reload
+; CHECK-DAG:    lxvd2x 61, 1, [[REG48]] # 16-byte Folded Reload
+; CHECK-DAG:    lxvd2x 60, 1, [[REG64]] # 16-byte Folded Reload
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-PWR9-LABEL: testSpill:
-; CHECK-PWR9:    stxv 62, 80(1) # 16-byte Folded Spill
-; CHECK-PWR9:    stxv 63, 96(1) # 16-byte Folded Spill
-; CHECK-PWR9:    stxv 60, 48(1) # 16-byte Folded Spill
-; CHECK-PWR9:    stxv 61, 64(1) # 16-byte Folded Spill
-; CHECK-PWR9:    lxv 63, 96(1) # 16-byte Folded Reload
-; CHECK-PWR9:    lxv 62, 80(1) # 16-byte Folded Reload
-; CHECK-PWR9:    lxv 61, 64(1) # 16-byte Folded Reload
-; CHECK-PWR9:    lxv 60, 48(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG:    stxv 60, -64(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG:    stxv 61, -48(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG:    stxv 62, -32(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG:    stxv 63, -16(1) # 16-byte Folded Spill
 ; CHECK-PWR9:    mtlr 0
+; CHECK-PWR9-DAG:    lxv 63, -16(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG:    lxv 62, -32(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG:    lxv 61, -48(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG:    lxv 60, -64(1) # 16-byte Folded Reload
 ; CHECK-PWR9-NEXT:    blr
 
 entry:




More information about the llvm-commits mailing list