[llvm] r346512 - [Power9] Allow gpr callee saved spills in prologue to vector registers

Zaara Syeda via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 9 08:36:24 PST 2018


Author: syzaara
Date: Fri Nov  9 08:36:24 2018
New Revision: 346512

URL: http://llvm.org/viewvc/llvm-project?rev=346512&view=rev
Log:
[Power9] Allow gpr callee saved spills in prologue to vector registers

Currently in LLVM, CalleeSavedInfo can only assign a callee-saved register to
a stack frame index to be spilled in the prologue. We would like to enable
spilling GPRs to vector registers. This patch adds the capability to spill to
other registers rather than only to the stack. It also adds the changes for
Power9 to spill GPRs to volatile vector registers when they are available.
This happens only for leaf functions, and only when the option
-ppc-enable-pe-vector-spills is used.

Differential Revision: https://reviews.llvm.org/D39386

Added:
    llvm/trunk/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
Modified:
    llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h
    llvm/trunk/lib/CodeGen/MIRPrinter.cpp
    llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
    llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h

Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h?rev=346512&r1=346511&r2=346512&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Fri Nov  9 08:36:24 2018
@@ -28,9 +28,14 @@ class AllocaInst;
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
 /// callee saved register is in the current frame.
+/// Callee saved reg can also be saved to a different register rather than
+/// on the stack by setting DstReg instead of FrameIdx.
 class CalleeSavedInfo {
   unsigned Reg;
-  int FrameIdx;
+  union {
+    int FrameIdx;
+    unsigned DstReg;
+  };
   /// Flag indicating whether the register is actually restored in the epilog.
   /// In most cases, if a register is saved, it is also restored. There are
   /// some situations, though, when this is not the case. For example, the
@@ -44,17 +49,29 @@ class CalleeSavedInfo {
   /// by implicit uses on the return instructions, however, the required
   /// changes in the ARM backend would be quite extensive.
   bool Restored;
+  /// Flag indicating whether the register is spilled to stack or another
+  /// register.
+  bool SpilledToReg;
 
 public:
   explicit CalleeSavedInfo(unsigned R, int FI = 0)
-  : Reg(R), FrameIdx(FI), Restored(true) {}
+  : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
 
   // Accessors.
   unsigned getReg()                        const { return Reg; }
   int getFrameIdx()                        const { return FrameIdx; }
-  void setFrameIdx(int FI)                       { FrameIdx = FI; }
+  unsigned getDstReg()                     const { return DstReg; }
+  void setFrameIdx(int FI) {
+    FrameIdx = FI;
+    SpilledToReg = false;
+  }
+  void setDstReg(unsigned SpillReg) {
+    DstReg = SpillReg;
+    SpilledToReg = true;
+  }
   bool isRestored()                        const { return Restored; }
   void setRestored(bool R)                       { Restored = R; }
+  bool isSpilledToReg()                    const { return SpilledToReg; }
 };
 
 /// The MachineFrameInfo class represents an abstract stack frame until
@@ -271,9 +288,9 @@ private:
   unsigned CVBytesOfCalleeSavedRegisters = 0;
 
   /// The prolog/epilog code inserter fills in this vector with each
-  /// callee saved register saved in the frame.  Beyond its use by the prolog/
-  /// epilog code inserter, this data used for debug info and exception
-  /// handling.
+  /// callee saved register saved in either the frame or a different
+  /// register.  Beyond its use by the prolog/ epilog code inserter,
+  /// this data is used for debug info and exception handling.
   std::vector<CalleeSavedInfo> CSInfo;
 
   /// Has CSInfo been set yet?

Modified: llvm/trunk/lib/CodeGen/MIRPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MIRPrinter.cpp?rev=346512&r1=346511&r2=346512&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MIRPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/MIRPrinter.cpp Fri Nov  9 08:36:24 2018
@@ -401,18 +401,20 @@ void MIRPrinter::convertStackObjects(yam
   for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
     yaml::StringValue Reg;
     printRegMIR(CSInfo.getReg(), Reg, TRI);
-    auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
-    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
-           "Invalid stack object index");
-    const FrameIndexOperand &StackObject = StackObjectInfo->second;
-    if (StackObject.IsFixed) {
-      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
-      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
-        CSInfo.isRestored();
-    } else {
-      YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
-      YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
-        CSInfo.isRestored();
+    if (!CSInfo.isSpilledToReg()) {
+      auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+      assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+             "Invalid stack object index");
+      const FrameIndexOperand &StackObject = StackObjectInfo->second;
+      if (StackObject.IsFixed) {
+        YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+        YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+          CSInfo.isRestored();
+      } else {
+        YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+        YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+          CSInfo.isRestored();
+      }
     }
   }
   for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {

Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=346512&r1=346511&r2=346512&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original)
+++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Fri Nov  9 08:36:24 2018
@@ -75,6 +75,10 @@ using namespace llvm;
 
 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
+STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
+STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
+
+
 namespace {
 
 class PEI : public MachineFunctionPass {
@@ -168,6 +172,7 @@ using StackObjSet = SmallSetVector<int,
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 bool PEI::runOnMachineFunction(MachineFunction &MF) {
+  NumFuncSeen++;
   const Function &F = MF.getFunction();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -357,6 +362,11 @@ static void assignCalleeSavedSpillSlots(
     // Now that we know which registers need to be saved and restored, allocate
     // stack slots for them.
     for (auto &CS : CSI) {
+      // If the target has spilled this register to another register, we don't
+      // need to allocate a stack slot.
+      if (CS.isSpilledToReg())
+        continue;
+
       unsigned Reg = CS.getReg();
       const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
 
@@ -454,7 +464,22 @@ static void updateLiveness(MachineFuncti
       if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
         MBB->addLiveIn(Reg);
     }
+    // If callee-saved register is spilled to another register rather than
+    // spilling to stack, the destination register has to be marked as live for
+    // each MBB between the prologue and epilogue so that it is not clobbered
+    // before it is reloaded in the epilogue. The Visited set contains all
+    // blocks outside of the region delimited by prologue/epilogue.
+    if (CSI[i].isSpilledToReg()) {
+      for (MachineBasicBlock &MBB : MF) {
+        if (Visited.count(&MBB))
+          continue;
+        MCPhysReg DstReg = CSI[i].getDstReg();
+        if (!MBB.isLiveIn(DstReg))
+          MBB.addLiveIn(DstReg);
+      }
+    }
   }
+
 }
 
 /// Insert restore code for the callee-saved registers used in the function.
@@ -530,6 +555,9 @@ void PEI::spillCalleeSavedRegs(MachineFu
 
     std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
     if (!CSI.empty()) {
+      if (!MFI.hasCalls())
+        NumLeafFuncWithSpills++;
+
       for (MachineBasicBlock *SaveBlock : SaveBlocks) {
         insertCSRSaves(*SaveBlock, CSI);
         // Update the live-in information of all the blocks up to the save

Modified: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp?rev=346512&r1=346511&r2=346512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp Fri Nov  9 08:36:24 2018
@@ -17,6 +17,7 @@
 #include "PPCMachineFunctionInfo.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,16 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "framelowering"
+STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
+STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
+STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+
+static cl::opt<bool>
+EnablePEVectorSpills("ppc-enable-pe-vector-spills",
+                     cl::desc("Enable spills in prologue to vector registers."),
+                     cl::init(false), cl::Hidden);
+
 /// VRRegNo - Map from a numbered VR register to its enum value.
 ///
 static const MCPhysReg VRRegNo[] = {
@@ -466,6 +477,7 @@ unsigned PPCFrameLowering::determineFram
 
   // Check whether we can skip adjusting the stack pointer (by using red zone)
   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
+    NumNoNeedForFrame++;
     // No need for frame
     if (UpdateMF)
       MFI.setStackSize(0);
@@ -1213,11 +1225,20 @@ void PPCFrameLowering::emitPrologue(Mach
         continue;
       }
 
-      int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-          nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
-      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex);
+      if (CSI[I].isSpilledToReg()) {
+        unsigned SpilledReg = CSI[I].getDstReg();
+        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
+            nullptr, MRI->getDwarfRegNum(Reg, true),
+            MRI->getDwarfRegNum(SpilledReg, true)));
+        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIRegister);
+      } else {
+        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex);
+      }
     }
   }
 }
@@ -1822,17 +1843,19 @@ void PPCFrameLowering::processFunctionBe
     // Move general register save area spill slots down, taking into account
     // the size of the Floating-point register save area.
     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
-      int FI = GPRegs[i].getFrameIdx();
-
-      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+      if (!GPRegs[i].isSpilledToReg()) {
+        int FI = GPRegs[i].getFrameIdx();
+        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+      }
     }
 
     // Move general register save area spill slots down, taking into account
     // the size of the Floating-point register save area.
     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
-      int FI = G8Regs[i].getFrameIdx();
-
-      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+      if (!G8Regs[i].isSpilledToReg()) {
+        int FI = G8Regs[i].getFrameIdx();
+        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+      }
     }
 
     unsigned MinReg =
@@ -1947,6 +1970,64 @@ PPCFrameLowering::addScavengingSpillSlot
   }
 }
 
+// This function checks if a callee saved gpr can be spilled to a volatile
+// vector register. This occurs for leaf functions when the option
+// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
+// which were not spilled to vectors, return false so the target independent
+// code can handle them by assigning a FrameIdx to a stack slot.
+bool PPCFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  // Early exit if cannot spill gprs to volatile vector registers.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
+    return false;
+
+  // Build a BitVector of VSRs that can be used for spilling GPRs.
+  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
+  BitVector BVCalleeSaved(TRI->getNumRegs());
+  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    BVCalleeSaved.set(CSRegs[i]);
+
+  for (unsigned Reg : BVAllocatable.set_bits()) {
+    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+    // used in the function.
+    if (BVCalleeSaved[Reg] ||
+        (!PPC::F8RCRegClass.contains(Reg) &&
+         !PPC::VFRCRegClass.contains(Reg)) ||
+        (MF.getRegInfo().isPhysRegUsed(Reg)))
+      BVAllocatable.reset(Reg);
+  }
+
+  bool AllSpilledToReg = true;
+  for (auto &CS : CSI) {
+    if (BVAllocatable.none())
+      return false;
+
+    unsigned Reg = CS.getReg();
+    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+      AllSpilledToReg = false;
+      continue;
+    }
+
+    unsigned VolatileVFReg = BVAllocatable.find_first();
+    if (VolatileVFReg < BVAllocatable.size()) {
+      CS.setDstReg(VolatileVFReg);
+      BVAllocatable.reset(VolatileVFReg);
+    } else {
+      AllSpilledToReg = false;
+    }
+  }
+  return AllSpilledToReg;
+}
+
+
 bool
 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
@@ -2012,12 +2093,18 @@ PPCFrameLowering::spillCalleeSavedRegist
                                          CSI[i].getFrameIdx()));
       }
     } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      // Use !IsLiveIn for the kill flag.
-      // We do not want to kill registers that are live in this function
-      // before their use because they will become undefined registers.
-      TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
-                              CSI[i].getFrameIdx(), RC, TRI);
+      if (CSI[i].isSpilledToReg()) {
+        NumPESpillVSR++;
+        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
+          .addReg(Reg, getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        // Use !IsLiveIn for the kill flag.
+        // We do not want to kill registers that are live in this function
+        // before their use because they will become undefined registers.
+        TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
+                                CSI[i].getFrameIdx(), RC, TRI);
+      }
     }
   }
   return true;
@@ -2157,13 +2244,19 @@ PPCFrameLowering::restoreCalleeSavedRegi
         CR2Spilled = CR3Spilled = CR4Spilled = false;
       }
 
-      // Default behavior for non-CR saves.
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
-                               RC, TRI);
-      assert(I != MBB.begin() &&
-             "loadRegFromStackSlot didn't insert any code!");
+      if (CSI[i].isSpilledToReg()) {
+        DebugLoc DL;
+        NumPEReloadVSR++;
+        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
+            .addReg(CSI[i].getDstReg(), getKillRegState(true));
+      } else {
+       // Default behavior for non-CR saves.
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+        assert(I != MBB.begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
       }
+    }
 
     // Insert in reverse order.
     if (AtStart)

Modified: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h?rev=346512&r1=346511&r2=346512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.h Fri Nov  9 08:36:24 2018
@@ -99,6 +99,13 @@ public:
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
                                  const TargetRegisterInfo *TRI) const override;
+  /// This function will assign callee saved gprs to volatile vector registers
+  /// for prologue spills when applicable. It returns false if there are any
+  /// registers which were not spilled to volatile vector registers.
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
 
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,

Added: llvm/trunk/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir?rev=346512&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir Fri Nov  9 08:36:24 2018
@@ -0,0 +1,62 @@
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -run-pass=prologepilog -ppc-enable-pe-vector-spills %s -o - | FileCheck %s
+
+---
+name:            test1BB
+alignment:       4
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0.entry:
+    $r14 = IMPLICIT_DEF
+    $r15 = IMPLICIT_DEF
+    $r16 = IMPLICIT_DEF
+    $f0 = IMPLICIT_DEF
+    $v20 = IMPLICIT_DEF
+    BLR8 implicit undef $lr8, implicit undef $rm
+
+# CHECK-LABEL: name:            test1BB
+# CHECK: body:             |
+# CHECK: $f1 = MTVSRD killed $x14
+# CHECK-NEXT: $f2 = MTVSRD killed $x15
+# CHECK-NEXT: $f3 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $f3
+# CHECK-NEXT: $x15 = MFVSRD killed $f2
+# CHECK-NEXT: $x14 = MFVSRD killed $f1
+...
+
+---
+name:            test2BBs
+alignment:       4
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+
+    $cr0 = IMPLICIT_DEF
+    BCC 4, killed renamable $cr0, %bb.2
+    B %bb.1
+
+  bb.1:
+    $r14 = IMPLICIT_DEF
+    $r15 = IMPLICIT_DEF
+    $r16 = IMPLICIT_DEF
+    $r3 = IMPLICIT_DEF
+    B %bb.3
+
+  bb.2:
+    liveins: $x3
+    $r3 = IMPLICIT_DEF
+
+  bb.3:
+    BLR8 implicit undef $lr8, implicit undef $rm
+
+# CHECK-LABEL: name:            test2BB
+# CHECK: body:             |
+# CHECK: $f0 = MTVSRD killed $x14
+# CHECK-NEXT: $f1 = MTVSRD killed $x15
+# CHECK-NEXT: $f2 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $f2
+# CHECK-NEXT: $x15 = MFVSRD killed $f1
+# CHECK-NEXT: $x14 = MFVSRD killed $f0
+...




More information about the llvm-commits mailing list