[llvm] 2432d80 - [PowerPC] Use mtvsrdd to put callee-saved GPR into VSR

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 20 01:43:40 PDT 2021


Author: Qiu Chaofan
Date: 2021-04-20T16:43:24+08:00
New Revision: 2432d80d3b54f67c0e496d6b8c11ceb9f573982d

URL: https://github.com/llvm/llvm-project/commit/2432d80d3b54f67c0e496d6b8c11ceb9f573982d
DIFF: https://github.com/llvm/llvm-project/commit/2432d80d3b54f67c0e496d6b8c11ceb9f573982d.diff

LOG: [PowerPC] Use mtvsrdd to put callee-saved GPR into VSR

This patch exploits the mtvsrdd instruction (available in ISA 3.0+) to save
two callee-saved GPRs into a single VSR, making the register spill more
efficient.

Reviewed By: jsji, nemanjai

Differential Revision: https://reviews.llvm.org/D62565

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/lib/Target/PowerPC/PPCFrameLowering.h
    llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 36bdd547923d..5d53053216c8 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2248,30 +2248,39 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
     BVCalleeSaved.set(CSRegs[i]);
 
   for (unsigned Reg : BVAllocatable.set_bits()) {
-    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+    // Set to 0 if the register is not a volatile VSX register, or if it is
     // used in the function.
-    if (BVCalleeSaved[Reg] ||
-        (!PPC::F8RCRegClass.contains(Reg) &&
-         !PPC::VFRCRegClass.contains(Reg)) ||
-        (MF.getRegInfo().isPhysRegUsed(Reg)))
+    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
+        MF.getRegInfo().isPhysRegUsed(Reg))
       BVAllocatable.reset(Reg);
   }
 
   bool AllSpilledToReg = true;
+  unsigned LastVSRUsedForSpill = 0;
   for (auto &CS : CSI) {
     if (BVAllocatable.none())
       return false;
 
     unsigned Reg = CS.getReg();
-    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+
+    if (!PPC::G8RCRegClass.contains(Reg)) {
       AllSpilledToReg = false;
       continue;
     }
 
+    // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
+    // into one VSR using the mtvsrdd instruction.
+    if (LastVSRUsedForSpill != 0) {
+      CS.setDstReg(LastVSRUsedForSpill);
+      BVAllocatable.reset(LastVSRUsedForSpill);
+      LastVSRUsedForSpill = 0;
+      continue;
+    }
+
     unsigned VolatileVFReg = BVAllocatable.find_first();
     if (VolatileVFReg < BVAllocatable.size()) {
       CS.setDstReg(VolatileVFReg);
-      BVAllocatable.reset(VolatileVFReg);
+      LastVSRUsedForSpill = VolatileVFReg;
     } else {
       AllSpilledToReg = false;
     }
@@ -2290,6 +2299,24 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
+  BitVector Spilled(TRI->getNumRegs());
+
+  VSRContainingGPRs.clear();
+
+  // Map each VSR to the GPRs to be spilled into it. A single VSR can contain
+  // one or two GPRs, so we need a table to record this for later save/restore.
+  llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
+    if (Info.isSpilledToReg()) {
+      auto &SpilledVSR =
+          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
+      assert(SpilledVSR.second == 0 &&
+             "Can't spill more than two GPRs into VSR!");
+      if (SpilledVSR.first == 0)
+        SpilledVSR.first = Info.getReg();
+      else
+        SpilledVSR.second = Info.getReg();
+    }
+  });
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
@@ -2339,9 +2366,31 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
       }
     } else {
       if (CSI[i].isSpilledToReg()) {
-        NumPESpillVSR++;
-        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
-          .addReg(Reg, getKillRegState(true));
+        unsigned Dst = CSI[i].getDstReg();
+
+        if (Spilled[Dst])
+          continue;
+
+        if (VSRContainingGPRs[Dst].second != 0) {
+          assert(Subtarget.hasP9Vector() &&
+                 "mtvsrdd is unavailable on pre-P9 targets.");
+
+          NumPESpillVSR += 2;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
+              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
+              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
+        } else if (VSRContainingGPRs[Dst].second == 0) {
+          assert(Subtarget.hasP8Vector() &&
+                 "Can't move GPR to VSR on pre-P8 targets.");
+
+          ++NumPESpillVSR;
+          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
+                  TRI->getSubReg(Dst, PPC::sub_64))
+              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
+        } else {
+          llvm_unreachable("More than two GPRs spilled to a VSR!");
+        }
+        Spilled.set(Dst);
       } else {
         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
         // Use !IsLiveIn for the kill flag.
@@ -2445,6 +2494,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
   bool CR3Spilled = false;
   bool CR4Spilled = false;
   unsigned CSIIndex = 0;
+  BitVector Restored(TRI->getNumRegs());
 
   // Initialize insertion-point logic; we will be restoring in reverse
   // order of spill.
@@ -2489,9 +2539,32 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
 
       if (CSI[i].isSpilledToReg()) {
         DebugLoc DL;
-        NumPEReloadVSR++;
-        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
-            .addReg(CSI[i].getDstReg(), getKillRegState(true));
+        unsigned Dst = CSI[i].getDstReg();
+
+        if (Restored[Dst])
+          continue;
+
+        if (VSRContainingGPRs[Dst].second != 0) {
+          assert(Subtarget.hasP9Vector());
+          NumPEReloadVSR += 2;
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
+                  VSRContainingGPRs[Dst].second)
+              .addReg(Dst);
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
+                  VSRContainingGPRs[Dst].first)
+              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
+        } else if (VSRContainingGPRs[Dst].second == 0) {
+          assert(Subtarget.hasP8Vector());
+          ++NumPEReloadVSR;
+          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
+                  VSRContainingGPRs[Dst].first)
+              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
+        } else {
+          llvm_unreachable("More than two GPRs spilled to a VSR!");
+        }
+
+        Restored.set(Dst);
+
       } else {
        // Default behavior for non-CR saves.
         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 8bf52c0ed01a..b378c2739925 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -28,6 +28,10 @@ class PPCFrameLowering: public TargetFrameLowering {
   const unsigned BasePointerSaveOffset;
   const unsigned CRSaveOffset;
 
+  // Map each VSR used for spilling to the one or two GPRs saved in it.
+  // TODO: Use local table in methods to avoid this mutable member.
+  mutable DenseMap<unsigned, std::pair<Register, Register>> VSRContainingGPRs;
+
   /**
    * Find register[s] that can be used in function prologue and epilogue
    *

diff  --git a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
index 9803b226b43c..59ab2fce083c 100644
--- a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
+++ b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
@@ -14,15 +14,15 @@ body:             |
     $v20 = IMPLICIT_DEF
     BLR8 implicit undef $lr8, implicit undef $rm
 
+# Use mtvsrdd to save two GPRs in a single instruction
 # CHECK-LABEL: name:            test1BB
 # CHECK: body:             |
 # CHECK:      liveins: $x14, $x15, $x16, $v20
-# CHECK: $f1 = MTVSRD killed $x14
-# CHECK-NEXT: $f2 = MTVSRD killed $x15
-# CHECK-NEXT: $f3 = MTVSRD killed $x16
-# CHECK: $x16 = MFVSRD killed $f3
-# CHECK-NEXT: $x15 = MFVSRD killed $f2
-# CHECK-NEXT: $x14 = MFVSRD killed $f1
+# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT: $vf1 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $vf1
+# CHECK-NEXT: $x15 = MFVSRLD $v0
+# CHECK-NEXT: $x14 = MFVSRD killed $vf0
 ...
 
 ---
@@ -55,15 +55,14 @@ body:             |
 ## clobbered before restored in the epilogue.
 # CHECK-LABEL: name:            test2BB
 # CHECK: body:             |
-# CHECK:        $f0 = MTVSRD killed $x14
-# CHECK-NEXT:   $f1 = MTVSRD killed $x15
-# CHECK-NEXT:   $f2 = MTVSRD killed $x16
+# CHECK:        $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT:   $vf1 = MTVSRD killed $x16
 # CHECK:      bb.2:
 # CHECK-NEXT:   successors: %bb.3
-# CHECK-NEXT:   liveins: $f0, $f1, $f2
+# CHECK-NEXT:   liveins: $v0, $v1
 # CHECK:      bb.3:
-# CHECK-NEXT:   liveins: $f0, $f1, $f2
-# CHECK:        $x16 = MFVSRD killed $f2
-# CHECK-NEXT:   $x15 = MFVSRD killed $f1
-# CHECK-NEXT:   $x14 = MFVSRD killed $f0
+# CHECK-NEXT:   liveins: $v0, $v1
+# CHECK:        $x16 = MFVSRD killed $vf1
+# CHECK-NEXT:   $x15 = MFVSRLD $v0
+# CHECK-NEXT:   $x14 = MFVSRD killed $vf0
 ...


        


More information about the llvm-commits mailing list