[llvm] 2432d80 - [PowerPC] Use mtvsrdd to put callee-saved GPR into VSR
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 20 01:43:40 PDT 2021
Author: Qiu Chaofan
Date: 2021-04-20T16:43:24+08:00
New Revision: 2432d80d3b54f67c0e496d6b8c11ceb9f573982d
URL: https://github.com/llvm/llvm-project/commit/2432d80d3b54f67c0e496d6b8c11ceb9f573982d
DIFF: https://github.com/llvm/llvm-project/commit/2432d80d3b54f67c0e496d6b8c11ceb9f573982d.diff
LOG: [PowerPC] Use mtvsrdd to put callee-saved GPR into VSR
This patch exploits the mtvsrdd instruction (available in ISA 3.0+) to save
two callee-saved GPRs into a single VSR, making the spill more
efficient.
Reviewed By: jsji, nemanjai
Differential Revision: https://reviews.llvm.org/D62565
Added:
Modified:
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
llvm/lib/Target/PowerPC/PPCFrameLowering.h
llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 36bdd547923d..5d53053216c8 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2248,30 +2248,39 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
BVCalleeSaved.set(CSRegs[i]);
for (unsigned Reg : BVAllocatable.set_bits()) {
- // Set to 0 if the register is not a volatile VF/F8 register, or if it is
+ // Set to 0 if the register is not a volatile VSX register, or if it is
// used in the function.
- if (BVCalleeSaved[Reg] ||
- (!PPC::F8RCRegClass.contains(Reg) &&
- !PPC::VFRCRegClass.contains(Reg)) ||
- (MF.getRegInfo().isPhysRegUsed(Reg)))
+ if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
+ MF.getRegInfo().isPhysRegUsed(Reg))
BVAllocatable.reset(Reg);
}
bool AllSpilledToReg = true;
+ unsigned LastVSRUsedForSpill = 0;
for (auto &CS : CSI) {
if (BVAllocatable.none())
return false;
unsigned Reg = CS.getReg();
- if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
+
+ if (!PPC::G8RCRegClass.contains(Reg)) {
AllSpilledToReg = false;
continue;
}
+ // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
+ // into one VSR using the mtvsrdd instruction.
+ if (LastVSRUsedForSpill != 0) {
+ CS.setDstReg(LastVSRUsedForSpill);
+ BVAllocatable.reset(LastVSRUsedForSpill);
+ LastVSRUsedForSpill = 0;
+ continue;
+ }
+
unsigned VolatileVFReg = BVAllocatable.find_first();
if (VolatileVFReg < BVAllocatable.size()) {
CS.setDstReg(VolatileVFReg);
- BVAllocatable.reset(VolatileVFReg);
+ LastVSRUsedForSpill = VolatileVFReg;
} else {
AllSpilledToReg = false;
}
@@ -2290,6 +2299,24 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
+ BitVector Spilled(TRI->getNumRegs());
+
+ VSRContainingGPRs.clear();
+
+ // Map each VSR to the GPRs to be spilled into it. A single VSR can contain one
+ // or two GPRs, so we need a table to record this for later save/restore.
+ llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
+ if (Info.isSpilledToReg()) {
+ auto &SpilledVSR =
+ VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
+ assert(SpilledVSR.second == 0 &&
+ "Can't spill more than two GPRs into VSR!");
+ if (SpilledVSR.first == 0)
+ SpilledVSR.first = Info.getReg();
+ else
+ SpilledVSR.second = Info.getReg();
+ }
+ });
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
@@ -2339,9 +2366,31 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
}
} else {
if (CSI[i].isSpilledToReg()) {
- NumPESpillVSR++;
- BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
- .addReg(Reg, getKillRegState(true));
+ unsigned Dst = CSI[i].getDstReg();
+
+ if (Spilled[Dst])
+ continue;
+
+ if (VSRContainingGPRs[Dst].second != 0) {
+ assert(Subtarget.hasP9Vector() &&
+ "mtvsrdd is unavailable on pre-P9 targets.");
+
+ NumPESpillVSR += 2;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
+ .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
+ .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
+ } else if (VSRContainingGPRs[Dst].second == 0) {
+ assert(Subtarget.hasP8Vector() &&
+ "Can't move GPR to VSR on pre-P8 targets.");
+
+ ++NumPESpillVSR;
+ BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
+ TRI->getSubReg(Dst, PPC::sub_64))
+ .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
+ } else {
+ llvm_unreachable("More than two GPRs spilled to a VSR!");
+ }
+ Spilled.set(Dst);
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
// Use !IsLiveIn for the kill flag.
@@ -2445,6 +2494,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
bool CR3Spilled = false;
bool CR4Spilled = false;
unsigned CSIIndex = 0;
+ BitVector Restored(TRI->getNumRegs());
// Initialize insertion-point logic; we will be restoring in reverse
// order of spill.
@@ -2489,9 +2539,32 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
if (CSI[i].isSpilledToReg()) {
DebugLoc DL;
- NumPEReloadVSR++;
- BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
- .addReg(CSI[i].getDstReg(), getKillRegState(true));
+ unsigned Dst = CSI[i].getDstReg();
+
+ if (Restored[Dst])
+ continue;
+
+ if (VSRContainingGPRs[Dst].second != 0) {
+ assert(Subtarget.hasP9Vector());
+ NumPEReloadVSR += 2;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
+ VSRContainingGPRs[Dst].second)
+ .addReg(Dst);
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
+ VSRContainingGPRs[Dst].first)
+ .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
+ } else if (VSRContainingGPRs[Dst].second == 0) {
+ assert(Subtarget.hasP8Vector());
+ ++NumPEReloadVSR;
+ BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
+ VSRContainingGPRs[Dst].first)
+ .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
+ } else {
+ llvm_unreachable("More than two GPRs spilled to a VSR!");
+ }
+
+ Restored.set(Dst);
+
} else {
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 8bf52c0ed01a..b378c2739925 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -28,6 +28,10 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned BasePointerSaveOffset;
const unsigned CRSaveOffset;
+ // Map each VSR used for spilling to the one or two GPRs stored in it.
+ // TODO: Use local table in methods to avoid this mutable member.
+ mutable DenseMap<unsigned, std::pair<Register, Register>> VSRContainingGPRs;
+
/**
* Find register[s] that can be used in function prologue and epilogue
*
diff --git a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
index 9803b226b43c..59ab2fce083c 100644
--- a/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
+++ b/llvm/test/CodeGen/PowerPC/prolog_vec_spills.mir
@@ -14,15 +14,15 @@ body: |
$v20 = IMPLICIT_DEF
BLR8 implicit undef $lr8, implicit undef $rm
+# Use mtvsrdd to save two GPRs in a single instruction
# CHECK-LABEL: name: test1BB
# CHECK: body: |
# CHECK: liveins: $x14, $x15, $x16, $v20
-# CHECK: $f1 = MTVSRD killed $x14
-# CHECK-NEXT: $f2 = MTVSRD killed $x15
-# CHECK-NEXT: $f3 = MTVSRD killed $x16
-# CHECK: $x16 = MFVSRD killed $f3
-# CHECK-NEXT: $x15 = MFVSRD killed $f2
-# CHECK-NEXT: $x14 = MFVSRD killed $f1
+# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT: $vf1 = MTVSRD killed $x16
+# CHECK: $x16 = MFVSRD killed $vf1
+# CHECK-NEXT: $x15 = MFVSRLD $v0
+# CHECK-NEXT: $x14 = MFVSRD killed $vf0
...
---
@@ -55,15 +55,14 @@ body: |
## clobbered before restored in the epilogue.
# CHECK-LABEL: name: test2BB
# CHECK: body: |
-# CHECK: $f0 = MTVSRD killed $x14
-# CHECK-NEXT: $f1 = MTVSRD killed $x15
-# CHECK-NEXT: $f2 = MTVSRD killed $x16
+# CHECK: $v0 = MTVSRDD killed $x14, killed $x15
+# CHECK-NEXT: $vf1 = MTVSRD killed $x16
# CHECK: bb.2:
# CHECK-NEXT: successors: %bb.3
-# CHECK-NEXT: liveins: $f0, $f1, $f2
+# CHECK-NEXT: liveins: $v0, $v1
# CHECK: bb.3:
-# CHECK-NEXT: liveins: $f0, $f1, $f2
-# CHECK: $x16 = MFVSRD killed $f2
-# CHECK-NEXT: $x15 = MFVSRD killed $f1
-# CHECK-NEXT: $x14 = MFVSRD killed $f0
+# CHECK-NEXT: liveins: $v0, $v1
+# CHECK: $x16 = MFVSRD killed $vf1
+# CHECK-NEXT: $x15 = MFVSRLD $v0
+# CHECK-NEXT: $x14 = MFVSRD killed $vf0
...
More information about the llvm-commits mailing list