[llvm] [PowerPC][CodeGen] Exploit STMW and LMW in 32-bit big-endian mode. (PR #74415)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 13 21:54:41 PST 2023
https://github.com/EsmeYi updated https://github.com/llvm/llvm-project/pull/74415
>From 6243b1b8529779f0c09da35c844bd0038d6c4121 Mon Sep 17 00:00:00 2001
From: esmeyi <esme.yi at ibm.com>
Date: Tue, 12 Dec 2023 01:17:55 -0500
Subject: [PATCH 1/2] Draft.
---
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 86 +++++++++++++++++++-
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 12 +--
2 files changed, 89 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 245e78641ed654..3b28069a1eb37d 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -40,6 +40,12 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills",
cl::desc("Enable spills in prologue to vector registers."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableLoadStoreMultiple("ppc-enable-load-store-multiple",
+ cl::desc("Enable load/store multiple (only "
+ "supported on 32-bit AIX)."),
+ cl::init(true), cl::Hidden); // On by default; cl::Hidden keeps it out of plain -help.
+
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
@@ -2399,6 +2405,34 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
return AllSpilledToReg;
}
+static void findContinuousLoadStore(const MachineFrameInfo &MFI, // Scans CSI for a register run that one STMW/LMW could cover.
+ ArrayRef<CalleeSavedInfo> CSI,
+ Register &MergeFrom) { // In/out: callers pass PPC::R0, which is used here as "no candidate".
+
+ for (unsigned I = 0, E = CSI.size(); I + 1 < E; ++I) { // Stops one short of the end: each step also reads CSI[I + 1].
+ Register CurrReg = CSI[I].getReg();
+ if (CurrReg >= PPC::R31) // Stop scanning at the first entry that is not below R31.
+ break;
+
+ if (CurrReg < PPC::R13 || CSI[I].isSpilledToReg() || // Skip registers below R13 and entries spilled to a register.
+ CSI[I].getFrameIdx() >= 0) // NOTE(review): presumably keeps only fixed stack objects (negative indices) - confirm.
+ continue;
+
+ // Only 4-byte stack objects are considered; entries of any other size are skipped.
+ if (MFI.getObjectSize(CSI[I].getFrameIdx()) != 4)
+ continue;
+
+ // Remember the first candidate register: STMW/LMW transfer every register
+ // from rN through r31 (rN >= r13), so the merge must start at that rN.
+ if (MergeFrom == PPC::R0 && CurrReg >= PPC::R13)
+ MergeFrom = CurrReg;
+
+ // This entry's frame index must be exactly one above the next entry's; otherwise drop the candidate.
+ if (CSI[I].getFrameIdx() - CSI[I + 1].getFrameIdx() != 1)
+ MergeFrom = PPC::R0;
+ }
+}
+
bool PPCFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
@@ -2407,6 +2441,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
bool MustSaveTOC = FI->mustSaveTOC();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2429,6 +2464,10 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
}
}
+ Register MergeFrom = PPC::R0;
+ if (EnableLoadStoreMultiple && Subtarget.isAIXABI() && !Subtarget.isPPC64())
+ findContinuousLoadStore(MFI, CSI, MergeFrom);
+
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
@@ -2511,9 +2550,26 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
// saved vector registers.
if (Subtarget.needsSwapsForVSXMemOps() &&
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
- TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
- I.getFrameIdx(), RC, TRI);
- else
+ TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(),
+ RC, TRI);
+ else if (MergeFrom >= PPC::R13 && MergeFrom < PPC::R31 &&
+ Reg <= PPC::R31) {
+ if (Reg == MergeFrom) {
+ // Build an STMW instruction.
+ int FrameIdx = I.getFrameIdx();
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBB.begin(), DL, TII.get(PPC::STMW));
+ MIB.addReg(Reg, getKillRegState(!IsLiveIn));
+ // Add frame reference.
+ MIB.addImm(0).addFrameIndex(FrameIdx);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
+ MIB.addMemOperand(MMO);
+ } else
+ continue;
+ } else
TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
TRI, Register());
}
@@ -2600,6 +2656,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
bool MustSaveTOC = FI->mustSaveTOC();
bool CR2Spilled = false;
bool CR3Spilled = false;
@@ -2607,6 +2664,10 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
unsigned CSIIndex = 0;
BitVector Restored(TRI->getNumRegs());
+ Register MergeFrom = PPC::R0;
+ if (EnableLoadStoreMultiple && Subtarget.isAIXABI() && !Subtarget.isPPC64())
+ findContinuousLoadStore(MFI, CSI, MergeFrom);
+
// Initialize insertion-point logic; we will be restoring in reverse
// order of spill.
MachineBasicBlock::iterator I = MI, BeforeI = I;
@@ -2686,7 +2747,24 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
TRI);
- else
+ else if (MergeFrom >= PPC::R13 && MergeFrom < PPC::R31 &&
+ Reg <= PPC::R31) {
+ if (Reg == MergeFrom) {
+ // Build an LMW instruction.
+ int FrameIdx = CSI[i].getFrameIdx();
+ DebugLoc DL;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBB.begin(), DL, TII.get(PPC::LMW), Reg);
+ // Add frame reference.
+ MIB.addImm(0).addFrameIndex(FrameIdx);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
+ MIB.addMemOperand(MMO);
+ } else
+ continue;
+ } else
TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
Register());
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 7d913a77cc7155..21ec25b2e93dd9 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1673,8 +1673,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
+ // STMW and LMW only have immediate form.
bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
- OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
+ OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC) &&
+ OpC != PPC::STMW && OpC != PPC::LMW;
// Now add the frame object offset to the offset from r1.
int64_t Offset = MFI.getObjectOffset(FrameIndex);
@@ -1716,10 +1718,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
isInt<16>(Offset);
if (TII.isPrefixed(MI.getOpcode()))
OffsetFitsMnemonic = isInt<34>(Offset);
- if (!noImmForm && ((OffsetFitsMnemonic &&
- ((Offset % offsetMinAlign(MI)) == 0)) ||
- OpC == TargetOpcode::STACKMAP ||
- OpC == TargetOpcode::PATCHPOINT)) {
+ if (!noImmForm &&
+ ((OffsetFitsMnemonic && ((Offset % offsetMinAlign(MI)) == 0)) ||
+ OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT ||
+ OpC == PPC::STMW || OpC == PPC::LMW)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
return false;
}
>From aeb446d4c88ae18ca9c2bef263b41e405d54595f Mon Sep 17 00:00:00 2001
From: esmeyi <esme.yi at ibm.com>
Date: Thu, 14 Dec 2023 00:52:11 -0500
Subject: [PATCH 2/2] Fix incorrect insert points.
---
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 8 ++++----
llvm/test/CodeGen/PowerPC/aix-stm-lm-merge.ll | 16 ++++++++++++++++
2 files changed, 20 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/aix-stm-lm-merge.ll
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 3b28069a1eb37d..239e9b7f4960c5 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2410,10 +2410,10 @@ static void findContinuousLoadStore(const MachineFrameInfo &MFI,
Register &MergeFrom) {
for (unsigned I = 0, E = CSI.size(); I + 1 < E; ++I) {
- Register CurrReg = CSI[I].getReg();
- if (CurrReg >= PPC::R31)
+ if (CSI[I + 1].getReg() >= PPC::R31)
break;
+ Register CurrReg = CSI[I].getReg();
if (CurrReg < PPC::R13 || CSI[I].isSpilledToReg() ||
CSI[I].getFrameIdx() >= 0)
continue;
@@ -2558,7 +2558,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
// Build an STMW instruction.
int FrameIdx = I.getFrameIdx();
MachineInstrBuilder MIB =
- BuildMI(MBB, MBB.begin(), DL, TII.get(PPC::STMW));
+ BuildMI(MBB, MI, DL, TII.get(PPC::STMW));
MIB.addReg(Reg, getKillRegState(!IsLiveIn));
// Add frame reference.
MIB.addImm(0).addFrameIndex(FrameIdx);
@@ -2754,7 +2754,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
int FrameIdx = CSI[i].getFrameIdx();
DebugLoc DL;
MachineInstrBuilder MIB =
- BuildMI(MBB, MBB.begin(), DL, TII.get(PPC::LMW), Reg);
+ BuildMI(MBB, I, DL, TII.get(PPC::LMW), Reg);
// Add frame reference.
MIB.addImm(0).addFrameIndex(FrameIdx);
MachineMemOperand *MMO = MF->getMachineMemOperand(
diff --git a/llvm/test/CodeGen/PowerPC/aix-stm-lm-merge.ll b/llvm/test/CodeGen/PowerPC/aix-stm-lm-merge.ll
new file mode 100644
index 00000000000000..a8e0dd8cf6ef52
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-stm-lm-merge.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=powerpc-unknown-aix-xcoff -verify-machineinstrs \
+; RUN: -mcpu=pwr4 -mattr=-altivec --ppc-enable-load-store-multiple < %s \
+; RUN: | FileCheck %s
+
+define dso_local void @test_simple() #0 {
+entry:
+ call void asm sideeffect "nop", "~{r16}"()
+ ret void
+}
+
+; CHECK: stmw 16, -64(1) # 4-byte Folded Spill
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lmw 16, -64(1) # 4-byte Folded Reload
+
More information about the llvm-commits
mailing list