[llvm] 8220591 - [PowerPC] [Peephole] fold frame offset by using index form to save add.
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 01:13:51 PDT 2019
Author: czhengsz
Date: 2019-10-25T04:13:30-04:00
New Revision: 822059147bf2870d88321824d41c92abb1f671d1
URL: https://github.com/llvm/llvm-project/commit/822059147bf2870d88321824d41c92abb1f671d1
DIFF: https://github.com/llvm/llvm-project/commit/822059147bf2870d88321824d41c92abb1f671d1.diff
LOG: [PowerPC] [Peephole] fold frame offset by using index form to save add.
renamable $x6 = ADDI8 $x1, -80 ;;; 0 is replaced with -80
renamable $x6 = ADD8 killed renamable $x6, renamable $x5
STW killed renamable $r3, 4, killed renamable $x6 :: (store 4 into %ir.14, !tbaa !2)
After PEI there is a peephole opt opportunity to combine above -80 in ADDI8 with 4 in the STW to eliminate unnecessary ADD8.
Expected result:
renamable $x6 = ADDI8 $x1, -76
STWX killed renamable $r3, renamable $x5, killed renamable $x6 :: (store 4 into %ir.6, !tbaa !2)
Reviewed by: stefanp
Differential Revision: https://reviews.llvm.org/D66329
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
llvm/lib/Target/PowerPC/PPCRegisterInfo.h
llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b10672965c9..fbe5895cf94d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2527,6 +2527,225 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
"RegNo should be killed or dead");
}
+// This opt tries to convert the following imm form to an index form to save an
+// add for stack variables.
+// Return false if no such pattern found.
+//
+// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
+// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
+//
+// can be converted to:
+//
+// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
+// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
+//
+// In order to eliminate ADD instr, make sure that:
+// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
+// new ADDI instr and ADDI can only take int16 Imm.
+// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
+// between ADDI and ADD instr since its original def in ADDI will be changed
+// in new ADDI instr. And also there should be no new def for it between
+// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
+// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
+// between ADD and Imm instr since ADD instr will be eliminated.
+// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
+// moved to Index instr.
+bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+ // Do this opt after PEI which is after RA. The reason is stack slot expansion
+ // in PEI may expose such opportunities since in PEI, stack slot offsets to
+ // frame base(OffsetAddi) are determined.
+ if (!PostRA)
+ return false;
+ unsigned ToBeDeletedReg = 0;
+ int64_t OffsetImm = 0;
+ unsigned XFormOpcode = 0;
+ ImmInstrInfo III;
+
+ // Check if Imm instr meets requirement.
+ if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
+ III))
+ return false;
+
+ bool OtherIntermediateUse = false;
+ MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
+
+ // Exit if there is other use between ADD and Imm instr or no def found.
+ if (OtherIntermediateUse || !ADDMI)
+ return false;
+
+ // Check if ADD instr meets requirement.
+ if (!isADDInstrEligibleForFolding(*ADDMI))
+ return false;
+
+ unsigned ScaleRegIdx = 0;
+ int64_t OffsetAddi = 0;
+ MachineInstr *ADDIMI = nullptr;
+
+ // Check if there is a valid ToBeChangedReg in ADDMI.
+ // 1: It must be killed.
+ // 2: Its definition must be a valid ADDIMI.
+ // 3: It must satify int16 offset requirement.
+ if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 2;
+ else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
+ ScaleRegIdx = 1;
+ else
+ return false;
+
+ assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
+ unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+ unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
+ auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
+ MachineBasicBlock::iterator End) {
+ for (auto It = ++Start; It != End; It++)
+ if (It->modifiesRegister(Reg, &getRegisterInfo()))
+ return true;
+ return false;
+ };
+ // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
+ // and Imm Instr.
+ if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
+ return false;
+
+ // Now start to do the transformation.
+ LLVM_DEBUG(dbgs() << "Replace instruction: "
+ << "\n");
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(ADDMI->dump());
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "with: "
+ << "\n");
+
+ // Update ADDI instr.
+ ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
+
+ // Update Imm instr.
+ MI.setDesc(get(XFormOpcode));
+ MI.getOperand(III.ImmOpNo)
+ .ChangeToRegister(ScaleReg, false, false,
+ ADDMI->getOperand(ScaleRegIdx).isKill());
+
+ MI.getOperand(III.OpNoForForwarding)
+ .ChangeToRegister(ToBeChangedReg, false, false, true);
+
+ // Eliminate ADD instr.
+ ADDMI->eraseFromParent();
+
+ LLVM_DEBUG(ADDIMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ return true;
+}
+
+bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
+ int64_t &Imm) const {
+ unsigned Opc = ADDIMI.getOpcode();
+
+ // Exit if the instruction is not ADDI.
+ if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
+ return false;
+
+ Imm = ADDIMI.getOperand(2).getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
+ unsigned Opc = ADDMI.getOpcode();
+
+ // Exit if the instruction is not ADD.
+ return Opc == PPC::ADD4 || Opc == PPC::ADD8;
+}
+
+bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
+ unsigned &ToBeDeletedReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetImm,
+ ImmInstrInfo &III) const {
+ // Only handle load/store.
+ if (!MI.mayLoadOrStore())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+
+ XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
+
+ // Exit if instruction has no index form.
+ if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
+ return false;
+
+ // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
+ if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
+ III, true))
+ return false;
+
+ if (!III.IsSummingOperands)
+ return false;
+
+ MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
+ MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
+ // Only support imm operands, not relocation slots or others.
+ if (!ImmOperand.isImm())
+ return false;
+
+ assert(RegOperand.isReg() && "Instruction format is not right");
+
+ // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
+ if (!RegOperand.isKill())
+ return false;
+
+ ToBeDeletedReg = RegOperand.getReg();
+ OffsetImm = ImmOperand.getImm();
+
+ return true;
+}
+
+bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI,
+ int64_t &OffsetAddi,
+ int64_t OffsetImm) const {
+ assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
+ MachineOperand &MO = ADDMI->getOperand(Index);
+
+ if (!MO.isKill())
+ return false;
+
+ bool OtherIntermediateUse = false;
+
+ ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
+ // Currently handle only one "add + Imminstr" pair case, exit if other
+ // intermediate use for ToBeChangedReg found.
+ // TODO: handle the cases where there are other "add + Imminstr" pairs
+ // with same offset in Imminstr which is like:
+ //
+ // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+ // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
+ // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
+ // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
+ // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
+ //
+ // can be converted to:
+ //
+ // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
+ // (OffsetAddi + OffsetImm)
+ // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
+ // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
+
+ if (OtherIntermediateUse || !ADDIMI)
+ return false;
+ // Check if ADDI instr meets requirement.
+ if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
+ return false;
+
+ if (isInt<16>(OffsetAddi + OffsetImm))
+ return true;
+ return false;
+}
+
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 19ab30cb0908..d76809027f83 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -422,6 +422,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
+ bool foldFrameOffset(MachineInstr &MI) const;
+ bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
+ bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
+ bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
+ unsigned &XFormOpcode,
+ int64_t &OffsetOfImmInstr,
+ ImmInstrInfo &III) const;
+ bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+ MachineInstr *&ADDIMI, int64_t &OffsetAddi,
+ int64_t OffsetImm) const;
/// Fixup killed/dead flag for register \p RegNo between instructions [\p
/// StartMI, \p EndMI]. Some PostRA transformations may violate register
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index b1c0433641dd..15c09a5a8681 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,8 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
+STATISTIC(NumFrameOffFoldInPreEmit,
+ "Number of folding frame offset by using r+r in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -202,6 +204,12 @@ namespace {
InstrsToErase.push_back(DefMIToErase);
}
}
+ if (TII->foldFrameOffset(MI)) {
+ Changed = true;
+ NumFrameOffFoldInPreEmit++;
+ LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+ LLVM_DEBUG(MI.dump());
+ }
}
// Eliminate conditional branch based on a constant CR bit by
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a50e05920cd4..a5fbb0c6ec64 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,15 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
public:
PPCRegisterInfo(const PPCTargetMachine &TM);
+ /// getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode
+ /// for a given imm form load/store opcode \p ImmFormOpcode.
+ /// FIXME: move this to PPCInstrInfo class.
+ unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const {
+ if (!ImmToIdxMap.count(ImmOpcode))
+ return PPC::INSTRUCTION_LIST_END;
+ return ImmToIdxMap.find(ImmOpcode)->second;
+ }
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
diff --git a/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir b/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
index 3c5c2be935e2..b8b9660e3e14 100644
--- a/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
@@ -18,11 +18,11 @@ body: |
bb.0.entry:
liveins: $x3, $x1, $x4, $x6
$x3 = ADDI8 $x1, -80
- ; CHECK: $x3 = ADDI8 $x1, -80
+ ; CHECK: $x3 = ADDI8 $x1, -76
$x4 = ADD8 killed $x3, killed $x4
- ; CHECK: $x4 = ADD8 killed $x3, killed $x4
+ ; CHECK-NOT: ADD8
$x6 = LD 4, killed $x4
- ; CHECK: $x6 = LD 4, killed $x4
+ ; CHECK: $x6 = LDX killed $x4, killed $x3
BLR8 implicit $lr8, implicit $rm
...
---
@@ -34,11 +34,11 @@ body: |
bb.0.entry:
liveins: $x3, $x1, $x4, $x6
$x3 = ADDI8 $x1, -80
- ; CHECK: $x3 = ADDI8 $x1, -80
+ ; CHECK: $x3 = ADDI8 $x1, -76
$x3 = ADD8 killed $x3, killed $x4
- ; CHECK: $x3 = ADD8 killed $x3, killed $x4
+ ; CHECK-NOT: ADD8
$x6 = LD 4, killed $x3
- ; CHECK: $x6 = LD 4, killed $x3
+ ; CHECK: $x6 = LDX killed $x4, killed $x3
BLR8 implicit $lr8, implicit $rm
...
---
@@ -112,3 +112,43 @@ body: |
STD killed $x4, killed $x6, 100
BLR8 implicit $lr8, implicit $rm
...
+---
+name: testIndexForm7
+#CHECK : name : testIndexForm7
+# There is other def for ToBeChangedReg between ADD instr and Imm instr
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x1, $x4, $x5, $x6
+ $x3 = ADDI8 $x1, -80
+ ; CHECK: $x3 = ADDI8 $x1, -80
+ $x4 = ADD8 killed $x3, killed $x5
+ ; CHECK: $x4 = ADD8 killed $x3, killed $x5
+ $x3 = LD 100, $x6
+ ; CHECK: $x3 = LD 100, $x6
+ STD killed $x3, killed $x6, 200
+ ; CHECK: STD killed $x3, killed $x6, 200
+ $x6 = LD 4, killed $x4
+ ; CHECK: $x6 = LD 4, killed $x4
+ BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testIndexForm8
+#CHECK : name : testIndexForm8
+# There is other def for ScaleReg between ADD instr and Imm instr
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x1, $x4, $x5, $x6
+ $x3 = ADDI8 $x1, -80
+ ; CHECK: $x3 = ADDI8 $x1, -80
+ $x4 = ADD8 killed $x3, killed $x5
+ ; CHECK: $x4 = ADD8 killed $x3, killed $x5
+ $x5 = LD 100, $x6
+ ; CHECK: $x5 = LD 100, $x6
+ STD killed $x5, killed $x6, 200
+ ; CHECK: STD killed $x5, killed $x6, 200
+ $x6 = LD 4, killed $x4
+ ; CHECK: $x6 = LD 4, killed $x4
+ BLR8 implicit $lr8, implicit $rm
+...
More information about the llvm-commits
mailing list