[llvm] 8220591 - [PowerPC] [Peephole] fold frame offset by using index form to save add.

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 25 01:13:51 PDT 2019


Author: czhengsz
Date: 2019-10-25T04:13:30-04:00
New Revision: 822059147bf2870d88321824d41c92abb1f671d1

URL: https://github.com/llvm/llvm-project/commit/822059147bf2870d88321824d41c92abb1f671d1
DIFF: https://github.com/llvm/llvm-project/commit/822059147bf2870d88321824d41c92abb1f671d1.diff

LOG: [PowerPC] [Peephole] fold frame offset by using index form to save add.

renamable $x6 = ADDI8 $x1, -80      ;;; 0 is replaced with -80
renamable $x6 = ADD8 killed renamable $x6, renamable $x5
STW killed renamable $r3, 4, killed renamable $x6 :: (store 4 into %ir.14, !tbaa !2)

After PEI there is a peephole optimization opportunity: fold the -80 from the ADDI8 and the 4 from the STW into a single ADDI8 offset (-76) and switch the store to its index form, which eliminates the unnecessary ADD8.

Expected result:
renamable $x6 = ADDI8 $x1, -76
STWX killed renamable $r3, renamable $x5, killed renamable $x6 :: (store 4 into %ir.6, !tbaa !2)

Reviewed by: stefanp

Differential Revision: https://reviews.llvm.org/D66329

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.h
    llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b10672965c9..fbe5895cf94d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2527,6 +2527,225 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
          "RegNo should be killed or dead");
 }
 
+// This opt tries to convert the following imm form to an index form to save an
+// add for stack variables.
+// Return false if no such pattern found.
+//
+// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+// ADD instr:  ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
+// Imm instr:  Reg            = op OffsetImm, ToBeDeletedReg(killed)
+//
+// can be converted to:
+//
+// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
+// Index instr:    Reg            = opx ScaleReg, ToBeChangedReg(killed)
+//
+// In order to eliminate ADD instr, make sure that:
+// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
+//    new ADDI instr and ADDI can only take int16 Imm.
+// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
+//    between ADDI and ADD instr since its original def in ADDI will be changed
+//    in new ADDI instr. And also there should be no new def for it between
+//    ADD and Imm instr as ToBeChangedReg will be used in Index instr.
+// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
+//    between ADD and Imm instr since ADD instr will be eliminated.
+// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
+//    moved to Index instr.
+bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  // Do this opt after PEI which is after RA. The reason is stack slot expansion
+  // in PEI may expose such opportunities since in PEI, stack slot offsets to
+  // frame base(OffsetAddi) are determined.
+  if (MRI.isSSA())
+    return false;
+  unsigned ToBeDeletedReg = 0;
+  int64_t OffsetImm = 0;
+  unsigned XFormOpcode = 0;
+  ImmInstrInfo III;
+
+  // Check if Imm instr meets requirement.
+  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
+                                    III))
+    return false;
+
+  bool OtherIntermediateUse = false;
+  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
+
+  // Exit if there is another use between ADD and Imm instr or no def found.
+  if (OtherIntermediateUse || !ADDMI)
+    return false;
+
+  // Check if ADD instr meets requirement.
+  if (!isADDInstrEligibleForFolding(*ADDMI))
+    return false;
+
+  unsigned ScaleRegIdx = 0;
+  int64_t OffsetAddi = 0;
+  MachineInstr *ADDIMI = nullptr;
+
+  // Check if there is a valid ToBeChangedReg in ADDMI.
+  // 1: It must be killed.
+  // 2: Its definition must be a valid ADDIMI.
+  // 3: It must satisfy int16 offset requirement.
+  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
+    ScaleRegIdx = 2;
+  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
+    ScaleRegIdx = 1;
+  else
+    return false;
+
+  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
+  unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+  unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
+  // ScaleReg replaces the immediate and becomes RA of the index form, where
+  // R0/X0 stand for the constant zero instead of the register: do not fold.
+  if (ScaleReg == PPC::R0 || ScaleReg == PPC::X0)
+    return false;
+  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
+                       MachineBasicBlock::iterator End) {
+    for (auto It = ++Start; It != End; It++)
+      if (It->modifiesRegister(Reg, &getRegisterInfo()))
+        return true;
+    return false;
+  };
+  // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
+  // and Imm Instr.
+  if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
+    return false;
+
+  // Now start to do the transformation.
+  LLVM_DEBUG(dbgs() << "Replace instruction: " << "\n");
+  LLVM_DEBUG(ADDIMI->dump());
+  LLVM_DEBUG(ADDMI->dump());
+  LLVM_DEBUG(MI.dump());
+  LLVM_DEBUG(dbgs() << "with: " << "\n");
+
+  // Update ADDI instr.
+  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
+
+  // Update Imm instr.
+  MI.setDesc(get(XFormOpcode));
+  MI.getOperand(III.ImmOpNo)
+      .ChangeToRegister(ScaleReg, false, false,
+                        ADDMI->getOperand(ScaleRegIdx).isKill());
+
+  MI.getOperand(III.OpNoForForwarding)
+      .ChangeToRegister(ToBeChangedReg, false, false, true);
+
+  // Eliminate ADD instr.
+  ADDMI->eraseFromParent();
+
+  LLVM_DEBUG(ADDIMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  return true;
+}
+
+bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
+                                                 int64_t &Imm) const {
+  unsigned Opc = ADDIMI.getOpcode();
+  // Exit if the instruction is not ADDI.
+  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
+    return false;
+  // Operand 2 may be a relocation rather than an immediate; bail out then.
+  if (!ADDIMI.getOperand(2).isImm())
+    return false;
+  Imm = ADDIMI.getOperand(2).getImm();
+  return true;
+}
+
+bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
+  // Only ADD4 and ADD8 can be folded away; every other opcode is rejected
+  // by the caller.
+  const unsigned AddOpcode = ADDMI.getOpcode();
+  return AddOpcode == PPC::ADD8 || AddOpcode == PPC::ADD4;
+}
+
+// Check whether the imm-form load/store \p MI can be rewritten to its index
+// form. On success, sets \p ToBeDeletedReg (the killed register operand),
+// \p XFormOpcode, \p OffsetImm and \p III, and returns true.
+bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
+                                                unsigned &ToBeDeletedReg,
+                                                unsigned &XFormOpcode,
+                                                int64_t &OffsetImm,
+                                                ImmInstrInfo &III) const {
+  // Only handle load/store.
+  if (!MI.mayLoadOrStore())
+    return false;
+
+  // Exit if instruction has no index form.
+  XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
+  if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
+    return false;
+
+  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
+  if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
+                       III, true))
+    return false;
+
+  if (!III.IsSummingOperands)
+    return false;
+
+  // Bind by reference: the operands are only inspected, never modified.
+  const MachineOperand &ImmOperand = MI.getOperand(III.ImmOpNo);
+  const MachineOperand &RegOperand = MI.getOperand(III.OpNoForForwarding);
+  // Only support imm operands, not relocation slots or others.
+  if (!ImmOperand.isImm())
+    return false;
+
+  assert(RegOperand.isReg() && "Instruction format is not right");
+
+  // ToBeDeletedReg is still used after the Imm instr, so it cannot be deleted.
+  if (!RegOperand.isKill())
+    return false;
+
+  ToBeDeletedReg = RegOperand.getReg();
+  OffsetImm = ImmOperand.getImm();
+  return true;
+}
+
+// Return true if operand \p Index of \p ADDMI is a valid ToBeChangedReg: it is
+// killed by the ADD, its def \p ADDIMI is an ADDI with no use in between, and
+// \p OffsetAddi + \p OffsetImm still fits in int16.
+bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+                                         MachineInstr *&ADDIMI,
+                                         int64_t &OffsetAddi,
+                                         int64_t OffsetImm) const {
+  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
+  MachineOperand &Candidate = ADDMI->getOperand(Index);
+  if (!Candidate.isKill())
+    return false;
+
+  // Currently handle only one "add + Imminstr" pair case, exit if other
+  // intermediate use for ToBeChangedReg found.
+  // TODO: handle the cases where there are other "add + Imminstr" pairs
+  // with same offset in Imminstr which is like:
+  //
+  // ADDI instr: ToBeChangedReg  = ADDI FrameBaseReg, OffsetAddi
+  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
+  // Imm instr1: Reg1            = op1 OffsetImm, ToBeDeletedReg1(killed)
+  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
+  // Imm instr2: Reg2            = op2 OffsetImm, ToBeDeletedReg2(killed)
+  //
+  // can be converted to:
+  //
+  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
+  //                                       (OffsetAddi + OffsetImm)
+  // Index instr1:   Reg1           = opx1 ScaleReg1, ToBeChangedReg
+  // Index instr2:   Reg2           = opx2 ScaleReg2, ToBeChangedReg(killed)
+  bool UsedInBetween = false;
+  ADDIMI = getDefMIPostRA(Candidate.getReg(), *ADDMI, UsedInBetween);
+  if (UsedInBetween || !ADDIMI)
+    return false;
+
+  // The def must be an ADDI; this also fetches its immediate into OffsetAddi.
+  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
+    return false;
+
+  // The combined offset has to be encodable in the new ADDI.
+  return isInt<16>(OffsetAddi + OffsetImm);
+}
+
 // If this instruction has an immediate form and one of its operands is a
 // result of a load-immediate or an add-immediate, convert it to
 // the immediate form if the constant is in range.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 19ab30cb0908..d76809027f83 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -422,6 +422,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
 
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
+  // Fold ADDI + ADD + imm-form load/store into ADDI + index-form load/store.
+  bool foldFrameOffset(MachineInstr &MI) const;
+  bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
+  bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
+  bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &ToBeDeletedReg,
+                                    unsigned &XFormOpcode, int64_t &OffsetImm,
+                                    ImmInstrInfo &III) const;
+  bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
+                             MachineInstr *&ADDIMI, int64_t &OffsetAddi,
+                             int64_t OffsetImm) const;
 
   /// Fixup killed/dead flag for register \p RegNo between instructions [\p
   /// StartMI, \p EndMI]. Some PostRA transformations may violate register

diff  --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index b1c0433641dd..15c09a5a8681 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,8 @@ STATISTIC(NumRemovedInPreEmit,
           "Number of instructions deleted in pre-emit peephole");
 STATISTIC(NumberOfSelfCopies,
           "Number of self copy instructions eliminated");
+STATISTIC(NumFrameOffFoldInPreEmit,
+          "Number of folding frame offset by using r+r in pre-emit peephole");
 
 static cl::opt<bool>
 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -202,6 +204,12 @@ namespace {
               InstrsToErase.push_back(DefMIToErase);
             }
           }
+          if (TII->foldFrameOffset(MI)) {
+            Changed = true;
+            NumFrameOffFoldInPreEmit++;
+            LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+            LLVM_DEBUG(MI.dump());
+          }
         }
 
         // Eliminate conditional branch based on a constant CR bit by

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a50e05920cd4..a5fbb0c6ec64 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,15 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
 public:
   PPCRegisterInfo(const PPCTargetMachine &TM);
 
+  /// getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode
+  /// for a given imm form load/store opcode \p ImmOpcode, or
+  /// PPC::INSTRUCTION_LIST_END when ImmToIdxMap has no entry for it.
+  /// FIXME: move this to PPCInstrInfo class.
+  unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const {
+    auto It = ImmToIdxMap.find(ImmOpcode);
+    return It == ImmToIdxMap.end() ? PPC::INSTRUCTION_LIST_END : It->second;
+  }
+
   /// getPointerRegClass - Return the register class to use to hold pointers.
   /// This is used for addressing modes.
   const TargetRegisterClass *

diff  --git a/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir b/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
index 3c5c2be935e2..b8b9660e3e14 100644
--- a/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
+++ b/llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
@@ -18,11 +18,11 @@ body: |
   bb.0.entry:
     liveins: $x3, $x1, $x4, $x6
     $x3 = ADDI8 $x1, -80
-    ; CHECK: $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -76
     $x4 = ADD8 killed $x3, killed $x4
-    ; CHECK: $x4 = ADD8 killed $x3, killed $x4
+    ; CHECK-NOT: ADD8
     $x6 = LD 4, killed $x4
-    ; CHECK: $x6 = LD 4, killed $x4
+    ; CHECK: $x6 = LDX killed $x4, killed $x3
     BLR8 implicit $lr8, implicit $rm
 ...
 ---
@@ -34,11 +34,11 @@ body: |
   bb.0.entry:
     liveins: $x3, $x1, $x4, $x6
     $x3 = ADDI8 $x1, -80
-    ; CHECK: $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -76
     $x3 = ADD8 killed $x3, killed $x4
-    ; CHECK: $x3 = ADD8 killed $x3, killed $x4
+    ; CHECK-NOT: ADD8
     $x6 = LD 4, killed $x3
-    ; CHECK: $x6 = LD 4, killed $x3
+    ; CHECK: $x6 = LDX killed $x4, killed $x3
     BLR8 implicit $lr8, implicit $rm
 ...
 ---
@@ -112,3 +112,43 @@ body: |
     STD killed $x4, killed $x6, 100
     BLR8 implicit $lr8, implicit $rm
 ...
+---
+name: testIndexForm7
+#CHECK : name : testIndexForm7
+# There is another def of ToBeChangedReg between the ADD instr and the Imm instr
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x1, $x4, $x5, $x6
+    $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -80
+    $x4 = ADD8 killed $x3, killed $x5
+    ; CHECK: $x4 = ADD8 killed $x3, killed $x5
+    $x3 = LD 100, $x6
+    ; CHECK: $x3 = LD 100, $x6
+    STD killed $x3, killed $x6, 200
+    ; CHECK: STD killed $x3, killed $x6, 200
+    $x6 = LD 4, killed $x4
+    ; CHECK: $x6 = LD 4, killed $x4
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testIndexForm8
+#CHECK : name : testIndexForm8
+# There is another def of ScaleReg between the ADD instr and the Imm instr
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x1, $x4, $x5, $x6
+    $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -80
+    $x4 = ADD8 killed $x3, killed $x5
+    ; CHECK: $x4 = ADD8 killed $x3, killed $x5
+    $x5 = LD 100, $x6
+    ; CHECK: $x5 = LD 100, $x6
+    STD killed $x5, killed $x6, 200
+    ; CHECK: STD killed $x5, killed $x6, 200
+    $x6 = LD 4, killed $x4
+    ; CHECK: $x6 = LD 4, killed $x4
+    BLR8 implicit $lr8, implicit $rm
+...


        


More information about the llvm-commits mailing list