[llvm] r313639 - [PowerPC Peephole] Constants into a join add, use ADDI over LI/ADD.

Tony Jiang via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 19 09:14:37 PDT 2017


Author: jtony
Date: Tue Sep 19 09:14:37 2017
New Revision: 313639

URL: http://llvm.org/viewvc/llvm-project?rev=313639&view=rev
Log:
[PowerPC Peephole] Constants into a join add, use ADDI over LI/ADD.

Two blocks prior to the join each perform an li and the the join block has an
add using the initialized register. Optimize each predecessor block to instead
use addi and delete the li's and add.

Differential Revision: https://reviews.llvm.org/D36734

Added:
    llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp?rev=313639&r1=313638&r2=313639&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp Tue Sep 19 09:14:37 2017
@@ -23,6 +23,8 @@
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +35,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-mi-peepholes"
 
+STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+
 namespace llvm {
   void initializePPCMIPeepholePass(PassRegistry&);
 }
@@ -51,6 +55,8 @@ struct PPCMIPeephole : public MachineFun
   }
 
 private:
+  MachineDominatorTree *MDT;
+
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
@@ -65,6 +71,13 @@ private:
   unsigned lookThruCopyLike(unsigned SrcReg);
 
 public:
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
     if (skipFunction(*MF.getFunction()))
@@ -78,11 +91,25 @@ public:
 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
   DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
   DEBUG(MF->dump());
 }
 
+static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
+                                      MachineRegisterInfo *MRI) {
+  assert(Op && "Invalid Operand!");
+  if (!Op->isReg())
+    return nullptr;
+
+  unsigned Reg = Op->getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return nullptr;
+
+  return MRI->getVRegDef(Reg);
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode(void) {
   bool Simplified = false;
@@ -340,8 +367,97 @@ bool PPCMIPeephole::simplifyCode(void) {
         }
         break;
       }
+
+      // TODO: Any instruction that has an immediate form fed only by a PHI
+      // whose operands are all load immediate can be folded away. We currently
+      // do this for ADD instructions, but should expand it to arithmetic and
+      // binary instructions with immediate forms in the future.
+      case PPC::ADD4:
+      case PPC::ADD8: {
+        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
+          assert(PhiOp && "Invalid Operand!");
+          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+
+          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
+                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
+        };
+
+        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
+                                            MachineOperand *PhiOp) {
+          assert(PhiOp && "Invalid Operand!");
+          assert(DominatorOp && "Invalid Operand!");
+          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
+          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
+
+          // Note: the vregs only show up at odd indices position of PHI Node,
+          // the even indices position save the BB info.
+          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+            MachineInstr *LiMI =
+                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+            if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
+                !MDT->dominates(DefDomMI, LiMI) ||
+                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8))
+              return false;
+          }
+
+          return true;
+        };
+
+        MachineOperand Op1 = MI.getOperand(1);
+        MachineOperand Op2 = MI.getOperand(2);
+        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
+          std::swap(Op1, Op2);
+        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
+          break; // We don't have an ADD fed by LI's that can be transformed
+
+        // Now we know that Op1 is the PHI node and Op2 is the dominator
+        unsigned DominatorReg = Op2.getReg();
+
+        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
+                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
+                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
+        MRI->setRegClass(DominatorReg, TRC);
+
+        // replace LIs with ADDIs
+        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
+        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
+          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
+          DEBUG(dbgs() << "Optimizing LI to ADDI: ");
+          DEBUG(LiMI->dump());
+
+          // There could be repeated registers in the PHI, e.g: %vreg1<def> =
+          // PHI %vreg6, <BB#2>, %vreg8, <BB#3>, %vreg8, <BB#6>; So if we've
+          // already replaced the def instruction, skip.
+          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
+            continue;
+
+          assert((LiMI->getOpcode() == PPC::LI ||
+                  LiMI->getOpcode() == PPC::LI8) &&
+                 "Invalid Opcode!");
+          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
+          LiMI->RemoveOperand(1);                    // remove the imm of LI
+          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
+                                                              : PPC::ADDI8));
+          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
+              .addReg(DominatorReg)
+              .addImm(LiImm); // restore the imm of LI
+          DEBUG(LiMI->dump());
+        }
+
+        // Replace ADD with COPY
+        DEBUG(dbgs() << "Optimizing ADD to COPY: ");
+        DEBUG(MI.dump());
+        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+                MI.getOperand(0).getReg())
+            .add(Op1);
+        ToErase = &MI;
+        Simplified = true;
+        NumOptADDLIs++;
+        break;
+      }
       }
     }
+
     // If the last instruction was marked for elimination,
     // remove it now.
     if (ToErase) {

Added: llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll?rev=313639&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/opt-li-add-to-addi.ll Tue Sep 19 09:14:37 2017
@@ -0,0 +1,60 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+define i64 @testOptimizeLiAddToAddi(i64 %a) {
+; CHECK-LABEL: testOptimizeLiAddToAddi:
+; CHECK:    addi 3, 30, 2444
+; CHECK:    bl callv
+; CHECK:    addi 3, 30, 234
+; CHECK:    bl call
+; CHECK:    blr
+entry:
+  %cmp = icmp sgt i64 %a, 33
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void bitcast (void (...)* @callv to void ()*)()
+  br label %if.end
+
+if.end:
+  %add.0 = phi i64 [ 234, %if.then ], [ 2444, %entry ]
+  %add2 = add nsw i64 %add.0, %a
+  %call = tail call i64 @call(i64 %add2)
+  ret i64 %call
+}
+
+define i64 @testThreePhiIncomings(i64 %a) {
+; CHECK-LABEL: testThreePhiIncomings:
+; CHECK:    bl callv
+; CHECK:    addi 3, 30, 234
+; CHECK:    addi 3, 30, 2444
+; CHECK:    bl callv
+; CHECK:    addi 3, 30, 345
+; CHECK:    bl call
+; CHECK:    blr
+entry:
+  %cmp = icmp slt i64 %a, 33
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  tail call void bitcast (void (...)* @callv to void ()*)()
+  br label %if.end4
+
+if.else:
+  %cmp1 = icmp slt i64 %a, 55
+  br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2:
+  tail call void bitcast (void (...)* @callv to void ()*)()
+  br label %if.end4
+
+if.end4:
+  %add.0 = phi i64 [ 234, %if.then ], [ 345, %if.then2 ], [ 2444, %if.else ]
+  %add5 = add nsw i64 %add.0, %a
+  %call = tail call i64 @call(i64 %add5)
+  ret i64 %call
+}
+
+declare void @callv(...)
+
+declare i64 @call(i64)




More information about the llvm-commits mailing list