[llvm] cd83333 - [PowerPC] Fold redundant load immediates of zero and delete if possible

Kamau Bridgeman via llvm-commits llvm-commits at lists.llvm.org
Tue May 12 11:15:42 PDT 2020


Author: Kamau Bridgeman
Date: 2020-05-12T13:15:06-05:00
New Revision: cd83333fc8fa3d2924087485bc3d1998665b4987

URL: https://github.com/llvm/llvm-project/commit/cd83333fc8fa3d2924087485bc3d1998665b4987
DIFF: https://github.com/llvm/llvm-project/commit/cd83333fc8fa3d2924087485bc3d1998665b4987.diff

LOG: [PowerPC] Fold redundant load immediates of zero and delete if possible

This patch folds redundant load immediates of zero into their users: for
instructions that interpret the operand as the value zero rather than as
the register, the operand is replaced with the zero register. If the load
immediate is then no longer in use, it is deleted.

This is already done in earlier passes, but ppc-mi-peephole allows for
a more general implementation.

Differential Revision: https://reviews.llvm.org/D69168

Added: 
    llvm/test/CodeGen/PowerPC/fold-remove-li.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
    llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1ab3891c568f..5a329ec4f852 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1350,9 +1350,11 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
-bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
-                                 Register Reg, MachineRegisterInfo *MRI) const {
-  // For some instructions, it is legal to fold ZERO into the RA register field.
+// For some instructions, it is legal to fold ZERO into the RA register field.
+// This function performs that fold by replacing the operand with PPC::ZERO,
+// it does not consider whether the load immediate zero is no longer in use.
+bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                     Register Reg) const {
   // A zero immediate should always be loaded with a single li.
   unsigned DefOpc = DefMI.getOpcode();
   if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
@@ -1372,6 +1374,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   if (UseMCID.isPseudo())
     return false;
 
+  // We need to find which of the User's operands is to be folded, that will be
+  // the operand that matches the given register ID.
   unsigned UseIdx;
   for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
     if (UseMI.getOperand(UseIdx).isReg() &&
@@ -1409,13 +1413,19 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
               PPC::ZERO8 : PPC::ZERO;
   }
 
-  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
   UseMI.getOperand(UseIdx).setReg(ZeroReg);
+  return true;
+}
 
-  if (DeleteDef)
+// Folds zero into instructions which have a load immediate zero as an operand
+// but also recognize zero as immediate zero. If the definition of the load
+// has no more users it is deleted.
+bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                                 Register Reg, MachineRegisterInfo *MRI) const {
+  bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
+  if (MRI->use_nodbg_empty(Reg))
     DefMI.eraseFromParent();
-
-  return true;
+  return Changed;
 }
 
 static bool MBBDefinesCTR(MachineBasicBlock &MBB) {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 02f5a78ca3bb..f82bb83f4c27 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -336,6 +336,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                      MachineRegisterInfo *MRI) const override;
 
+  bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+                         Register Reg) const;
+
   // If conversion by predication (only supported by some branch instructions).
   // All of the profitability checks always return true; it is always
   // profitable to use the predicated branches.

diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 1c7c73803c1e..575108562a71 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
           "Number of pairs of rotate left, clear left/right collapsed");
 STATISTIC(NumEXTSWAndSLDICombined,
           "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumLoadImmZeroFoldedAndRemoved,
+          "Number of LI(8) reg, 0 that are folded to r0 and removed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -319,7 +321,22 @@ bool PPCMIPeephole::simplifyCode(void) {
 
       default:
         break;
-
+      case PPC::LI:
+      case PPC::LI8: {
+        // If we are materializing a zero, look for any use operands for which
+        // zero means immediate zero. All such operands can be replaced with
+        // PPC::ZERO.
+        if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
+          break;
+        unsigned MIDestReg = MI.getOperand(0).getReg();
+        for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
+          Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
+        if (MRI->use_nodbg_empty(MIDestReg)) {
+          ++NumLoadImmZeroFoldedAndRemoved;
+          ToErase = &MI;
+        }
+        break;
+      }
       case PPC::STD: {
         MachineFrameInfo &MFI = MF->getFrameInfo();
         if (MFI.hasVarSizedObjects() ||

diff  --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 31968dcae360..4d2595e1abdc 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1615,7 +1615,7 @@ body:             |
     %0 = LI8 89
     %2 = CMPDI %0, 87
     %4 = ISEL8 $zero8, %0, %2.sub_gt
-    ; CHECK: LI8 0
+    ; CHECK: ADDI8 %1, 0
     %5 = ADD8 killed %4, %1
     $x3 = COPY %5
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -2017,7 +2017,7 @@ body:             |
     %3 = LI -3
     %4 = CMPLWI %3, 87
     %6 = ISEL $zero, %3, %4.sub_gt
-    ; CHECK: LI 0
+    ; CHECK: ADDI killed %2, 0
     %7 = ADD4 killed %6, killed %2
     %9 = IMPLICIT_DEF
     %8 = INSERT_SUBREG %9, killed %7, 1

diff  --git a/llvm/test/CodeGen/PowerPC/fold-remove-li.ll b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
new file mode 100644
index 000000000000..de92a5806fd0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-remove-li.ll
@@ -0,0 +1,40 @@
+; NOTE: This test verifies that a redundant load immediate of zero is folded
+; NOTE: from its use in an isel and deleted as it is no longer in use.
+; RUN:  llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:      -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
+; RUN:  llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:      -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
+
+%0 = type { i32, i16 }
+
+@val = common dso_local local_unnamed_addr global %0* null, align 8
+
+define dso_local signext i32 @redunLoadImm(%0* %arg) {
+; CHECK-LABEL: redunLoadImm:
+; verify that the load immediate has been folded into the isel and deleted
+; CHECK-NOT:   li r[[REG1:[0-9]+]], 0
+; CHECK:       isel r[[REG2:[0-9]+]], 0, r[[REG3:[0-9]+]], eq
+
+bb:
+  %tmp = icmp eq %0* %arg, null
+  br i1 %tmp, label %bb9, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = getelementptr inbounds %0, %0* %arg, i64 0, i32 1
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb1
+  %tmp4 = load i16, i16* %tmp2, align 4
+  %tmp5 = sext i16 %tmp4 to i64
+  %tmp6 = getelementptr inbounds %0, %0* %arg, i64 %tmp5
+  %tmp7 = icmp eq i16 %tmp4, 0
+  %tmp8 = select i1 %tmp7, %0* null, %0* %tmp6
+  store %0* %tmp8, %0** @val, align 8
+  br label %bb3
+
+bb9:                                              ; preds = %bb
+  %tmp10 = load %0*, %0** @val, align 8
+  %tmp11 = getelementptr inbounds %0, %0* %tmp10, i64 0, i32 0
+  %tmp12 = load i32, i32* %tmp11, align 4
+  ret i32 %tmp12
+}

diff  --git a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
index e47a5c7c3ebf..2d003da447da 100644
--- a/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
+++ b/llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll
@@ -13,8 +13,7 @@
 ; CHECK: addic 29, 0, 20
 ; Save CR through R12 using R29 as the stack pointer (aligned base pointer).
 ; CHECK: mfcr 12
-; CHECK: stw 28, -24(29)
-; CHECK: stw 12, -28(29)
+; CHECK: stw 12, -24(29)
 
 target datalayout = "E-m:e-p:32:32-i64:64-n32"
 target triple = "powerpc-unknown-freebsd"


        


More information about the llvm-commits mailing list