[llvm-branch-commits] [llvm] 1c0941e - [PowerPC] Extend folding RLWINM + RLWINM to post-RA.

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Nov 21 23:42:19 PST 2020


Author: Esme-Yi
Date: 2020-11-22T07:37:24Z
New Revision: 1c0941e1524f499e3fbde48fc3bdd0e70fc8f2e4

URL: https://github.com/llvm/llvm-project/commit/1c0941e1524f499e3fbde48fc3bdd0e70fc8f2e4
DIFF: https://github.com/llvm/llvm-project/commit/1c0941e1524f499e3fbde48fc3bdd0e70fc8f2e4.diff

LOG: [PowerPC] Extend folding RLWINM + RLWINM to post-RA.

Summary: We already have patterns to fold two RLWINMs before RA, but some RLWINMs are generated after RA (for example, by rGc4690b007743). If an RLWINM generated after RA is followed by another RLWINM, we want to perform the same optimization there too.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D89855

Added: 
    llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
    llvm/test/CodeGen/PowerPC/vsx_builtins.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2de6a7754015..45587504df56 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3231,18 +3231,64 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   return false;
 }
 
-bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
-                                 MachineInstr **ToErase) const {
+// This function tries to combine two RLWINMs. We not only perform such
+// optimization in SSA, but also after RA, since some RLWINM is generated after
+// RA.
+bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
+                                              MachineInstr *&ToErase) const {
+  bool Is64Bit = false;
+  switch (MI.getOpcode()) {
+  case PPC::RLWINM:
+  case PPC::RLWINM_rec:
+    break;
+  case PPC::RLWINM8:
+  case PPC::RLWINM8_rec:
+    Is64Bit = true;
+    break;
+  default:
+    return false;
+  }
   MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
-  unsigned FoldingReg = MI.getOperand(1).getReg();
-  if (!Register::isVirtualRegister(FoldingReg))
+  Register FoldingReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = nullptr;
+  bool CanErase = false;
+  bool OtherIntermediateUse = true;
+  if (MRI->isSSA()) {
+    if (!Register::isVirtualRegister(FoldingReg))
+      return false;
+    SrcMI = MRI->getVRegDef(FoldingReg);
+  } else {
+    SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse);
+  }
+  if (!SrcMI)
     return false;
-  MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
-  if (SrcMI->getOpcode() != PPC::RLWINM &&
-      SrcMI->getOpcode() != PPC::RLWINM_rec &&
-      SrcMI->getOpcode() != PPC::RLWINM8 &&
-      SrcMI->getOpcode() != PPC::RLWINM8_rec)
+  // TODO: The pairs of RLWINM8(RLWINM) or RLWINM(RLWINM8) never occur before
+  // RA, but after RA. And We can fold RLWINM8(RLWINM) -> RLWINM8, or
+  // RLWINM(RLWINM8) -> RLWINM.
+  switch (SrcMI->getOpcode()) {
+  case PPC::RLWINM:
+  case PPC::RLWINM_rec:
+    if (Is64Bit)
+      return false;
+    break;
+  case PPC::RLWINM8:
+  case PPC::RLWINM8_rec:
+    if (!Is64Bit)
+      return false;
+    break;
+  default:
     return false;
+  }
+  if (MRI->isSSA()) {
+    CanErase = !SrcMI->hasImplicitDef() && MRI->hasOneNonDBGUse(FoldingReg);
+  } else {
+    CanErase = !OtherIntermediateUse && MI.getOperand(1).isKill() &&
+               !SrcMI->hasImplicitDef();
+    // In post-RA, if SrcMI also defines the register to be forwarded, we can
+    // only do the folding if SrcMI is going to be erased.
+    if (!CanErase && SrcMI->definesRegister(SrcMI->getOperand(1).getReg()))
+      return false;
+  }
   assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
           MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
           SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
@@ -3253,7 +3299,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
   uint64_t MBMI = MI.getOperand(3).getImm();
   uint64_t MESrc = SrcMI->getOperand(4).getImm();
   uint64_t MEMI = MI.getOperand(4).getImm();
-
   assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
          "Invalid PPC::RLWINM Instruction!");
   // If MBMI is bigger than MEMI, we always can not get run of ones.
@@ -3297,8 +3342,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
 
   // If final mask is 0, MI result should be 0 too.
   if (FinalMask.isNullValue()) {
-    bool Is64Bit =
-        (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
     Simplified = true;
     LLVM_DEBUG(dbgs() << "Replace Instr: ");
     LLVM_DEBUG(MI.dump());
@@ -3356,12 +3399,10 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
     LLVM_DEBUG(dbgs() << "To: ");
     LLVM_DEBUG(MI.dump());
   }
-  if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
-      !SrcMI->hasImplicitDef()) {
-    // If FoldingReg has no non-debug use and it has no implicit def (it
-    // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
-    // Otherwise keep it.
-    *ToErase = SrcMI;
+  if (Simplified && CanErase) {
+    // If SrcMI has no implicit def, and FoldingReg has no non-debug use or
+    // its flag is "killed", it's safe to delete SrcMI. Otherwise keep it.
+    ToErase = SrcMI;
     LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
     LLVM_DEBUG(SrcMI->dump());
   }

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index c633916e7974..68bd3df6168a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -585,7 +585,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
   bool foldFrameOffset(MachineInstr &MI) const;
-  bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
+  bool simplifyRotateAndMaskInstr(MachineInstr &MI,
+                                  MachineInstr *&ToErase) const;
   bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
   bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
   bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,

diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 3fd02bc185c9..827d3c4693b9 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -848,7 +848,7 @@ bool PPCMIPeephole::simplifyCode(void) {
       case PPC::RLWINM_rec:
       case PPC::RLWINM8:
       case PPC::RLWINM8_rec: {
-        Simplified = TII->combineRLWINM(MI, &ToErase);
+        Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase);
         if (Simplified)
           ++NumRotatesCollapsed;
         break;

diff  --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 04749cdd61c4..f3def378a93c 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -37,6 +37,8 @@ STATISTIC(NumberOfSelfCopies,
           "Number of self copy instructions eliminated");
 STATISTIC(NumFrameOffFoldInPreEmit,
           "Number of folding frame offset by using r+r in pre-emit peephole");
+STATISTIC(NumRotateInstrFoldInPreEmit,
+          "Number of folding Rotate instructions in pre-emit peephole");
 
 static cl::opt<bool>
 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -472,6 +474,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
             LLVM_DEBUG(MI.dump());
           }
+          MachineInstr *ToErase = nullptr;
+          if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) {
+            Changed = true;
+            NumRotateInstrFoldInPreEmit++;
+            if (ToErase)
+              InstrsToErase.push_back(ToErase);
+          }
         }
 
         // Eliminate conditional branch based on a constant CR bit by

diff  --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
new file mode 100644
index 000000000000..182929053c63
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
@@ -0,0 +1,163 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \
+# RUN:   ppc-pre-emit-peephole %s -o - | FileCheck %s
+
+---
+name: testFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINM
+    ; CHECK: liveins: $r3
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcFullMask1
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask1
+    ; CHECK: liveins: $r3
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 0, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcFullMask2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask2
+    ; CHECK: liveins: $r2, $r3
+    ; CHECK: renamable $r3 = RLWINM $r2, 14, 10, 1, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r2, 27, 10, 9
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 1, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 30, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMUserWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMUserWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM killed $r3, 10, 5, 31
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 10, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMResultWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMResultWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM killed $r3, 10, 20, 10
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 10, 20, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMToZero
+    ; CHECK: liveins: $r3
+    ; CHECK: renamable $r3 = LI 0, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 5, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINM_recToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINM_recToZero
+    ; CHECK: liveins: $r3
+    ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 27, 5, 10
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testFoldRLWINMInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMInvalidMask
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31
+    ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 20, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINCanNotBeDeleted
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testFoldRLWINCanNotBeDeleted
+    ; CHECK: liveins: $r2, $r3
+    ; CHECK: $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def dead $cr0
+    ; CHECK: dead renamable $r3 = ANDI_rec $r2, 0, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def $cr0
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testCanNotFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testCanNotFoldRLWINM
+    ; CHECK: liveins: $r3
+    ; CHECK: $r3 = RLWINM_rec killed $r3, 27, 5, 10, implicit-def dead $cr0
+    ; CHECK: dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...

diff  --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
index 0aae50af2649..b40a84a7e95c 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -131,8 +131,7 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvtdivdp cr0, v2, v3
 ; CHECK-NEXT:    mfocrf r3, 128
-; CHECK-NEXT:    srwi r3, r3, 28
-; CHECK-NEXT:    rlwinm r3, r3, 28, 31, 31
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)


        


More information about the llvm-branch-commits mailing list