[llvm] f0ba1ae - [PowerPC] folding rlwinm + rlwinm to rlwinm

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 18:51:30 PST 2019


Author: czhengsz
Date: 2019-12-03T21:51:19-05:00
New Revision: f0ba1aec35d599353b6c5eca8286791b1c410b7c

URL: https://github.com/llvm/llvm-project/commit/f0ba1aec35d599353b6c5eca8286791b1c410b7c
DIFF: https://github.com/llvm/llvm-project/commit/f0ba1aec35d599353b6c5eca8286791b1c410b7c.diff

LOG: [PowerPC] folding rlwinm + rlwinm to rlwinm

  For example:
    x3 = rlwinm x3, 27, 5, 31
    x3 = rlwinm x3, 19, 0, 12
  can be combined to
    x3 = rlwinm x3, 14, 0, 12

Reviewed by: steven.zhang, lkail

Differential Revision: https://reviews.llvm.org/D70374

Added: 
    llvm/test/CodeGen/PowerPC/fold-rlwinm.mir

Modified: 
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 6aa2fdcbec82..7eeff007b78f 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -18,6 +18,7 @@
 //
 //===---------------------------------------------------------------------===//
 
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
@@ -806,6 +807,143 @@ bool PPCMIPeephole::simplifyCode(void) {
                       combineSEXTAndSHL(MI, ToErase);
         break;
       }
+      case PPC::RLWINM:
+      case PPC::RLWINMo:
+      case PPC::RLWINM8:
+      case PPC::RLWINM8o: {
+        unsigned FoldingReg = MI.getOperand(1).getReg();
+        if (!Register::isVirtualRegister(FoldingReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+        if (SrcMI->getOpcode() != PPC::RLWINM &&
+            SrcMI->getOpcode() != PPC::RLWINMo &&
+            SrcMI->getOpcode() != PPC::RLWINM8 &&
+            SrcMI->getOpcode() != PPC::RLWINM8o)
+          break;
+        assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+                MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+                SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+               "Invalid PPC::RLWINM Instruction!");
+        uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+        uint64_t SHMI = MI.getOperand(2).getImm();
+        uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+        uint64_t MBMI = MI.getOperand(3).getImm();
+        uint64_t MESrc = SrcMI->getOperand(4).getImm();
+        uint64_t MEMI = MI.getOperand(4).getImm();
+
+        assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+               "Invalid PPC::RLWINM Instruction!");
+
+        // If MBMI is bigger than MEMI, we can never get a run of ones.
+        // RotatedSrcMask non-wrap:
+        //                 0........31|32........63
+        // RotatedSrcMask:   B---E        B---E
+        // MaskMI:         -----------|--E  B------
+        // Result:           -----          ---      (Bad candidate)
+        //
+        // RotatedSrcMask wrap:
+        //                 0........31|32........63
+        // RotatedSrcMask: --E   B----|--E    B----
+        // MaskMI:         -----------|--E  B------
+        // Result:         ---   -----|---    -----  (Bad candidate)
+        //
+        // One special case is when RotatedSrcMask is a full set mask.
+        // RotatedSrcMask full:
+        //                 0........31|32........63
+        // RotatedSrcMask: ------EB---|-------EB---
+        // MaskMI:         -----------|--E  B------
+        // Result:         -----------|---  -------  (Good candidate)
+
+        // Mark special case.
+        bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+        // For other MBMI > MEMI cases, just return.
+        if ((MBMI > MEMI) && !SrcMaskFull)
+          break;
+
+        // Handle MBMI <= MEMI cases.
+        APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+        // MI only needs the low 32 bits of SrcMI, so just consider the low 32
+        // bits of the SrcMI mask. Note that in APInt, the lowest bit is at
+        // index 0, while in the PowerPC ISA, the lowest bit is at index 63.
+        APInt MaskSrc =
+            APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+        // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
+        // equal to highBit.
+        // If MBSrc - MESrc == 1, we expect a full set mask instead of Null.
+        if (SrcMaskFull && (MBSrc - MESrc == 1))
+          MaskSrc.setAllBits();
+
+        APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+        APInt FinalMask = RotatedSrcMask & MaskMI;
+        uint32_t NewMB, NewME;
+
+        // If final mask is 0, MI result should be 0 too.
+        if (FinalMask.isNullValue()) {
+          bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+                          MI.getOpcode() == PPC::RLWINM8o);
+
+          LLVM_DEBUG(dbgs() << "Replace Instr: ");
+          LLVM_DEBUG(MI.dump());
+
+          if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+            // Replace MI with "LI 0"
+            MI.RemoveOperand(4);
+            MI.RemoveOperand(3);
+            MI.RemoveOperand(2);
+            MI.getOperand(1).ChangeToImmediate(0);
+            MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+          } else {
+            // Replace MI with "ANDIo reg, 0"
+            MI.RemoveOperand(4);
+            MI.RemoveOperand(3);
+            MI.getOperand(2).setImm(0);
+            MI.setDesc(TII->get(Is64Bit ? PPC::ANDIo8 : PPC::ANDIo));
+          }
+          Simplified = true;
+          NumRotatesCollapsed++;
+
+          LLVM_DEBUG(dbgs() << "With: ");
+          LLVM_DEBUG(MI.dump());
+        } else if (isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+                               NewME) || SrcMaskFull) {
+          // If FoldingReg has only one use and its def is not RLWINMo or
+          // RLWINM8o, it is safe to delete its def SrcMI. Otherwise keep it.
+          if (MRI->hasOneNonDBGUse(FoldingReg) &&
+              (SrcMI->getOpcode() == PPC::RLWINM ||
+               SrcMI->getOpcode() == PPC::RLWINM8)) {
+            ToErase = SrcMI;
+            LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+            LLVM_DEBUG(SrcMI->dump());
+          }
+
+          LLVM_DEBUG(dbgs() << "Converting Instr: ");
+          LLVM_DEBUG(MI.dump());
+
+          uint16_t NewSH = (SHSrc + SHMI) % 32;
+          MI.getOperand(2).setImm(NewSH);
+          // If SrcMI mask is full, no need to update MBMI and MEMI.
+          if (!SrcMaskFull) {
+            MI.getOperand(3).setImm(NewMB);
+            MI.getOperand(4).setImm(NewME);
+          }
+          MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+          if (SrcMI->getOperand(1).isKill()) {
+            MI.getOperand(1).setIsKill(true);
+            SrcMI->getOperand(1).setIsKill(false);
+          } else
+            // About to replace MI.getOperand(1), clear its kill flag.
+            MI.getOperand(1).setIsKill(false);
+
+          Simplified = true;
+          NumRotatesCollapsed++;
+
+          LLVM_DEBUG(dbgs() << "To: ");
+          LLVM_DEBUG(MI.dump());
+        }
+        break;
+      }
       }
     }
 

diff  --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
index b0586b06cd1f..12887d892259 100644
--- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll
@@ -11,8 +11,7 @@ define void @foo(i32 signext %var1) {
 ; CHECK-NEXT:    xori r3, r3, 1
 ; CHECK-NEXT:    addis r4, r2, res at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
-; CHECK-NEXT:    slwi r3, r3, 19
+; CHECK-NEXT:    rlwinm r3, r3, 14, 0, 12
 ; CHECK-NEXT:    stw r3, res at toc@l(r4)
 ; CHECK-NEXT:    blr
 entry:
@@ -30,10 +29,10 @@ define void @foo_multiple_use(i32 signext %var1) {
 ; CHECK-NEXT:    addis r4, r2, res2 at toc@ha
 ; CHECK-NEXT:    addis r6, r2, res at toc@ha
 ; CHECK-NEXT:    cntlzw r3, r3
-; CHECK-NEXT:    srwi r3, r3, 5
-; CHECK-NEXT:    slwi r5, r3, 19
-; CHECK-NEXT:    stw r3, res2 at toc@l(r4)
-; CHECK-NEXT:    stw r5, res at toc@l(r6)
+; CHECK-NEXT:    srwi r5, r3, 5
+; CHECK-NEXT:    rlwinm r3, r3, 14, 0, 12
+; CHECK-NEXT:    stw r5, res2 at toc@l(r4)
+; CHECK-NEXT:    stw r3, res at toc@l(r6)
 ; CHECK-NEXT:    blr
 entry:
   %cmp = icmp eq i32 %var1, 1

diff  --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
new file mode 100644
index 000000000000..426aaa7a7631
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir
@@ -0,0 +1,140 @@
+# RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
+# RUN:   -run-pass ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s
+
+---
+name: testFoldRLWINM
+#CHECK : name : testFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 31
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcFullMask1
+#CHECK : name : testFoldRLWINMSrcFullMask1
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 0, 31
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 0, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 0, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcFullMask2
+#CHECK : name : testFoldRLWINMSrcFullMask2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 10, 9 
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 10, 9
+    %3:gprc = RLWINM %2:gprc, 19, 10, 1
+    ; CHECK: %3:gprc = RLWINM %1, 14, 10, 1
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMSrcWrapped
+#CHECK : name : testFoldRLWINMSrcWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 30, 10
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 30 ,10 
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM %1, 14, 11, 12
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMUserWrapped
+#CHECK : name : testFoldRLWINMUserWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 10, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 10, 5, 31
+    %3:gprc = RLWINM %2:gprc, 10, 30, 5
+    ; CHECK: %3:gprc = RLWINM %2, 10, 30, 5
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMMultipleUses
+#CHECK : name : testFoldRLWINMMultipleUses
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM killed %1:gprc, 27, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 27, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 0, 12
+    ; CHECK: %3:gprc = RLWINM killed %1, 14, 0, 12
+    STW %3:gprc, %2:gprc, 100 
+    ; CHECK: STW %3, %2, 100
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMToZero
+#CHECK : name : testFoldRLWINMToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    %3:gprc = RLWINM %2:gprc, 8, 5, 10
+    ; CHECK: %3:gprc = LI 0
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMoToZero
+#CHECK : name : testFoldRLWINMoToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    ; CHECK-NOT: %2:gprc = RLWINM %1:gprc, 27, 5, 10
+    %3:gprc = RLWINMo %2:gprc, 8, 5, 10, implicit-def $cr0
+    ; CHECK: %3:gprc = ANDIo %2, 0, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm
+...
+---
+name: testFoldRLWINMInvalidMask
+#CHECK : name : testFoldRLWINMInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3
+    %0:g8rc = COPY $x3
+    %1:gprc = COPY %0.sub_32:g8rc
+    %2:gprc = RLWINM %1:gprc, 20, 5, 31
+    ; CHECK: %2:gprc = RLWINM %1, 20, 5, 31
+    %3:gprc = RLWINM %2:gprc, 19, 10, 20
+    ; CHECK: %3:gprc = RLWINM %2, 19, 10, 20
+    BLR8 implicit $lr8, implicit $rm
+...


        


More information about the llvm-commits mailing list