[llvm] [PowerPC] Extend folding RLWINM + RLWINM to post-RA. (PR #67293)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 25 01:10:38 PDT 2023


https://github.com/EsmeYi created https://github.com/llvm/llvm-project/pull/67293

This PR is migrated from Phabricator https://reviews.llvm.org/D89855.
We have patterns that fold two RLWINMs in ppc-mi-peephole, but some RLWINMs are generated after RA, for example [rGc4690b007743](https://reviews.llvm.org/rGc4690b007743d2f564bc1156fdbdbcaad2adddcc). We expect the optimization to also be performed when an RLWINM generated after RA is followed by another RLWINM.

>From 5b1711c59e85cd468c47bde546b40b8244b11ae2 Mon Sep 17 00:00:00 2001
From: esmeyi <esme.yi at ibm.com>
Date: Mon, 25 Sep 2023 03:45:11 -0400
Subject: [PATCH] [PowerPC] Extend folding RLWINM + RLWINM to post-RA.

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 134 ++++++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |   3 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp     |   2 +-
 .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp |   9 +
 .../CodeGen/PowerPC/fold-rlwinm-after-ra.mir  | 194 ++++++++++++++++++
 llvm/test/CodeGen/PowerPC/vsx_builtins.ll     |   3 +-
 6 files changed, 299 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 75c6399fe7e16b1..011a91882412d86 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3759,18 +3759,78 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   return false;
 }
 
-bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
-                                 MachineInstr **ToErase) const {
+// Return true if SrcMI and MI are both 32-bit or both 64-bit instructions.
+static bool sameWidthMIs(MachineInstr *SrcMI, MachineInstr *MI, bool &Is64Bit) {
+  unsigned Opc = MI->getOpcode();
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if ((SrcOpc == PPC::RLWINM8 || SrcOpc == PPC::RLWINM8_rec) &&
+      (Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec)) {
+    Is64Bit = true;
+    return true;
+  }
+  if ((SrcOpc == PPC::RLWINM || SrcOpc == PPC::RLWINM_rec) &&
+      (Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec))
+    return true;
+  return false;
+}
+
+// This function tries to combine two RLWINMs. We not only perform such
+// optimization in SSA, but also after RA, since some RLWINM is generated after
+// RA.
+bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
+                                              MachineInstr *&ToErase) const {
+  unsigned UseOpc = MI.getOpcode();
+  if (UseOpc != PPC::RLWINM && UseOpc != PPC::RLWINM_rec &&
+      UseOpc != PPC::RLWINM8 && UseOpc != PPC::RLWINM8_rec)
+    return false;
+
+  // Find the source MI.
   MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
   Register FoldingReg = MI.getOperand(1).getReg();
-  if (!FoldingReg.isVirtual())
+  MachineInstr *SrcMI = nullptr;
+  bool CanErase = false;
+  bool OtherIntermediateUse = true;
+  if (MRI->isSSA()) {
+    if (!FoldingReg.isVirtual())
+      return false;
+    SrcMI = MRI->getVRegDef(FoldingReg);
+  } else {
+    if (!Register::isPhysicalRegister(FoldingReg))
+      return false;
+    SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse);
+  }
+  if (!SrcMI)
     return false;
-  MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
-  if (SrcMI->getOpcode() != PPC::RLWINM &&
-      SrcMI->getOpcode() != PPC::RLWINM_rec &&
-      SrcMI->getOpcode() != PPC::RLWINM8 &&
-      SrcMI->getOpcode() != PPC::RLWINM8_rec)
+
+  // Check if MI and SrcMI are both 32-bit or both 64-bit instructions.
+  // TODO: The pairs of RLWINM8(RLWINM) or RLWINM(RLWINM8) never occur before
+  // RA, but after RA. Even they are not in the same bit-width, we can do the
+  // foldings for RLWINM8(RLWINM)->RLWINM8, or RLWINM(RLWINM8)->RLWINM.
+  bool Is64Bit = false;
+  if (!sameWidthMIs(SrcMI, &MI, Is64Bit))
     return false;
+
+  // Check if the registers(def and use) meet the requirements for folding.
+  MachineOperand ForwardRegOp = SrcMI->getOperand(1);
+  Register ForwardReg = ForwardRegOp.getReg();
+  bool IsFwdFeederRegKilled = false;
+  bool SeenIntermediateUse = false;
+  bool IsMIUseRegKilled = MI.getOperand(1).isKill();
+  if (MRI->isSSA()) {
+    CanErase = !SrcMI->hasImplicitDef() && MRI->hasOneNonDBGUse(FoldingReg);
+  } else {
+    bool KillFwdDefMI = !OtherIntermediateUse && IsMIUseRegKilled;
+    CanErase = KillFwdDefMI && !SrcMI->hasImplicitDef();
+    // In post-RA, if SrcMI also defines the register to be forwarded, we can
+    // only do the folding if SrcMI is going to be erased.
+    if (!CanErase && SrcMI->definesRegister(ForwardReg))
+      return false;
+    // Check if the SrcReg can be forwarded to MI.
+    if (!isRegElgibleForForwarding(ForwardRegOp, *SrcMI, MI, KillFwdDefMI,
+                                   IsFwdFeederRegKilled, SeenIntermediateUse))
+      return false;
+  }
+
   assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
           MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
           SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
@@ -3781,7 +3841,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
   uint64_t MBMI = MI.getOperand(3).getImm();
   uint64_t MESrc = SrcMI->getOperand(4).getImm();
   uint64_t MEMI = MI.getOperand(4).getImm();
-
   assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
          "Invalid PPC::RLWINM Instruction!");
   // If MBMI is bigger than MEMI, we always can not get run of ones.
@@ -3804,7 +3863,8 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
   // MaskMI:         -----------|--E  B------
   // Result:         -----------|---  -------  (Good candidate)
 
-  // Mark special case.
+  // Mark the special cases of all bits in a 64-bit register or the low 32 bits
+  // in a 64-bit register.
   bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
 
   // For other MBMI > MEMI cases, just return.
@@ -3814,8 +3874,8 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
   // Handle MBMI <= MEMI cases.
   APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
   // In MI, we only need low 32 bits of SrcMI, just consider about low 32
-  // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
-  // while in PowerPC ISA, lowerest bit is at index 63.
+  // bit of SrcMI mask. Note that in APInt, the least significant bit is at
+  // index 0, while in PowerPC ISA, the least significant bit is at index 63.
   APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
 
   APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
@@ -3823,29 +3883,23 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
   uint32_t NewMB, NewME;
   bool Simplified = false;
 
-  // If final mask is 0, MI result should be 0 too.
+  // If final mask is 0, replace MI with LI/LI8 0 or ANDI_rec/ANDI8_rec 0.
   if (FinalMask.isZero()) {
-    bool Is64Bit =
-        (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
     Simplified = true;
     LLVM_DEBUG(dbgs() << "Replace Instr: ");
     LLVM_DEBUG(MI.dump());
 
-    if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
-      // Replace MI with "LI 0"
-      MI.removeOperand(4);
-      MI.removeOperand(3);
-      MI.removeOperand(2);
-      MI.getOperand(1).ChangeToImmediate(0);
-      MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
-    } else {
-      // Replace MI with "ANDI_rec reg, 0"
-      MI.removeOperand(4);
-      MI.removeOperand(3);
-      MI.getOperand(2).setImm(0);
-      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
-      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
-      if (SrcMI->getOperand(1).isKill()) {
+    LoadImmediateInfo LII;
+    LII.Imm = 0;
+    LII.Is64Bit = Is64Bit;
+    LII.SetCR = (UseOpc == PPC::RLWINM_rec || UseOpc == PPC::RLWINM8_rec);
+    replaceInstrWithLI(MI, LII);
+    if (LII.SetCR) {
+      MI.getOperand(1).setReg(ForwardReg);
+      // FIXME: If the register used by MI is `killed` before change, we need
+      // update the kill flag on the previous use of that register. Here we only
+      // considered the kill flag of the register used by SrcMI.
+      if (ForwardRegOp.isKill()) {
         MI.getOperand(1).setIsKill(true);
         SrcMI->getOperand(1).setIsKill(false);
       } else
@@ -3855,7 +3909,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
 
     LLVM_DEBUG(dbgs() << "With: ");
     LLVM_DEBUG(MI.dump());
-
   } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
               NewMB <= NewME) ||
              SrcMaskFull) {
@@ -3866,15 +3919,14 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
     LLVM_DEBUG(dbgs() << "Converting Instr: ");
     LLVM_DEBUG(MI.dump());
 
-    uint16_t NewSH = (SHSrc + SHMI) % 32;
-    MI.getOperand(2).setImm(NewSH);
-    // If SrcMI mask is full, no need to update MBMI and MEMI.
+    MI.getOperand(2).setImm((SHSrc + SHMI) % 32);
+    // If SrcMI mask is full, do not update MBMI and MEMI.
     if (!SrcMaskFull) {
       MI.getOperand(3).setImm(NewMB);
       MI.getOperand(4).setImm(NewME);
     }
-    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
-    if (SrcMI->getOperand(1).isKill()) {
+    MI.getOperand(1).setReg(ForwardReg);
+    if (ForwardRegOp.isKill()) {
       MI.getOperand(1).setIsKill(true);
       SrcMI->getOperand(1).setIsKill(false);
     } else
@@ -3884,12 +3936,10 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
     LLVM_DEBUG(dbgs() << "To: ");
     LLVM_DEBUG(MI.dump());
   }
-  if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
-      !SrcMI->hasImplicitDef()) {
-    // If FoldingReg has no non-debug use and it has no implicit def (it
-    // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
-    // Otherwise keep it.
-    *ToErase = SrcMI;
+  if (Simplified && CanErase) {
+    // If SrcMI has no implicit def, and FoldingReg has no non-debug use or
+    // its flag is "killed", it's safe to delete SrcMI. Otherwise keep it.
+    ToErase = SrcMI;
     LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
     LLVM_DEBUG(SrcMI->dump());
   }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 1f59e994d9cb1ad..450db1e5a1807d8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -589,7 +589,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                               SmallSet<Register, 4> &RegsToUpdate,
                               MachineInstr **KilledDef = nullptr) const;
   bool foldFrameOffset(MachineInstr &MI) const;
-  bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
+  bool simplifyRotateAndMaskInstr(MachineInstr &MI,
+                                  MachineInstr *&ToErase) const;
   bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
   bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
   bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 9ad319d334354a8..a2ae4c1100b4c86 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1206,7 +1206,7 @@ bool PPCMIPeephole::simplifyCode() {
         Register OrigOp1Reg = MI.getOperand(1).isReg()
                                   ? MI.getOperand(1).getReg()
                                   : PPC::NoRegister;
-        Simplified = TII->combineRLWINM(MI, &ToErase);
+        Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase);
         if (Simplified) {
           addRegToUpdate(OrigOp1Reg);
           if (MI.getOperand(1).isReg())
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 647f8a7475813bd..bf25125817ce72f 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -40,6 +40,8 @@ STATISTIC(NumFrameOffFoldInPreEmit,
           "Number of folding frame offset by using r+r in pre-emit peephole");
 STATISTIC(NumCmpsInPreEmit,
           "Number of compares eliminated in pre-emit peephole");
+STATISTIC(NumRotateInstrFoldInPreEmit,
+          "Number of folding Rotate instructions in pre-emit peephole");
 
 static cl::opt<bool>
 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -518,6 +520,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
             LLVM_DEBUG(MI.dump());
             InstrsToErase.push_back(&MI);
           }
+          MachineInstr *ToErase = nullptr;
+          if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) {
+            Changed = true;
+            NumRotateInstrFoldInPreEmit++;
+            if (ToErase)
+              InstrsToErase.push_back(ToErase);
+          }
         }
 
         // Eliminate conditional branch based on a constant CR bit by
diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
new file mode 100644
index 000000000000000..ecbd51c24f2ecb8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
@@ -0,0 +1,194 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \
+# RUN:   ppc-pre-emit-peephole %s -o - | FileCheck %s
+
+---
+name: testFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINM
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcFullMask1
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask1
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 0, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcFullMask2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask2
+    ; CHECK: liveins: $r2, $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r3 = RLWINM $r2, 14, 10, 1, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r2, 27, 10, 9
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 1, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMSrcWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMSrcWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 30, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMUserWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMUserWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r3 = RLWINM killed $r3, 10, 5, 31
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 10, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMResultWrapped
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMResultWrapped
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r3 = RLWINM killed $r3, 10, 20, 10
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 10, 20, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINMToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMToZero
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $r3 = LI 0
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 27, 5, 10
+    dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINM_recToZero
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINM_recToZero
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM killed $r3, 27, 5, 10
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testFoldRLWINMInvalidMask
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testFoldRLWINMInvalidMask
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r3 = RLWINM killed $r3, 20, 5, 31
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+    $r3 = RLWINM killed $r3, 20, 5, 31
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...
+---
+name: testFoldRLWINCanNotBeDeleted
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testFoldRLWINCanNotBeDeleted
+    ; CHECK: liveins: $r2, $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def dead $cr0
+    ; CHECK-NEXT: dead renamable $r3 = ANDI_rec $r2, 0, implicit-def $cr0
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def $cr0
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testCanNotFoldRLWINM
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r3
+    ; CHECK-LABEL: name: testCanNotFoldRLWINM
+    ; CHECK: liveins: $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r3 = RLWINM_rec killed $r3, 27, 5, 10, implicit-def dead $cr0
+    ; CHECK-NEXT: dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+    $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0
+    dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
+    BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
+...
+---
+name: testCanNotFoldRLWINM2
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r2, $r3
+    ; CHECK-LABEL: name: testCanNotFoldRLWINM2
+    ; CHECK: liveins: $r2, $r3, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: STD $x2, -8, $x1 :: (store (s64) into %stack.0)
+    ; CHECK-NEXT: $r3 = RLWINM killed $r2, 4, 28, 31
+    ; CHECK-NEXT: $r2 = LI 0, implicit-def $x2
+    ; CHECK-NEXT: $x2 = LD -8, $x1 :: (load (s64) from %stack.0)
+    ; CHECK-NEXT: renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x2, implicit killed $x3
+    $r3 = RLWINM killed $r2, 4, 28, 31
+    $r2 = LI 0, implicit-def $x2
+    dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x2, implicit killed $x3
+...
diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
index 694981b67a6c079..accad1a67e3a5ba 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -160,8 +160,7 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvtdivdp cr0, v2, v3
 ; CHECK-NEXT:    mfocrf r3, 128
-; CHECK-NEXT:    srwi r3, r3, 28
-; CHECK-NEXT:    rlwinm r3, r3, 28, 31, 31
+; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)



More information about the llvm-commits mailing list