[llvm] d1115c2 - [PowerPC] Optimize compare by using record form in post-RA.

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 30 22:35:20 PDT 2022


Author: esmeyi
Date: 2022-10-31T01:33:50-04:00
New Revision: d1115c2b84d42eefd1546463507ce84f45c6b7cd

URL: https://github.com/llvm/llvm-project/commit/d1115c2b84d42eefd1546463507ce84f45c6b7cd
DIFF: https://github.com/llvm/llvm-project/commit/d1115c2b84d42eefd1546463507ce84f45c6b7cd.diff

LOG: [PowerPC] Optimize compare by using record form in post-RA.

Summary: We currently optimize comparisons only while the function is in SSA form, so we miss optimization opportunities where the input of the comparison is lowered from a COPY in post-RA,
i.e. ExpandPostRA::LowerCopy is called after PPCInstrInfo::optimizeCompareInstr.
This patch optimizes the comparison in post-RA; only comparisons against zero can be handled.
D131374 converts the comparison and its user to a compare against zero with the appropriate predicate on the branch, which creates additional opportunities for this patch.

Reviewed By: shchenz, lkail

Differential Revision: https://reviews.llvm.org/D131873

Added: 
    llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 42685e7c489aa..965bdaaa8ecc5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2768,6 +2768,85 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   return true;
 }
 
+bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
+  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
+  if (MRI->isSSA())
+    return false;
+
+  Register SrcReg, SrcReg2;
+  int64_t CmpMask, CmpValue;
+  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
+    return false;
+
+  // Try to optimize the comparison against 0.
+  if (CmpValue || !CmpMask || SrcReg2)
+    return false;
+
+  // The record forms set the condition register based on a signed comparison
+  // with zero (see comments in optimizeCompareInstr). Since we can't do the
+  // equality checks in post-RA, we are more restricted on an unsigned
+  // comparison.
+  unsigned Opc = CmpMI.getOpcode();
+  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
+    return false;
+
+  // The record forms are always based on a 64-bit comparison on PPC64
+  // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
+  // comparison. Since we can't do the equality checks in post-RA, we bail out
+  // in this case.
+  if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
+    return false;
+
+  // CmpMI can't be deleted if it has an implicit def.
+  if (CmpMI.hasImplicitDef())
+    return false;
+
+  bool SrcRegHasOtherUse = false;
+  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
+  if (!SrcMI || !SrcMI->definesRegister(SrcReg))
+    return false;
+
+  MachineOperand RegMO = CmpMI.getOperand(0);
+  Register CRReg = RegMO.getReg();
+  if (CRReg != PPC::CR0)
+    return false;
+
+  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
+  bool SeenUseOfCRReg = false;
+  bool IsCRRegKilled = false;
+  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
+                                 SeenUseOfCRReg) ||
+      SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
+    return false;
+
+  int SrcMIOpc = SrcMI->getOpcode();
+  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
+  if (NewOpC == -1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Replace Instr: ");
+  LLVM_DEBUG(SrcMI->dump());
+
+  const MCInstrDesc &NewDesc = get(NewOpC);
+  SrcMI->setDesc(NewDesc);
+  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
+      .addReg(CRReg, RegState::ImplicitDefine);
+  SrcMI->clearRegisterDeads(CRReg);
+
+  // Fix up killed/dead flag for SrcReg after transformation.
+  if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
+    fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
+
+  assert(SrcMI->definesRegister(PPC::CR0) &&
+         "Record-form instruction does not define cr0?");
+
+  LLVM_DEBUG(dbgs() << "with: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+  LLVM_DEBUG(CmpMI.dump());
+  return true;
+}
+
 bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
 bool PPCInstrInfo::isRegElgibleForForwarding(
     const MachineOperand &RegMO, const MachineInstr &DefMI,
     const MachineInstr &MI, bool KillDefMI,
-    bool &IsFwdFeederRegKilled) const {
+    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
   // x = addi y, imm
   // ...
   // z = lfdx 0, x   -> z = lfd imm(y)
@@ -4449,6 +4528,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
       return false;
     else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
       IsFwdFeederRegKilled = true;
+    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+      SeenIntermediateUse = true;
     // Made it to DefMI without encountering a clobber.
     if ((&*It) == &DefMI)
       break;
@@ -4888,9 +4969,10 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
     return false;
 
   bool IsFwdFeederRegKilled = false;
+  bool SeenIntermediateUse = false;
   // Check if the RegMO can be forwarded to MI.
   if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
-                                 IsFwdFeederRegKilled))
+                                 IsFwdFeederRegKilled, SeenIntermediateUse))
     return false;
 
   // Get killed info in case fixup needed after transformation.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 26e9d0e52d573..4c720e251f15c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -251,7 +251,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
                                  const MachineInstr &MI, bool KillDefMI,
-                                 bool &IsFwdFeederRegKilled) const;
+                                 bool &IsFwdFeederRegKilled,
+                                 bool &SeenIntermediateUse) const;
   unsigned getSpillTarget() const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
@@ -644,6 +645,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
+  bool optimizeCmpPostRA(MachineInstr &MI) const;
+
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory.
   bool getMemOperandsWithOffsetWidth(

diff  --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index cd6169ad52df9..6f1b34843343a 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -38,6 +38,8 @@ STATISTIC(NumberOfSelfCopies,
           "Number of self copy instructions eliminated");
 STATISTIC(NumFrameOffFoldInPreEmit,
           "Number of folding frame offset by using r+r in pre-emit peephole");
+STATISTIC(NumCmpsInPreEmit,
+          "Number of compares eliminated in pre-emit peephole");
 
 static cl::opt<bool>
 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -508,6 +510,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
             LLVM_DEBUG(MI.dump());
           }
+          if (TII->optimizeCmpPostRA(MI)) {
+            Changed = true;
+            NumCmpsInPreEmit++;
+            LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
+            LLVM_DEBUG(MI.dump());
+            InstrsToErase.push_back(&MI);
+          }
         }
 
         // Eliminate conditional branch based on a constant CR bit by

diff  --git a/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
new file mode 100644
index 0000000000000..1c00df2d26cfa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
@@ -0,0 +1,142 @@
+# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s
+
+---
+name: test1
+# The cmp instr is optimized with the record form.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3
+    ; CHECK-LABEL: name: test1
+    ; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0
+    ; CHECK-NOT: CMPDI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test2
+# The imm of the comparison instr isn't 0.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3
+    ; CHECK-LABEL: name: test2
+    ; CHECK: CMPDI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test3
+# The comparison instr has an implicit def.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3
+    ; CHECK-LABEL: name: test3
+    ; CHECK: CMPDI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test4
+# There is another use of cr0 between the OR8 instr and the CMPDI instr.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4, $cr0
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr1 = MCRF killed $cr0, implicit $x3
+    renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
+    ; CHECK-LABEL: name: test4
+    ; CHECK: CMPDI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test5
+# There is another def of cr0 between the OR8 instr and the CMPDI instr.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, renamable $x4
+    renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0
+    renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
+    ; CHECK-LABEL: name: test5
+    ; CHECK: CMPDI
+    BCC 68, killed renamable $cr0, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test6
+# The CR register defined by the comparison isn't CR0.
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.2(0x50000000)
+    liveins: $x3, $x4
+    renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+    renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3
+    ; CHECK-LABEL: name: test6
+    ; CHECK: CMPDI
+    BCC 68, killed renamable $cr1, %bb.2
+
+  bb.1:
+    $x3 = LI8 102
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+  bb.2:
+    $x3 = LI8 116
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index f7f2f4b1c1867..bf3020732ee9b 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -2946,10 +2946,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P10-O0-NEXT:    std r0, 16(r1)
 ; LE-P10-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P10-O0-NEXT:    stdu r1, -64(r1)
-; LE-P10-O0-NEXT:    mr r4, r3
+; LE-P10-O0-NEXT:    mr. r4, r3
 ; LE-P10-O0-NEXT:    std r4, 40(r1) # 8-byte Folded Spill
 ; LE-P10-O0-NEXT:    li r3, 0
-; LE-P10-O0-NEXT:    cmpdi r4, 0
 ; LE-P10-O0-NEXT:    stw r3, 48(r1) # 4-byte Folded Spill
 ; LE-P10-O0-NEXT:    beq cr0, .LBB2_2
 ; LE-P10-O0-NEXT:  # %bb.1: # %if.end
@@ -2979,10 +2978,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-O0-NEXT:    std r0, 16(r1)
 ; LE-P9-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P9-O0-NEXT:    stdu r1, -128(r1)
-; LE-P9-O0-NEXT:    mr r4, r3
+; LE-P9-O0-NEXT:    mr. r4, r3
 ; LE-P9-O0-NEXT:    std r4, 104(r1) # 8-byte Folded Spill
 ; LE-P9-O0-NEXT:    li r3, 0
-; LE-P9-O0-NEXT:    cmpdi r4, 0
 ; LE-P9-O0-NEXT:    stw r3, 112(r1) # 4-byte Folded Spill
 ; LE-P9-O0-NEXT:    beq cr0, .LBB2_2
 ; LE-P9-O0-NEXT:  # %bb.1: # %if.end
@@ -3012,10 +3010,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-O0-NEXT:    std r0, 16(r1)
 ; LE-P8-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P8-O0-NEXT:    stdu r1, -128(r1)
-; LE-P8-O0-NEXT:    mr r4, r3
+; LE-P8-O0-NEXT:    mr. r4, r3
 ; LE-P8-O0-NEXT:    std r4, 104(r1) # 8-byte Folded Spill
 ; LE-P8-O0-NEXT:    li r3, 0
-; LE-P8-O0-NEXT:    cmpdi r4, 0
 ; LE-P8-O0-NEXT:    stw r3, 112(r1) # 4-byte Folded Spill
 ; LE-P8-O0-NEXT:    beq cr0, .LBB2_2
 ; LE-P8-O0-NEXT:  # %bb.1: # %if.end


        


More information about the llvm-commits mailing list