[llvm] d1115c2 - [PowerPC] Optimize compare by using record form in post-RA.
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 30 22:35:20 PDT 2022
Author: esmeyi
Date: 2022-10-31T01:33:50-04:00
New Revision: d1115c2b84d42eefd1546463507ce84f45c6b7cd
URL: https://github.com/llvm/llvm-project/commit/d1115c2b84d42eefd1546463507ce84f45c6b7cd
DIFF: https://github.com/llvm/llvm-project/commit/d1115c2b84d42eefd1546463507ce84f45c6b7cd.diff
LOG: [PowerPC] Optimize compare by using record form in post-RA.
Summary: We currently optimize comparisons only in SSA form, so we miss optimization opportunities where the input of the comparison is lowered from a COPY in post-RA.
I.e., ExpandPostRA::LowerCopy is called after PPCInstrInfo::optimizeCompareInstr.
This patch optimizes the comparison in post-RA; only comparisons against zero are handled.
D131374 converts a comparison and its user into a compare against zero with the appropriate predicate on the branch, which creates additional opportunities for this patch.
Reviewed By: shchenz, lkail
Differential Revision: https://reviews.llvm.org/D131873
Added:
llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
Modified:
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 42685e7c489aa..965bdaaa8ecc5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2768,6 +2768,85 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return true;
}
+bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
+ MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
+ if (MRI->isSSA())
+ return false;
+
+ Register SrcReg, SrcReg2;
+ int64_t CmpMask, CmpValue;
+ if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
+ return false;
+
+ // Try to optimize the comparison against 0.
+ if (CmpValue || !CmpMask || SrcReg2)
+ return false;
+
+ // The record forms set the condition register based on a signed comparison
+ // with zero (see comments in optimizeCompareInstr). Since we can't do the
+ // equality checks in post-RA, we are more restricted on an unsigned
+ // comparison.
+ unsigned Opc = CmpMI.getOpcode();
+ if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
+ return false;
+
+ // The record forms are always based on a 64-bit comparison on PPC64
+ // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
+ // comparison. Since we can't do the equality checks in post-RA, we bail out
+ // in this case.
+ if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
+ return false;
+
+ // CmpMI can't be deleted if it has an implicit def.
+ if (CmpMI.hasImplicitDef())
+ return false;
+
+ bool SrcRegHasOtherUse = false;
+ MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
+ if (!SrcMI || !SrcMI->definesRegister(SrcReg))
+ return false;
+
+ MachineOperand RegMO = CmpMI.getOperand(0);
+ Register CRReg = RegMO.getReg();
+ if (CRReg != PPC::CR0)
+ return false;
+
+ // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
+ bool SeenUseOfCRReg = false;
+ bool IsCRRegKilled = false;
+ if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
+ SeenUseOfCRReg) ||
+ SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
+ return false;
+
+ int SrcMIOpc = SrcMI->getOpcode();
+ int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
+ if (NewOpC == -1)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(SrcMI->dump());
+
+ const MCInstrDesc &NewDesc = get(NewOpC);
+ SrcMI->setDesc(NewDesc);
+ MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
+ .addReg(CRReg, RegState::ImplicitDefine);
+ SrcMI->clearRegisterDeads(CRReg);
+
+ // Fix up killed/dead flag for SrcReg after transformation.
+ if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
+ fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
+
+ assert(SrcMI->definesRegister(PPC::CR0) &&
+ "Record-form instruction does not define cr0?");
+
+ LLVM_DEBUG(dbgs() << "with: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(CmpMI.dump());
+ return true;
+}
+
bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
bool PPCInstrInfo::isRegElgibleForForwarding(
const MachineOperand &RegMO, const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
- bool &IsFwdFeederRegKilled) const {
+ bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
// x = addi y, imm
// ...
// z = lfdx 0, x -> z = lfd imm(y)
@@ -4449,6 +4528,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
return false;
else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
IsFwdFeederRegKilled = true;
+ if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ SeenIntermediateUse = true;
// Made it to DefMI without encountering a clobber.
if ((&*It) == &DefMI)
break;
@@ -4888,9 +4969,10 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
return false;
bool IsFwdFeederRegKilled = false;
+ bool SeenIntermediateUse = false;
// Check if the RegMO can be forwarded to MI.
if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
- IsFwdFeederRegKilled))
+ IsFwdFeederRegKilled, SeenIntermediateUse))
return false;
// Get killed info in case fixup needed after transformation.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 26e9d0e52d573..4c720e251f15c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -251,7 +251,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
- bool &IsFwdFeederRegKilled) const;
+ bool &IsFwdFeederRegKilled,
+ bool &SeenIntermediateUse) const;
unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
@@ -644,6 +645,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ bool optimizeCmpPostRA(MachineInstr &MI) const;
+
/// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
bool getMemOperandsWithOffsetWidth(
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index cd6169ad52df9..6f1b34843343a 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -38,6 +38,8 @@ STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
+STATISTIC(NumCmpsInPreEmit,
+ "Number of compares eliminated in pre-emit peephole");
static cl::opt<bool>
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -508,6 +510,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
LLVM_DEBUG(MI.dump());
}
+ if (TII->optimizeCmpPostRA(MI)) {
+ Changed = true;
+ NumCmpsInPreEmit++;
+ LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
+ LLVM_DEBUG(MI.dump());
+ InstrsToErase.push_back(&MI);
+ }
}
// Eliminate conditional branch based on a constant CR bit by
diff --git a/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
new file mode 100644
index 0000000000000..1c00df2d26cfa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
@@ -0,0 +1,142 @@
+# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s
+
+---
+name: test1
+# The cmp instr is optimized with the record form.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4
+ renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+ renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3
+ ; CHECK-LABEL: name: test1
+ ; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0
+ ; CHECK-NOT: CMPDI
+ BCC 68, killed renamable $cr0, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test2
+# The imm of the comparison instr isn't 0.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4
+ renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+ renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3
+ ; CHECK-LABEL: name: test2
+ ; CHECK: CMPDI
+ BCC 68, killed renamable $cr0, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test3
+# The comparison instr has an implicit def.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4
+ renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+ renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3
+ ; CHECK-LABEL: name: test3
+ ; CHECK: CMPDI
+ BCC 68, killed renamable $cr0, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test4
+# There is another use for cr0 between OR8 instr and CMPDI instr.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4, $cr0
+ renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+ renamable $cr1 = MCRF killed $cr0, implicit $x3
+ renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
+ ; CHECK-LABEL: name: test4
+ ; CHECK: CMPDI
+ BCC 68, killed renamable $cr0, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test5
+# There is another def for cr0 between OR8 instr and CMPDI instr.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4
+ renamable $x3 = OR8 killed renamable $x3, renamable $x4
+ renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0
+ renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
+ ; CHECK-LABEL: name: test5
+ ; CHECK: CMPDI
+ BCC 68, killed renamable $cr0, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+---
+name: test6
+# The CR register defined by the compare isn't CR0.
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
+ liveins: $x3, $x4
+ renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
+ renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3
+ ; CHECK-LABEL: name: test6
+ ; CHECK: CMPDI
+ BCC 68, killed renamable $cr1, %bb.2
+
+ bb.1:
+ $x3 = LI8 102
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+ bb.2:
+ $x3 = LI8 116
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index f7f2f4b1c1867..bf3020732ee9b 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -2946,10 +2946,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P10-O0-NEXT: std r0, 16(r1)
; LE-P10-O0-NEXT: hashst r0, -8(r1)
; LE-P10-O0-NEXT: stdu r1, -64(r1)
-; LE-P10-O0-NEXT: mr r4, r3
+; LE-P10-O0-NEXT: mr. r4, r3
; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill
; LE-P10-O0-NEXT: li r3, 0
-; LE-P10-O0-NEXT: cmpdi r4, 0
; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill
; LE-P10-O0-NEXT: beq cr0, .LBB2_2
; LE-P10-O0-NEXT: # %bb.1: # %if.end
@@ -2979,10 +2978,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P9-O0-NEXT: std r0, 16(r1)
; LE-P9-O0-NEXT: hashst r0, -8(r1)
; LE-P9-O0-NEXT: stdu r1, -128(r1)
-; LE-P9-O0-NEXT: mr r4, r3
+; LE-P9-O0-NEXT: mr. r4, r3
; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
; LE-P9-O0-NEXT: li r3, 0
-; LE-P9-O0-NEXT: cmpdi r4, 0
; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
; LE-P9-O0-NEXT: beq cr0, .LBB2_2
; LE-P9-O0-NEXT: # %bb.1: # %if.end
@@ -3012,10 +3010,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P8-O0-NEXT: std r0, 16(r1)
; LE-P8-O0-NEXT: hashst r0, -8(r1)
; LE-P8-O0-NEXT: stdu r1, -128(r1)
-; LE-P8-O0-NEXT: mr r4, r3
+; LE-P8-O0-NEXT: mr. r4, r3
; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
; LE-P8-O0-NEXT: li r3, 0
-; LE-P8-O0-NEXT: cmpdi r4, 0
; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
; LE-P8-O0-NEXT: beq cr0, .LBB2_2
; LE-P8-O0-NEXT: # %bb.1: # %if.end
More information about the llvm-commits
mailing list