[llvm] c2c2dc5 - [AArch64LoadStoreOptimizer] Skip debug insts during pattern matching [12/14]
Vedant Kumar via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 22 17:05:28 PDT 2020
Author: Vedant Kumar
Date: 2020-04-22T17:03:40-07:00
New Revision: c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3
URL: https://github.com/llvm/llvm-project/commit/c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3
DIFF: https://github.com/llvm/llvm-project/commit/c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3.diff
LOG: [AArch64LoadStoreOptimizer] Skip debug insts during pattern matching [12/14]
Do not count the presence of debug insts against the limit set by
LdStLimit, and allow the optimizer to find matching insts by skipping
over debug insts.
Differential Revision: https://reviews.llvm.org/D78411
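For context, next_nodbg, prev_nodbg, and instructionsWithoutDebug (declared in
llvm/include/llvm/CodeGen/MachineBasicBlock.h) are the debug-skipping iteration
helpers this patch switches the pass to. Below is a minimal sketch of the
pattern, not the patch itself; the wrapper name nextRealInst is made up for
illustration:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  using namespace llvm;

  // Step to the next non-debug instruction, or End if none remains.
  // Unlike ++It, this never lands on a DBG_VALUE, so the iteration
  // (and any scan limit counted per step) behaves the same whether or
  // not debug info is present.
  static MachineBasicBlock::iterator
  nextRealInst(MachineBasicBlock::iterator It) {
    MachineBasicBlock::iterator End = It->getParent()->end();
    return next_nodbg(It, End);
  }

With plain ++MBBI, the DBG_VALUEs in the new test below would consume the scan
budget (its RUN lines set -aarch64-load-store-scan-limit=1) and defeat the
merge; with next_nodbg they are skipped and never counted.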
Added:
llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
Modified:
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index acd71bce0152..25237bf50dde 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -678,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
"Expected promotable zero stores.");
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way, the merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == MergeMI)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -748,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
const TargetRegisterInfo *TRI, unsigned Limit,
std::function<bool(MachineInstr &, bool)> &Fn) {
auto MBB = MI.getParent();
- for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
- E = MBB->rend();
- I != E; I++) {
+ for (MachineInstr &I :
+ instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
if (!Limit)
return false;
--Limit;
- bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), DefReg);
});
- if (!Fn(*I, isDef))
+ if (!Fn(I, isDef))
return false;
if (isDef)
break;
@@ -783,14 +782,14 @@ MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
const LdStPairFlags &Flags) {
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way, the merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
@@ -1009,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator StoreI) {
- MachineBasicBlock::iterator NextI = LoadI;
- ++NextI;
+ MachineBasicBlock::iterator NextI =
+ next_nodbg(LoadI, LoadI->getParent()->end());
int LoadSize = TII->getMemScale(*LoadI);
int StoreSize = TII->getMemScale(*StoreI);
@@ -1188,7 +1187,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1440,7 +1439,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1468,7 +1467,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
UsedInBetween.accumulate(MI);
@@ -1637,12 +1637,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
+ if (NextI == Update)
+ NextI = next_nodbg(NextI, E);
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1780,7 +1781,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
// We can't post-increment the stack pointer if any instruction between
// the memory access (I) and the increment (MBBI) can access the memory
@@ -1796,7 +1797,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
return E;
}
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1854,7 +1856,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
new file mode 100644
index 000000000000..ce2174a58577
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
@@ -0,0 +1,377 @@
+# Strip out debug info, then run ldst-opt with limit=1.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1, then strip out debug info.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+---
+
+### STG and its offset limits
+
+# CHECK-LABEL: name: test_STG_post
+# CHECK: STGPostIndex $x0, $x0, 7
+name: test_STG_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_same_reg
+# CHECK: STGPostIndex $x1, $x0, 7
+name: test_STG_post_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ STGOffset $x1, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_unaligned
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 8, 0
+name: test_STG_post_unaligned
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 8, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post2
+# CHECK: STGPostIndex $x0, $x0, -256
+name: test_STG_post2
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 4096, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post3
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 4112, 0
+name: test_STG_post3
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 4112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post4
+# CHECK: STGPostIndex $x0, $x0, 255
+name: test_STG_post4
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 4080, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post5
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 4096, 0
+name: test_STG_post5
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 4096, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### The rest of ST*G variants.
+
+# CHECK-LABEL: name: test_STZG_post
+# CHECK: STZGPostIndex $x0, $x0, 7
+name: test_STZG_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STZGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_ST2G_post
+# CHECK: ST2GPostIndex $x0, $x0, 7
+name: test_ST2G_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ST2GOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STZ2G_post
+# CHECK: STZ2GPostIndex $x0, $x0, 7
+name: test_STZ2G_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STZ2GOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### STGP and its offset limits
+
+# CHECK-LABEL: name: test_STGP_post
+# CHECK: STGPpost $x1, $x2, $x0, 7
+name: test_STGP_post
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post2
+# CHECK: STGPpost $x1, $x2, $x0, -64
+name: test_STGP_post2
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 1024, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post3
+# CHECK: STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 1040, 0
+name: test_STGP_post3
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 1040, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post4
+# CHECK: STGPpost $x1, $x2, $x0, 63
+name: test_STGP_post4
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 1008, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post5
+# CHECK: STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 1024, 0
+name: test_STGP_post5
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 1024, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms
+
+# CHECK-LABEL: name: test_STG_pre
+# CHECK: STGPreIndex $x0, $x0, 10
+name: test_STG_pre
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 10
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 160, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre
+# CHECK: STGPpre $x1, $x2, $x0, 10
+name: test_STGP_pre
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 10
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 160, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms with add/sub coming before the store.
+
+# CHECK-LABEL: name: test_STG_pre_back
+# CHECK: STGPreIndex $x0, $x0, 2
+name: test_STG_pre_back
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ $x0 = ADDXri $x0, 32, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_back
+# CHECK: STGPpre $x1, $x2, $x0, -3
+name: test_STGP_pre_back
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ $x0 = SUBXri $x0, 48, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### STGP with source register == address register
+
+# CHECK-LABEL: name: test_STGP_post_same_reg
+# CHECK: STGPpost $x0, $x0, $x0, 7
+name: test_STGP_post_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGPi $x0, $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_same_reg
+# CHECK: STGPpre $x0, $x0, $x0, 7
+name: test_STGP_pre_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGPi $x0, $x0, $x0, 7
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# This case cannot be merged because the source register is always read before writeback.
+# CHECK-LABEL: name: test_STGP_pre_back_same_reg
+# CHECK: SUBXri $x0, 48, 0
+# CHECK-NEXT: STGPi $x0, $x0, $x0, 0
+name: test_STGP_pre_back_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ $x0 = SUBXri $x0, 48, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGPi $x0, $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
index b44258abd887..fd09af8a85fc 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
---
### STG and its offset limits