[llvm] c2c2dc5 - [AArch64LoadStoreOptimizer] Skip debug insts during pattern matching [12/14]
Vedant Kumar via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 22 17:05:28 PDT 2020
Author: Vedant Kumar
Date: 2020-04-22T17:03:40-07:00
New Revision: c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3
URL: https://github.com/llvm/llvm-project/commit/c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3
DIFF: https://github.com/llvm/llvm-project/commit/c2c2dc526a6592c3863ae2b3b6027c6f7c80fda3.diff
LOG: [AArch64LoadStoreOptimizer] Skip debug insts during pattern matching [12/14]
Do not count the presence of debug insts against the limit set by
LdStLimit, and allow the optimizer to find matching insts by skipping
over debug insts.
Differential Revision: https://reviews.llvm.org/D78411
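For context, next_nodbg, prev_nodbg, and instructionsWithoutDebug (declared in
llvm/include/llvm/CodeGen/MachineBasicBlock.h) are the debug-skipping iteration
helpers this patch switches the pass to. Below is a minimal sketch of the
pattern, not the patch itself; the wrapper name nextRealInst is made up for
illustration:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  using namespace llvm;

  // Step to the next non-debug instruction, or End if none remains.
  // Unlike ++It, this never lands on a DBG_VALUE, so the iteration
  // (and any scan limit counted per step) behaves the same whether or
  // not debug info is present.
  static MachineBasicBlock::iterator
  nextRealInst(MachineBasicBlock::iterator It) {
    MachineBasicBlock::iterator End = It->getParent()->end();
    return next_nodbg(It, End);
  }

With plain ++MBBI, the DBG_VALUEs in the new test below would consume the scan
budget (its RUN lines set -aarch64-load-store-scan-limit=1) and defeat the
merge; with next_nodbg they are skipped and never counted.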
Added:
llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
Modified:
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index acd71bce0152..25237bf50dde 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -678,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
"Expected promotable zero stores.");
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way, the merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == MergeMI)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -748,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
const TargetRegisterInfo *TRI, unsigned Limit,
std::function<bool(MachineInstr &, bool)> &Fn) {
auto MBB = MI.getParent();
- for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
- E = MBB->rend();
- I != E; I++) {
+ for (MachineInstr &I :
+ instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
if (!Limit)
return false;
--Limit;
- bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), DefReg);
});
- if (!Fn(*I, isDef))
+ if (!Fn(I, isDef))
return false;
if (isDef)
break;
@@ -783,14 +782,14 @@ MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
const LdStPairFlags &Flags) {
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way, the merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
@@ -1009,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator StoreI) {
- MachineBasicBlock::iterator NextI = LoadI;
- ++NextI;
+ MachineBasicBlock::iterator NextI =
+ next_nodbg(LoadI, LoadI->getParent()->end());
int LoadSize = TII->getMemScale(*LoadI);
int StoreSize = TII->getMemScale(*StoreI);
@@ -1188,7 +1187,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1440,7 +1439,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1468,7 +1467,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
UsedInBetween.accumulate(MI);
@@ -1637,12 +1637,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
+ if (NextI == Update)
+ NextI = next_nodbg(NextI, E);
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1780,7 +1781,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
// We can't post-increment the stack pointer if any instruction between
// the memory access (I) and the increment (MBBI) can access the memory
@@ -1796,7 +1797,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
return E;
}
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1854,7 +1856,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
new file mode 100644
index 000000000000..ce2174a58577
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
@@ -0,0 +1,377 @@
+# Strip out debug info, then run ldst-opt with limit=1.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1, then strip out debug info.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+---
+
+### STG and its offset limits
+
+# CHECK-LABEL: name: test_STG_post
+# CHECK: STGPostIndex $x0, $x0, 7
+name: test_STG_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_same_reg
+# CHECK: STGPostIndex $x1, $x0, 7
+name: test_STG_post_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ STGOffset $x1, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post_unaligned
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 8, 0
+name: test_STG_post_unaligned
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 8, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post2
+# CHECK: STGPostIndex $x0, $x0, -256
+name: test_STG_post2
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 4096, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post3
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 4112, 0
+name: test_STG_post3
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 4112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post4
+# CHECK: STGPostIndex $x0, $x0, 255
+name: test_STG_post4
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 4080, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STG_post5
+# CHECK: STGOffset $x0, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 4096, 0
+name: test_STG_post5
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 4096, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### The rest of ST*G variants.
+
+# CHECK-LABEL: name: test_STZG_post
+# CHECK: STZGPostIndex $x0, $x0, 7
+name: test_STZG_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STZGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_ST2G_post
+# CHECK: ST2GPostIndex $x0, $x0, 7
+name: test_ST2G_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ST2GOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STZ2G_post
+# CHECK: STZ2GPostIndex $x0, $x0, 7
+name: test_STZ2G_post
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STZ2GOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### STGP and its offset limits
+
+# CHECK-LABEL: name: test_STGP_post
+# CHECK: STGPpost $x1, $x2, $x0, 7
+name: test_STGP_post
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post2
+# CHECK: STGPpost $x1, $x2, $x0, -64
+name: test_STGP_post2
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 1024, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post3
+# CHECK: STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: SUBXri $x0, 1040, 0
+name: test_STGP_post3
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = SUBXri $x0, 1040, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post4
+# CHECK: STGPpost $x1, $x2, $x0, 63
+name: test_STGP_post4
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 1008, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_post5
+# CHECK: STGPi $x1, $x2, $x0, 0
+# CHECK-NEXT: ADDXri $x0, 1024, 0
+name: test_STGP_post5
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 1024, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms
+
+# CHECK-LABEL: name: test_STG_pre
+# CHECK: STGPreIndex $x0, $x0, 10
+name: test_STG_pre
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGOffset $x0, $x0, 10
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 160, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre
+# CHECK: STGPpre $x1, $x2, $x0, 10
+name: test_STGP_pre
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ STGPi $x1, $x2, $x0, 10
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 160, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### Pre-indexed forms with add/sub coming before the store.
+
+# CHECK-LABEL: name: test_STG_pre_back
+# CHECK: STGPreIndex $x0, $x0, 2
+name: test_STG_pre_back
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ $x0 = ADDXri $x0, 32, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGOffset $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_back
+# CHECK: STGPpre $x1, $x2, $x0, -3
+name: test_STGP_pre_back
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+
+ $x0 = SUBXri $x0, 48, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGPi $x1, $x2, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+### STGP with source register == address register
+
+# CHECK-LABEL: name: test_STGP_post_same_reg
+# CHECK: STGPpost $x0, $x0, $x0, 7
+name: test_STGP_post_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGPi $x0, $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# CHECK-LABEL: name: test_STGP_pre_same_reg
+# CHECK: STGPpre $x0, $x0, $x0, 7
+name: test_STGP_pre_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ STGPi $x0, $x0, $x0, 7
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ $x0 = ADDXri $x0, 112, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
+
+# This case cannot be merged because the source register is always read before writeback.
+# CHECK-LABEL: name: test_STGP_pre_back_same_reg
+# CHECK: SUBXri $x0, 48, 0
+# CHECK-NEXT: STGPi $x0, $x0, $x0, 0
+name: test_STGP_pre_back_same_reg
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ $x0 = SUBXri $x0, 48, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ STGPi $x0, $x0, $x0, 0
+ DBG_VALUE $x0, 0
+ DBG_VALUE $x0, 0
+ RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
index b44258abd887..fd09af8a85fc 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
---
### STG and its offset limits