[llvm] 7091a7f - [GlobalISel][Legalizer] Don't use eraseFromParentAndMarkDBGValuesForRemoval() for some artifacts.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 20 23:34:56 PDT 2021


Author: Amara Emerson
Date: 2021-09-20T23:34:42-07:00
New Revision: 7091a7f781c9889f109b6be7b07822bfd91094dc

URL: https://github.com/llvm/llvm-project/commit/7091a7f781c9889f109b6be7b07822bfd91094dc
DIFF: https://github.com/llvm/llvm-project/commit/7091a7f781c9889f109b6be7b07822bfd91094dc.diff

LOG: [GlobalISel][Legalizer] Don't use eraseFromParentAndMarkDBGValuesForRemoval() for some artifacts.

For artifacts excluding G_TRUNC/G_SEXT, which have IR counterparts, we don't
seem to have debug users of their defs. However, in the legalizer we're always
calling MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval(), which is
expensive. In some rare cases, this contributes significantly to unreasonably
long compile times when we have lots of artifact combiner activity.

To verify this, I added asserts to that function that fire when it actually
replaces a debug use operand with undef for one of these artifacts. On CTMark
with both -O0 and -Os and debug info enabled, I didn't see a single case where
they triggered.

In my measurements I saw around a 0.5% geomean compile-time improvement on -g -O0
for AArch64 with this change.

Differential Revision: https://reviews.llvm.org/D109750

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index bd1f44a0a4804..177d4025bbb8f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1051,6 +1051,25 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
          llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
 }
 
+/// Returns true if \p MI is one of the artifact opcodes listed below. These
+/// generally have no debug users because they usually originate from
+/// legalization rather than directly from IR instructions, so skipping the
+/// debug-user check for them improves compile time. Truncates and extends are
+/// not listed: their IR counterparts can have debug users after translation.
+static bool shouldSkipDbgValueFor(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_UNMERGE_VALUES:
+  case TargetOpcode::G_MERGE_VALUES:
+  case TargetOpcode::G_CONCAT_VECTORS:
+  case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_EXTRACT:
+  case TargetOpcode::G_INSERT:
+    return true;
+  default:
+    return false;
+  }
+}
+
 void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
                             LostDebugLocObserver *LocObserver,
                             SmallInstListTy &DeadInstChain) {
@@ -1060,7 +1079,10 @@ void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
   }
   LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
   DeadInstChain.remove(&MI);
-  MI.eraseFromParentAndMarkDBGValuesForRemoval();
+  if (shouldSkipDbgValueFor(MI))
+    MI.eraseFromParent();
+  else
+    MI.eraseFromParentAndMarkDBGValuesForRemoval();
   if (LocObserver)
     LocObserver->checkpoint(false);
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
index 1850bada924bb..a4070498dafd9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -5,7 +5,6 @@
 
   define void @value_finder_bug() { ret void }
   define void @value_finder_bug_before_artifact_combine() { ret void }
-  define void @value_finder_bug_before_artifact_combine_dbg_use() { ret void }
 
   !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "llvm", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
   !1 = !DIFile(filename: "bug-legalization-artifact-combiner-dead-def", directory: "/tmp")
@@ -86,40 +85,3 @@ body: |
     G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
 
 ...
-
----
-name: value_finder_bug_before_artifact_combine_dbg_use
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2
-
-    ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine_dbg_use
-    ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
-    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
-    ; GFX10: DBG_VALUE $noreg, $noreg, {{.*}}, !DIExpression(), debug-location !DILocation(line: 1, column: 1
-    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
-    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
-    %0:_(p5) = COPY $vgpr0
-    %1:_(s32) = COPY $vgpr1
-    %2:_(s32) = COPY $vgpr2
-    %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
-    %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
-    %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
-    %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
-    %7:_(s32) = G_IMPLICIT_DEF
-    %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
-    %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
-    %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
-    DBG_VALUE %dbg_use(s32), $noreg, !7, !DIExpression(), debug-location !9
-    G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
-    %12:_(s32) = G_CONSTANT i32 4
-    %13:_(p5) = G_PTR_ADD %0, %12(s32)
-    G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
-...


        


More information about the llvm-commits mailing list