[llvm] 7091a7f - [GlobalISel][Legalizer] Don't use eraseFromParentAndMarkDBGValuesForRemoval() for some artifacts.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 20 23:34:56 PDT 2021
Author: Amara Emerson
Date: 2021-09-20T23:34:42-07:00
New Revision: 7091a7f781c9889f109b6be7b07822bfd91094dc
URL: https://github.com/llvm/llvm-project/commit/7091a7f781c9889f109b6be7b07822bfd91094dc
DIFF: https://github.com/llvm/llvm-project/commit/7091a7f781c9889f109b6be7b07822bfd91094dc.diff
LOG: [GlobalISel][Legalizer] Don't use eraseFromParentAndMarkDBGValuesForRemoval() for some artifacts.
Artifacts other than G_TRUNC/G_SEXT (which have IR counterparts) don't seem
to have debug users of their defs. However, in the legalizer we always call
MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval(), which is expensive.
In some rare cases, this contributes significantly to unreasonably long compile
times when we have lots of artifact combiner activity.
To verify this, I added asserts to that function that fire whenever it actually
replaces a debug use operand with undef for one of these artifacts. On CTMark with
both -O0 and -Os and debug info enabled, I didn't see a single case where they triggered.
In my measurements I saw around a 0.5% geomean compile-time improvement on -g -O0
for AArch64 with this change.
Differential Revision: https://reviews.llvm.org/D109750
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index bd1f44a0a4804..177d4025bbb8f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1051,6 +1051,25 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
+/// Returns true if \p MI is one of the artifact opcodes listed below, so that the
+/// caller can erase it without marking DBG_VALUE users for removal.
+/// These artifacts generally don't have any debug users because they usually
+/// originate from legalization rather than directly from IR instructions; avoiding
+/// the check improves compile time. Truncates/extends are excluded: they have IR counterparts.
+static bool shouldSkipDbgValueFor(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_INSERT:
+ return true; // Artifact assumed to have no debug users of its defs.
+ default:
+ return false; // Any other opcode keeps the DBG_VALUE-marking erase path.
+ }
+}
+
void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
LostDebugLocObserver *LocObserver,
SmallInstListTy &DeadInstChain) {
@@ -1060,7 +1079,10 @@ void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
}
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
DeadInstChain.remove(&MI);
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ if (shouldSkipDbgValueFor(MI))
+ MI.eraseFromParent();
+ else
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
if (LocObserver)
LocObserver->checkpoint(false);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
index 1850bada924bb..a4070498dafd9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -5,7 +5,6 @@
define void @value_finder_bug() { ret void }
define void @value_finder_bug_before_artifact_combine() { ret void }
- define void @value_finder_bug_before_artifact_combine_dbg_use() { ret void }
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "llvm", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "bug-legalization-artifact-combiner-dead-def", directory: "/tmp")
@@ -86,40 +85,3 @@ body: |
G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
...
-
----
-name: value_finder_bug_before_artifact_combine_dbg_use
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2
-
- ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine_dbg_use
- ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
- ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
- ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
- ; GFX10: DBG_VALUE $noreg, $noreg, {{.*}}, !DIExpression(), debug-location !DILocation(line: 1, column: 1
- ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
- ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
- %0:_(p5) = COPY $vgpr0
- %1:_(s32) = COPY $vgpr1
- %2:_(s32) = COPY $vgpr2
- %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
- %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
- %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
- %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
- %7:_(s32) = G_IMPLICIT_DEF
- %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
- %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
- %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
- DBG_VALUE %dbg_use(s32), $noreg, !7, !DIExpression(), debug-location !9
- G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
- %12:_(s32) = G_CONSTANT i32 4
- %13:_(p5) = G_PTR_ADD %0, %12(s32)
- G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
-...
More information about the llvm-commits
mailing list