[llvm] dc3fbe2 - GlobalISel: Fix infinite loop in legalization artifact combiner
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 2 03:58:37 PDT 2021
Author: Petar Avramovic
Date: 2021-08-02T12:58:10+02:00
New Revision: dc3fbe293f1a1c1068e2cd27151fb373798fdfb6
URL: https://github.com/llvm/llvm-project/commit/dc3fbe293f1a1c1068e2cd27151fb373798fdfb6
DIFF: https://github.com/llvm/llvm-project/commit/dc3fbe293f1a1c1068e2cd27151fb373798fdfb6.diff
LOG: GlobalISel: Fix infinite loop in legalization artifact combiner
In some cases, ArtifactValueFinder keeps trying to combine the same G_UNMERGE_VALUES.
The fix is to skip the combine attempt for dead defs, i.e. defs that have no non-debug uses.
Differential Revision: https://reviews.llvm.org/D106879
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 19501935e49a3..c2d7a72c2c70a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -756,6 +756,10 @@ class LegalizationArtifactCombiner {
SmallBitVector DeadDefs(NumDefs);
for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
Register DefReg = MI.getReg(DefIdx);
+ if (MRI.use_nodbg_empty(DefReg)) {
+ DeadDefs[DefIdx] = true;
+ continue;
+ }
Register FoundVal =
findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
if (!FoundVal || FoundVal == DefReg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
new file mode 100644
index 0000000000000..35932d42a0646
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+
+define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> addrspace(4)* %ptr) {
+; GFX10-LABEL: value_finder_bug:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
+; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %vec = load <4 x float>, <4 x float> addrspace(4)* %ptr, align 4
+ %vec.3 = extractelement <4 x float> %vec, i32 3
+ %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef>
+ %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
+ store <2 x float> %new_vec, <2 x float> addrspace(5)* %store_ptr, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
new file mode 100644
index 0000000000000..eb37e464279e0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -0,0 +1,137 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GFX10
+
+--- |
+
+ define void @value_finder_bug() { ret void }
+ define void @value_finder_bug_before_artifact_combine() { ret void }
+ define void @value_finder_bug_before_artifact_combine_dbg_use() { ret void }
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "llvm", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+ !1 = !DIFile(filename: "bug-legalization-artifact-combiner-dead-def", directory: "/tmp")
+ !2 = !{}
+ !3 = !{i32 2, !"Dwarf Version", i32 4}
+ !4 = !{i32 2, !"Debug Info Version", i32 3}
+ !5 = distinct !DISubprogram(name: "value_finder_bug_before_artifact_combine_dbg_use", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+ !6 = !DISubroutineType(types: !2)
+ !7 = !DILocalVariable(name: "in", arg: 1, scope: !5, file: !1, line: 1, type: !8)
+ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+ !9 = !DILocation(line: 1, column: 1, scope: !5)
+...
+
+---
+name: value_finder_bug
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX10-LABEL: name: value_finder_bug
+ ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+ ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+ ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+ ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+ ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX10: G_STORE [[UV1]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+ %0:_(p5) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+ %4:_(<4 x s32>) = G_IMPLICIT_DEF
+ %5:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ %6:_(s32) = G_CONSTANT i32 3
+ %7:_(s32) = G_EXTRACT_VECTOR_ELT %5(<4 x s32>), %6(s32)
+ %8:_(<2 x s32>) = G_SHUFFLE_VECTOR %5(<4 x s32>), %4, shufflemask(2, undef)
+ %9:_(s32) = G_CONSTANT i32 1
+ %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %8, %7(s32), %9(s32)
+ G_STORE %10(<2 x s32>), %0(p5) :: (store (<2 x s32>), addrspace 5)
+...
+
+---
+name: value_finder_bug_before_artifact_combine
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine
+ ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+ ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+ ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+ ; GFX10: %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+ ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+ ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+ %0:_(p5) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+ %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
+ %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
+ %7:_(s32) = G_IMPLICIT_DEF
+ %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
+ %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
+ %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
+ G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
+ %12:_(s32) = G_CONSTANT i32 4
+ %13:_(p5) = G_PTR_ADD %0, %12(s32)
+ G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+
+...
+
+---
+name: value_finder_bug_before_artifact_combine_dbg_use
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine_dbg_use
+ ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+ ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+ ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+ ; GFX10: %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+ ; GFX10: DBG_VALUE %dbg_use(s32), $noreg
+ ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+ ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+ %0:_(p5) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+ %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+ %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
+ %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
+ %7:_(s32) = G_IMPLICIT_DEF
+ %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
+ %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
+ %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
+ DBG_VALUE %dbg_use(s32), $noreg, !7, !DIExpression(), debug-location !9
+ G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
+ %12:_(s32) = G_CONSTANT i32 4
+ %13:_(p5) = G_PTR_ADD %0, %12(s32)
+ G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+...
More information about the llvm-commits
mailing list