[llvm] dc3fbe2 - GlobalISel: Fix infinite loop in legalization artifact combiner

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 2 03:58:37 PDT 2021


Author: Petar Avramovic
Date: 2021-08-02T12:58:10+02:00
New Revision: dc3fbe293f1a1c1068e2cd27151fb373798fdfb6

URL: https://github.com/llvm/llvm-project/commit/dc3fbe293f1a1c1068e2cd27151fb373798fdfb6
DIFF: https://github.com/llvm/llvm-project/commit/dc3fbe293f1a1c1068e2cd27151fb373798fdfb6.diff

LOG: GlobalISel: Fix infinite loop in legalization artifact combiner

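In some cases ArtifactValueFinder keeps trying to combine the same
G_UNMERGE_VALUES, so the legalization artifact combiner loops forever.
The fix is to skip the combine attempt for dead defs, i.e. defs that have
no non-debug uses.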

Differential Revision: https://reviews.llvm.org/D106879

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 19501935e49a3..c2d7a72c2c70a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -756,6 +756,10 @@ class LegalizationArtifactCombiner {
       SmallBitVector DeadDefs(NumDefs);
       for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
         Register DefReg = MI.getReg(DefIdx);
+        if (MRI.use_nodbg_empty(DefReg)) {
+          DeadDefs[DefIdx] = true;
+          continue;
+        }
         Register FoundVal =
             findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
         if (!FoundVal || FoundVal == DefReg)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
new file mode 100644
index 0000000000000..35932d42a0646
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+
+define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> addrspace(4)* %ptr) {
+; GFX10-LABEL: value_finder_bug:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    global_load_dwordx4 v[1:4], v[1:2], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen
+; GFX10-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %vec = load <4 x float>, <4 x float> addrspace(4)* %ptr, align 4
+  %vec.3 = extractelement <4 x float> %vec, i32 3
+  %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef>
+  %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
+  store <2 x float> %new_vec, <2 x float> addrspace(5)* %store_ptr, align 8
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
new file mode 100644
index 0000000000000..eb37e464279e0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -0,0 +1,137 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=GFX10
+
+--- |
+
+  define void @value_finder_bug() { ret void }
+  define void @value_finder_bug_before_artifact_combine() { ret void }
+  define void @value_finder_bug_before_artifact_combine_dbg_use() { ret void }
+
+  !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "llvm", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+  !1 = !DIFile(filename: "bug-legalization-artifact-combiner-dead-def", directory: "/tmp")
+  !2 = !{}
+  !3 = !{i32 2, !"Dwarf Version", i32 4}
+  !4 = !{i32 2, !"Debug Info Version", i32 3}
+  !5 = distinct !DISubprogram(name: "value_finder_bug_before_artifact_combine_dbg_use", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+  !6 = !DISubroutineType(types: !2)
+  !7 = !DILocalVariable(name: "in", arg: 1, scope: !5, file: !1, line: 1, type: !8)
+  !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+  !9 = !DILocation(line: 1, column: 1, scope: !5)
+...
+
+---
+name: value_finder_bug
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GFX10-LABEL: name: value_finder_bug
+    ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10: G_STORE [[UV1]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<4 x s32>) = G_IMPLICIT_DEF
+    %5:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    %6:_(s32) = G_CONSTANT i32 3
+    %7:_(s32) = G_EXTRACT_VECTOR_ELT %5(<4 x s32>), %6(s32)
+    %8:_(<2 x s32>) = G_SHUFFLE_VECTOR %5(<4 x s32>), %4, shufflemask(2, undef)
+    %9:_(s32) = G_CONSTANT i32 1
+    %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %8, %7(s32), %9(s32)
+    G_STORE %10(<2 x s32>), %0(p5) :: (store (<2 x s32>), addrspace 5)
+...
+
+---
+name: value_finder_bug_before_artifact_combine
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine
+    ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+    ; GFX10: %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
+    %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
+    %7:_(s32) = G_IMPLICIT_DEF
+    %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
+    %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
+    %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
+    G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
+    %12:_(s32) = G_CONSTANT i32 4
+    %13:_(p5) = G_PTR_ADD %0, %12(s32)
+    G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+
+...
+
+---
+name: value_finder_bug_before_artifact_combine_dbg_use
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GFX10-LABEL: name: value_finder_bug_before_artifact_combine_dbg_use
+    ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX10: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
+    ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
+    ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
+    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
+    ; GFX10: %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
+    ; GFX10: DBG_VALUE %dbg_use(s32), $noreg
+    ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
+    ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(p4) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<4 x s32>) = G_LOAD %3(p4) :: (load (<4 x s32>), align 4, addrspace 4)
+    %5:_(s32) = G_EXTRACT %4(<4 x s32>), 96
+    %6:_(s32) = G_EXTRACT %4(<4 x s32>), 64
+    %7:_(s32) = G_IMPLICIT_DEF
+    %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
+    %9:_(<2 x s32>) = G_INSERT %8, %5(s32), 32
+    %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(<2 x s32>)
+    DBG_VALUE %dbg_use(s32), $noreg, !7, !DIExpression(), debug-location !9
+    G_STORE %6(s32), %0(p5) :: (store (s32), align 8, addrspace 5)
+    %12:_(s32) = G_CONSTANT i32 4
+    %13:_(p5) = G_PTR_ADD %0, %12(s32)
+    G_STORE %11(s32), %13(p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+...


        


More information about the llvm-commits mailing list