[llvm] [GISel] Fix ShuffleVector assert (PR #139769)
Alan Li via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 16:40:06 PDT 2025
https://github.com/lialan updated https://github.com/llvm/llvm-project/pull/139769
>From 4119be5632deb942c1714d9b73794f02a9b984d7 Mon Sep 17 00:00:00 2001
From: Alan Li <me at alanli.org>
Date: Tue, 13 May 2025 13:10:46 -0400
Subject: [PATCH 1/3] [GISel] Fix ShuffleVector assert
Fixes issue: https://github.com/llvm/llvm-project/issues/139752
When G_SHUFFLE_VECTOR has only 1 element then it is possible the vector
is decayed into a scalar.
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 8 +++++--
.../prelegalizer-combiner-shuffle.mir | 24 +++++++++++++++++++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 5191360c7718a..3abed4d062bfa 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -420,8 +420,12 @@ void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
else
Extracts.push_back(Unmerge2.getReg(Val - Width));
}
-
- Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
+ assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
+ if (Extracts.size() == 1) {
+ Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
+ } else {
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
+ }
MI.eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
index bba608cceee19..e500cfe085110 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
@@ -135,3 +135,27 @@ body: |
SI_RETURN
...
+
+---
+name: shuffle_vector_to_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: shuffle_vector_to_copy
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
+ ; CHECK-NEXT: G_STORE [[UV4]](s16), [[COPY1]](p3) :: (store (s16), addrspace 3)
+ ; CHECK-NEXT: SI_RETURN
+ %0:_(p3) = COPY $vgpr0
+ %1:_(p3) = COPY $vgpr1
+ %12:_(<8 x s16>) = G_IMPLICIT_DEF
+ %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %11:_(s16) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4)
+ G_STORE %11(s16), %1(p3) :: (store (s16), addrspace 3)
+ SI_RETURN
+...
>From a0d423049944c34d33a232bc67f4c70c490d4e62 Mon Sep 17 00:00:00 2001
From: Alan Li <me at alanli.org>
Date: Wed, 14 May 2025 13:50:27 -0400
Subject: [PATCH 2/3] compact vreg numbers, adding new tests
---
.../bug_shuffle_vector_to_scalar.ll | 35 ++++++++++++++
.../prelegalizer-combiner-shuffle.mir | 48 +++++++++----------
2 files changed, 59 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/bug_shuffle_vector_to_scalar.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug_shuffle_vector_to_scalar.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug_shuffle_vector_to_scalar.ll
new file mode 100644
index 0000000000000..b19872aba2cca
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug_shuffle_vector_to_scalar.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Description: an end-to-end IR test for https://github.com/llvm/llvm-project/issues/139752
+; To test combine shuffle_vectors into build_vector
+
+define amdgpu_gs <4 x float> @_amdgpu_gs_main() {
+; CHECK-LABEL: _amdgpu_gs_main:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: v_mov_b32_e32 v0, 16
+; CHECK-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: s_mov_b32 s1, s0
+; CHECK-NEXT: s_mov_b32 s2, s0
+; CHECK-NEXT: s_mov_b32 s3, s0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v2, s[0:3], 0 idxen
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_mov_b32_e32 v0, v1
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: ; return to shader part epilog
+bb:
+ %i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 16), align 4
+ %i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 20), align 4
+ %i2 = shufflevector <1 x float> %i, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i2, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
+ %i3 = shufflevector <1 x float> %i1, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+ ret <4 x float> %i3
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #0
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
index e500cfe085110..6e4c6bcf91d11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-shuffle.mir
@@ -20,10 +20,10 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, 6, 7)
- G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, 6, 7)
+ G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
SI_RETURN
...
@@ -46,10 +46,10 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(<2 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(3, 4)
- G_STORE %11(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(<2 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(3, 4)
+ G_STORE %4(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
SI_RETURN
...
@@ -73,10 +73,10 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(<3 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(0, 1, 2)
- G_STORE %11(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(<3 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(0, 1, 2)
+ G_STORE %4(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
SI_RETURN
...
@@ -101,10 +101,10 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, -1, 7)
- G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, -1, 7)
+ G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
SI_RETURN
...
@@ -128,10 +128,10 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(6, 7, 8, 9)
- G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(6, 7, 8, 9)
+ G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
SI_RETURN
...
@@ -153,9 +153,9 @@ body: |
; CHECK-NEXT: SI_RETURN
%0:_(p3) = COPY $vgpr0
%1:_(p3) = COPY $vgpr1
- %12:_(<8 x s16>) = G_IMPLICIT_DEF
- %10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
- %11:_(s16) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4)
- G_STORE %11(s16), %1(p3) :: (store (s16), addrspace 3)
+ %2:_(<8 x s16>) = G_IMPLICIT_DEF
+ %3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
+ %4:_(s16) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4)
+ G_STORE %4(s16), %1(p3) :: (store (s16), addrspace 3)
SI_RETURN
...
>From 3fbc4e542be003fb25298d5cfa7d392fc5fb8c6c Mon Sep 17 00:00:00 2001
From: Alan Li <me at alanli.org>
Date: Tue, 20 May 2025 19:39:52 -0400
Subject: [PATCH 3/3] small update
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3abed4d062bfa..b1e851183de0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -421,11 +421,10 @@ void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
Extracts.push_back(Unmerge2.getReg(Val - Width));
}
assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
- if (Extracts.size() == 1) {
+ if (Extracts.size() == 1)
Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
- } else {
+ else
Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
- }
MI.eraseFromParent();
}
More information about the llvm-commits
mailing list