[llvm] [AMDGPU] Regenerate test case to cover gfx10 check lines (PR #154909)
Abhinav Garg via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 01:15:55 PDT 2025
https://github.com/abhigargrepo updated https://github.com/llvm/llvm-project/pull/154909
From f4815cd1d1824ed0422f7a0ff61265531ccb6824 Mon Sep 17 00:00:00 2001
From: Abhinav Garg <abhigarg at amd.com>
Date: Fri, 22 Aug 2025 01:05:39 -0700
Subject: [PATCH] Regenerate test case to cover gfx10 check lines
---
.../llvm.amdgcn.image.gather4.a16.dim.ll | 129 +++++++++++++++++-
1 file changed, 128 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index 3b4db4a4075fd..d45705edce2c8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -18,6 +18,16 @@ define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -74,6 +84,16 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_cube:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_cube:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -132,6 +152,16 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_2darray:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_2darray:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -190,6 +220,16 @@ define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -246,6 +286,16 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -306,6 +356,16 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -362,6 +422,16 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_b_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_b_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -418,6 +488,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_b_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_b_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -476,6 +556,16 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_b_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_b_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -538,6 +628,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_b_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_b_cl_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo
@@ -591,6 +691,13 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_l_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
@@ -636,6 +743,13 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_l_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_l_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
@@ -677,6 +791,13 @@ define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_lz_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_lz_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
@@ -718,6 +839,13 @@ define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
+; GFX10-LABEL: gather4_c_lz_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
; GFX11-TRUE16-LABEL: gather4_c_lz_2d:
; GFX11-TRUE16: ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
@@ -773,5 +901,4 @@ attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10: {{.*}}
; GFX12: {{.*}}
More information about the llvm-commits mailing list