[llvm] [NFC][AMDGPU] Autogenerate tests for uniform i32 promo in ISel (PR #106382)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 08:02:31 PDT 2024


================
@@ -1,25 +1,65 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,VI %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI %s
 
-; GCN-LABEL: {{^}}extract_vector_elt_v1i8:
-; GCN: s_load_dword [[LOAD:s[0-9]+]]
-; GCN: v_mov_b32_e32 [[V_LOAD:v[0-9]+]], [[LOAD]]
-; GCN: buffer_store_byte [[V_LOAD]]
 define amdgpu_kernel void @extract_vector_elt_v1i8(ptr addrspace(1) %out, <1 x i8> %foo) #0 {
+; SI-LABEL: extract_vector_elt_v1i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[6:7], 0x2
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; SI-NEXT:    s_mov_b32 s3, 0x100f000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: extract_vector_elt_v1i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s4, s[6:7], 0x8
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %p0 = extractelement <1 x i8> %foo, i32 0
   store i8 %p0, ptr addrspace(1) %out
   ret void
 }
 
-; GCN-LABEL: {{^}}extract_vector_elt_v2i8:
-; GCN: s_load_dword s
-; GCN-NOT: {{flat|buffer|global}}
-; SI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
-; VI: v_lshrrev_b16_e64 v{{[0-9]+}}, 8, s{{[0-9]+}}
-; GCN-NOT: {{flat|buffer|global}}
-; GCN: buffer_store_byte
-; GCN: buffer_store_byte
 define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i8> %foo) #0 {
+; SI-LABEL: extract_vector_elt_v2i8:
----------------
arsenm wrote:

If you want to do deeper maintenance, you can stop using amdgpu_kernel and instead use inreg arguments to control the SGPR inputs, which avoids all the kernel boilerplate.

https://github.com/llvm/llvm-project/pull/106382


More information about the llvm-commits mailing list