[llvm] [NFC][AMDGPU] Convert amdpal.ll to autogenerated CHECK lines (PR #141026)

Harrison Hao via llvm-commits llvm-commits at lists.llvm.org
Thu May 22 02:35:02 PDT 2025


https://github.com/harrisonGPU created https://github.com/llvm/llvm-project/pull/141026

(No description was provided for this pull request.)

From c8231aa1a7ba3ee63499a9844ffe49c06239620a Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 22 May 2025 17:32:25 +0800
Subject: [PATCH] [NFC][AMDGPU] Convert amdpal.ll to autogenerated CHECK lines

---
 llvm/test/CodeGen/AMDGPU/amdpal.ll | 118 ++++++++++++++++++++---------
 1 file changed, 83 insertions(+), 35 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 2e47b0163aa8c..a97732b2f39a5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -1,9 +1,23 @@
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
-; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1200 | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s
 
-; PAL-NOT: .AMDGPU.config
-; PAL-LABEL: {{^}}simple:
 define amdgpu_kernel void @simple(ptr addrspace(1) %out) {
+; CI-LABEL: simple:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; CI-NEXT:    v_mov_b32_e32 v0, 0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    global_store_b32 v0, v0, s[0:1]
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: simple:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
+; VI-NEXT:    v_mov_b32_e32 v0, 0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    global_store_b32 v0, v0, s[0:1]
+; VI-NEXT:    s_endpgm
 entry:
   store i32 0, ptr addrspace(1) %out
   ret void
@@ -12,13 +26,28 @@ entry:
 ; Check code sequence for amdpal use of scratch for alloca. This is the case
 ; where the high half of the address comes from s_getpc.
 
-; PAL-LABEL: {{^}}scratch:
-; PAL: s_getpc_b64 s[[[GITPTR:[0-9]+]]:
-; PAL: s_mov_b32 s[[GITPTR]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) {
+; CI-LABEL: scratch:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 1
+; CI-NEXT:    s_cselect_b32 s0, s1, s0
+; CI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    scratch_store_b32 off, v0, s3
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    s_cselect_b32 s0, s1, s0
+; VI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    scratch_store_b32 off, v0, s3
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [2 x i32], addrspace(5)
   store <2 x i32> %in, ptr addrspace(5) %v
@@ -35,13 +64,28 @@ entry:
 ; that the s_movk_i32 is into a reg that is one more than the following
 ; s_mov_b32.
 
-; PAL-LABEL: {{^}}scratch2:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) #0 {
+; CI-LABEL: scratch2:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_cmp_eq_u32 s2, 1
+; CI-NEXT:    s_cselect_b32 s0, s1, s0
+; CI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; CI-NEXT:    v_mov_b32_e32 v0, s0
+; CI-NEXT:    scratch_store_b32 off, v0, s3
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch2:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_wait_kmcnt 0x0
+; VI-NEXT:    s_cmp_eq_u32 s2, 1
+; VI-NEXT:    s_cselect_b32 s0, s1, s0
+; VI-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    scratch_store_b32 off, v0, s3
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [2 x i32], addrspace(5)
   store <2 x i32> %in, ptr addrspace(5) %v
@@ -56,14 +100,28 @@ entry:
 ; 0 in a graphics shader.
 ; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4.
 
-; PAL-LABEL: {{^}}scratch2_cs:
-; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234
-; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0
-; CI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x4
-; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
-; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-
 define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
+; CI-LABEL: scratch2_cs:
+; CI:       ; %bb.0: ; %entry
+; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v6
+; CI-NEXT:    v_mov_b32_e32 v2, v5
+; CI-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; CI-NEXT:    v_add3_u32 v0, 0, v0, 4
+; CI-NEXT:    scratch_store_b96 off, v[2:4], off
+; CI-NEXT:    scratch_load_b32 v0, v0, off
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; CI-NEXT:    s_endpgm
+;
+; VI-LABEL: scratch2_cs:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    v_mov_b32_e32 v2, v5
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v6
+; VI-NEXT:    scratch_store_b96 off, v[2:4], off
+; VI-NEXT:    scratch_load_b32 v0, v0, off offset:4
+; VI-NEXT:    s_wait_loadcnt 0x0
+; VI-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; VI-NEXT:    s_endpgm
 entry:
   %v = alloca [3 x i32], addrspace(5)
   %v1 = getelementptr [3 x i32], ptr addrspace(5) %v, i32 0, i32 1
@@ -79,15 +137,5 @@ entry:
 attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
 
 declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
-
-; PAL:         .amdgpu_pal_metadata
-; PAL-NEXT: ---
-; PAL-NEXT: amdpal.pipelines:
-; PAL-NEXT:   - .hardware_stages:
-; PAL-NEXT:       .cs:
-; PAL-NEXT:         .entry_point:    _amdgpu_cs_main
-; PAL-NEXT:         .entry_point_symbol:    scratch2_cs
-; PAL-NEXT:         .scratch_memory_size: 0x10
-; PAL-NEXT:         .sgpr_count:     0x
-; PAL-NEXT:         .vgpr_count:     0x
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; PAL: {{.*}}



More information about the llvm-commits mailing list