[llvm] [AMDGPU] Add few missing gfx1250 codegen tests. NFC (PR #155314)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 25 14:50:00 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>



---

Patch is 94.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155314.diff


5 Files Affected:

- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+1171-10) 
- (added) llvm/test/CodeGen/AMDGPU/hard-clauses-load-monitor.mir (+38) 
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll (+321) 
- (modified) llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll (+20-10) 
- (modified) llvm/test/CodeGen/AMDGPU/scale-offset-scratch.ll (+3-3) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 2db7b28c7de97..ddd3b1520bf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-FAKE16 %s
 
 ; Make sure we don't crash or assert on spir_kernel calling convention.
 
@@ -34,6 +36,14 @@ define spir_kernel void @kernel(ptr addrspace(1) %out) {
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: kernel:
+; GFX1250:       ; %bb.0: ; %entry
+; GFX1250-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    global_store_b32 v0, v0, s[0:1]
+; GFX1250-NEXT:    s_endpgm
 entry:
   store i32 0, ptr addrspace(1) %out
   ret void
@@ -70,6 +80,16 @@ define amdgpu_ps half @ps_ret_cc_f16(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: ps_ret_cc_f16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: ps_ret_cc_f16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -96,26 +116,71 @@ define amdgpu_ps half @ps_ret_cc_inreg_f16(half inreg %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e64 v0, s0, 1.0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: ps_ret_cc_inreg_f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_add_f16 s0, s0, 1.0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1250-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
 
 define fastcc float @fastcc(float %arg0) #0 {
-; GCN-LABEL: fastcc:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_add_f32_e32 v0, 4.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SI-LABEL: fastcc:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: fastcc:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: fastcc:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: fastcc:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %add = fadd float %arg0, 4.0
   ret float %add
 }
 
 define coldcc float @coldcc(float %arg0) #0 {
-; GCN-LABEL: coldcc:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_add_f32_e32 v0, 4.0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SI-LABEL: coldcc:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: coldcc:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: coldcc:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: coldcc:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_add_f32_e32 v0, 4.0, v0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
  %add = fadd float %arg0, 4.0
  ret float %add
 }
@@ -209,6 +274,23 @@ define amdgpu_kernel void @call_coldcc() #0 {
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: call_coldcc:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1250-NEXT:    s_get_pc_i64 s[6:7]
+; GFX1250-NEXT:    s_add_nc_u64 s[6:7], s[6:7], coldcc@gotpcrel+4
+; GFX1250-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
+; GFX1250-NEXT:    s_load_b64 s[12:13], s[6:7], 0x0
+; GFX1250-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 36
+; GFX1250-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; GFX1250-NEXT:    s_mov_b32 s32, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    s_swap_pc_i64 s[30:31], s[12:13]
+; GFX1250-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX1250-NEXT:    s_endpgm
   %val = call coldcc float @coldcc(float 1.0)
   store float %val, ptr addrspace(1) poison
   ret void
@@ -303,6 +385,23 @@ define amdgpu_kernel void @call_fastcc() #0 {
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: call_fastcc:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; GFX1250-NEXT:    s_get_pc_i64 s[6:7]
+; GFX1250-NEXT:    s_add_nc_u64 s[6:7], s[6:7], fastcc@gotpcrel+4
+; GFX1250-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 1.0
+; GFX1250-NEXT:    s_load_b64 s[12:13], s[6:7], 0x0
+; GFX1250-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 36
+; GFX1250-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; GFX1250-NEXT:    s_wait_xcnt 0x0
+; GFX1250-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; GFX1250-NEXT:    s_mov_b32 s32, 0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    s_swap_pc_i64 s[30:31], s[12:13]
+; GFX1250-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX1250-NEXT:    s_endpgm
   %val = call fastcc float @fastcc(float 1.0)
   store float %val, ptr addrspace(1) poison
   ret void
@@ -331,6 +430,16 @@ define amdgpu_cs half @cs_mesa(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: cs_mesa:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: cs_mesa:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -358,6 +467,16 @@ define amdgpu_ps half @ps_mesa_f16(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: ps_mesa_f16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: ps_mesa_f16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -385,6 +504,16 @@ define amdgpu_vs half @vs_mesa(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: vs_mesa:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: vs_mesa:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -412,6 +541,16 @@ define amdgpu_gs half @gs_mesa(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: gs_mesa:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: gs_mesa:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -439,6 +578,16 @@ define amdgpu_hs half @hs_mesa(half %arg0) {
 ; GFX11-FAKE16:       ; %bb.0:
 ; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
 ; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-TRUE16-LABEL: hs_mesa:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX1250-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250-FAKE16-LABEL: hs_mesa:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX1250-FAKE16-NEXT:    ; return to shader part epilog
   %add = fadd half %arg0, 1.0
   ret half %add
 }
@@ -468,6 +617,11 @@ define amdgpu_ps <2 x half> @ps_mesa_v2f16(<2 x half> %arg0) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: ps_mesa_v2f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    ; return to shader part epilog
   %add = fadd <2 x half> %arg0, <half 1.0, half 1.0>
   ret <2 x half> %add
 }
@@ -497,6 +651,11 @@ define amdgpu_ps <2 x half> @ps_mesa_inreg_v2f16(<2 x half> inreg %arg0) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_pk_add_f16 v0, s0, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: ps_mesa_inreg_v2f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_f16 v0, s0, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    ; return to shader part epilog
   %add = fadd <2 x half> %arg0, <half 1.0, half 1.0>
   ret <2 x half> %add
 }
@@ -528,6 +687,12 @@ define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
 ; GFX11-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_v2i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX1250-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX1250-NEXT:    s_endpgm
   %add = add <2 x i16> %arg0, <i16 1, i16 1>
   store <2 x i16> %add, ptr addrspace(1) poison
   ret void
@@ -563,6 +728,12 @@ define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
 ; GFX11-NEXT:    v_pk_add_u16 v0, s0, 1 op_sel_hi:[1,0]
 ; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_inreg_v2i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_u16 v0, s0, 1 op_sel_hi:[1,0]
+; GFX1250-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX1250-NEXT:    s_endpgm
   %add = add <2 x i16> %arg0, <i16 1, i16 1>
   store <2 x i16> %add, ptr addrspace(1) poison
   ret void
@@ -603,6 +774,12 @@ define amdgpu_ps <4 x half> @ps_mesa_v4f16(<4 x half> %arg0) {
 ; GFX11-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    v_pk_add_f16 v1, v1, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: ps_mesa_v4f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    v_pk_add_f16 v1, v1, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    ; return to shader part epilog
   %add = fadd <4 x half> %arg0, <half 1.0, half 1.0, half 1.0, half 1.0>
   ret <4 x half> %add
 }
@@ -644,6 +821,12 @@ define amdgpu_ps <4 x half> @ps_mesa_inreg_v4f16(<4 x half> inreg %arg0) {
 ; GFX11-NEXT:    v_pk_add_f16 v0, s0, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    v_pk_add_f16 v1, s1, 1.0 op_sel_hi:[1,0]
 ; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX1250-LABEL: ps_mesa_inreg_v4f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_pk_add_f16 v0, s0, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    v_pk_add_f16 v1, s1, 1.0 op_sel_hi:[1,0]
+; GFX1250-NEXT:    ; return to shader part epilog
   %add = fadd <4 x half> %arg0, <half 1.0, half 1.0, half 1.0, half 1.0>
   ret <4 x half> %add
 }
@@ -685,6 +868,17 @@ define amdgpu_ps void @ps_mesa_inreg_v3i32(<3 x i32> inreg %arg0) {
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_inreg_v3i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_add_co_i32 s2, s2, 3
+; GFX1250-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-NEXT:    s_add_co_i32 s1, s1, 2
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1250-NEXT:    global_store_b96 v[0:1], v[0:2], off
+; GFX1250-NEXT:    s_endpgm
   %add = add <3 x i32> %arg0, <i32 1, i32 2, i32 3>
   store <3 x i32> %add, ptr addrspace(1) poison
   ret void
@@ -717,6 +911,17 @@ define amdgpu_ps void @ps_mesa_inreg_v3f32(<3 x float> inreg %arg0) {
 ; GFX11-NEXT:    v_add_f32_e64 v0, s0, 1.0
 ; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_inreg_v3f32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_add_f32 s0, s0, 1.0
+; GFX1250-NEXT:    s_add_f32 s1, s1, 2.0
+; GFX1250-NEXT:    s_add_f32 s2, s2, 4.0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_2)
+; GFX1250-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-NEXT:    v_mov_b32_e32 v2, s2
+; GFX1250-NEXT:    global_store_b96 v[0:1], v[0:2], off
+; GFX1250-NEXT:    s_endpgm
   %add = fadd <3 x float> %arg0, <float 1.0, float 2.0, float 4.0>
   store <3 x float> %add, ptr addrspace(1) poison
   ret void
@@ -772,6 +977,22 @@ define amdgpu_ps void @ps_mesa_inreg_v5i32(<5 x i32> inreg %arg0) {
 ; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
 ; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_inreg_v5i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_add_co_i32 s3, s3, 4
+; GFX1250-NEXT:    s_add_co_i32 s2, s2, 3
+; GFX1250-NEXT:    s_add_co_i32 s1, s1, 2
+; GFX1250-NEXT:    s_add_co_i32 s4, s4, 5
+; GFX1250-NEXT:    s_add_co_i32 s0, s0, 1
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v0, s0
+; GFX1250-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX1250-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX1250-NEXT:    s_endpgm
   %add = add <5 x i32> %arg0, <i32 1, i32 2, i32 3, i32 4, i32 5>
   store <5 x i32> %add, ptr addrspace(1) poison
   ret void
@@ -813,6 +1034,22 @@ define amdgpu_ps void @ps_mesa_inreg_v5f32(<5 x float> inreg %arg0) {
 ; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
 ; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_inreg_v5f32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_add_f32 s3, s3, -1.0
+; GFX1250-NEXT:    s_add_f32 s4, s4, 0.5
+; GFX1250-NEXT:    s_add_f32 s0, s0, 1.0
+; GFX1250-NEXT:    s_add_f32 s1, s1, 2.0
+; GFX1250-NEXT:    s_add_f32 s2, s2, 4.0
+; GFX1250-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_2)
+; GFX1250-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v0, s0
+; GFX1250-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
+; GFX1250-NEXT:    v_mov_b32_e32 v3, s3
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX1250-NEXT:    s_endpgm
   %add = fadd <5 x float> %arg0, <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>
   store <5 x float> %add, ptr addrspace(1) poison
   ret void
@@ -845,6 +1082,13 @@ define amdgpu_ps void @ps_mesa_v3i32(<3 x i32> %arg0) {
 ; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0
 ; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_v3i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_add_nc_u32 v2, 3, v2 :: v_dual_add_nc_u32 v1, 2, v1
+; GFX1250-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-NEXT:    global_store_b96 v[0:1], v[0:2], off
+; GFX1250-NEXT:    s_endpgm
   %add = add <3 x i32> %arg0, <i32 1, i32 2, i32 3>
   store <3 x i32> %add, ptr addrspace(1) poison
   ret void
@@ -876,6 +1120,13 @@ define amdgpu_ps void @ps_mesa_v3f32(<3 x float> %arg0) {
 ; GFX11-NEXT:    v_add_f32_e32 v0, 1.0, v0
 ; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_v3f32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_add_f32 v2, 4.0, v2 :: v_dual_add_f32 v1, 2.0, v1
+; GFX1250-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GFX1250-NEXT:    global_store_b96 v[0:1], v[0:2], off
+; GFX1250-NEXT:    s_endpgm
   %add = fadd <3 x float> %arg0, <float 1.0, float 2.0, float 4.0>
   store <3 x float> %add, ptr addrspace(1) poison
   ret void
@@ -917,6 +1168,16 @@ define amdgpu_ps void @ps_mesa_v5i32(<5 x i32> %arg0) {
 ; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
 ; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_v5i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_add_nc_u32 v3, 4, v3 :: v_dual_add_nc_u32 v2, 3, v2
+; GFX1250-NEXT:    v_dual_add_nc_u32 v1, 2, v1 :: v_dual_add_nc_u32 v4, 5, v4
+; GFX1250-NEXT:    v_add_nc_u32_e32 v0, 1, v0
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX1250-NEXT:    s_endpgm
   %add = add <5 x i32> %arg0, <i32 1, i32 2, i32 3, i32 4, i32 5>
   store <5 x i32> %add, ptr addrspace(1) poison
   ret void
@@ -956,6 +1217,16 @@ define amdgpu_ps void @ps_mesa_v5f32(<5 x float> %arg0) {
 ; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
 ; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX1250-LABEL: ps_mesa_v5f32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_add_f32 v3, -1.0, v3 :: v_dual_add_f32 v2, 4.0, v2
+; GFX1250-NEXT:    v_dual_add_f32 v1, 2.0, v1 :: v_dual_add_f32 v4, 0.5, v4
+; GFX1250-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b32 v[0:1], v4, off
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX1250-NEXT:    s_endpgm
   %add = fadd <5 x float> %arg0, <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>
   store <5 x float> %add, ptr addrspace(1) poison
   ret void
@@ -987,6 +1258,18 @@ define amdgpu_ps void @ps_mesa_i16(i16 %arg0) {
 ; GFX11-FAKE16-NEXT:    v_add_nc_u16 v0, v0, v0
 ; GFX11-FAKE16-NEXT:    global_store_b16 v[0:1], v0, off
 ; GFX11-FAKE16-NEXT:    s_endpgm
+;
+; GFX1250-TRUE16-LABEL: ps_mesa_i16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, v0.l
+; GFX1250-TRUE16-NEXT:    flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT:    s_endpgm
+;
+; GFX1250-FAKE16-LABEL: ps_mesa_i16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v0, v0, v0
+; GFX1250-FAKE16-NEXT:    global_store_b16 v[0:1], v0, off
+; GFX1250-FAKE16-NEXT:    s_endpgm
   %add = add i16 %arg0, %arg0
   store i16 %add, ptr addrspace(1) poison
   r...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/155314


More information about the llvm-commits mailing list