[llvm] [AMDGPU][CodeGen] enable D16Writes32BitVgpr for gfx12 (PR #165587)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 7 12:01:24 PST 2025


https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/165587

>From 8039d1856749ec713eb9374c394af3855f2bd7b0 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 28 Oct 2025 14:35:17 -0400
Subject: [PATCH] [AMDGPU] Enable FeatureD16Writes32BitVgpr for gfx12 and gfx1250

---
 llvm/lib/Target/AMDGPU/AMDGPU.td     |   2 +
 llvm/test/CodeGen/AMDGPU/spillv16.ll | 528 ++++++++++++++++++++++-----
 2 files changed, 445 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 54d94b1f8682e..0b61adf409948 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2069,6 +2069,7 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureMemoryAtomicFAddF32DenormalSupport,
    FeatureBVHDualAndBVH8Insts,
    FeatureWaitsBeforeSystemScopeStores,
+   FeatureD16Writes32BitVgpr
    ]>;
 
 def FeatureISAVersion12_50 : FeatureSet<
@@ -2143,6 +2144,7 @@ def FeatureISAVersion12_50 : FeatureSet<
    FeatureSupportsXNACK,
    FeatureXNACK,
    FeatureClusters,
+   FeatureD16Writes32BitVgpr,
 ]>;
 
 def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll
index 2d54ac8283a3a..16a7bf9bc91dd 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.ll
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-TRUE16
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-FAKE16
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16,+d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-TRUE16-D16W32
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-FAKE16
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16,-d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-TRUE16-D16W16
+
 
 define void @spill_i16_alu() {
 ; GCN-TRUE16-LABEL: spill_i16_alu:
@@ -35,23 +37,23 @@ define void @spill_i16_alu() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-TRUE16-LABEL: spill_i16_alu:
-; GFX1250-TRUE16:       ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
-; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
-; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_alu:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; GFX1250-FAKE16-LABEL: spill_i16_alu:
 ; GFX1250-FAKE16:       ; %bb.0: ; %entry
@@ -69,6 +71,41 @@ define void @spill_i16_alu() {
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_alu:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_i16_alu:
+; GFX1250-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 entry:
   %alloca = alloca i16, i32 1, align 4, addrspace(5)
 
@@ -126,28 +163,28 @@ define void @spill_i16_alu_two_vals() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
-; GFX1250-TRUE16:       ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
-; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
-; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
-; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
-; GFX1250-TRUE16-NEXT:    scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; GFX1250-FAKE16-LABEL: spill_i16_alu_two_vals:
 ; GFX1250-FAKE16:       ; %bb.0: ; %entry
@@ -170,6 +207,51 @@ define void @spill_i16_alu_two_vals() {
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-NEXT:    scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 entry:
   %alloca = alloca i16, i32 1, align 4, addrspace(5)
   %alloca2 = alloca i16, i32 1, align 4, addrspace(5)
@@ -223,6 +305,53 @@ define void @spill_i16() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_i16:
+; GFX1250-FAKE16:       ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
+; GFX1250-FAKE16-NEXT:    ;;#ASMEND
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
 ; GFX1250-LABEL: spill_i16:
 ; GFX1250:       ; %bb.0: ; %entry
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
@@ -282,6 +411,53 @@ define void @spill_half() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX1250-TRUE16-D16W32-LABEL: spill_half:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_half:
+; GFX1250-FAKE16:       ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
+; GFX1250-FAKE16-NEXT:    ;;#ASMEND
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_half:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
 ; GFX1250-LABEL: spill_half:
 ; GFX1250:       ; %bb.0: ; %entry
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
@@ -341,6 +517,53 @@ define void @spill_i16_from_v2i16() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_from_v2i16:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_i16_from_v2i16:
+; GFX1250-FAKE16:       ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
+; GFX1250-FAKE16-NEXT:    ;;#ASMEND
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_from_v2i16:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
 ; GFX1250-LABEL: spill_i16_from_v2i16:
 ; GFX1250:       ; %bb.0: ; %entry
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
@@ -414,29 +637,29 @@ define void @spill_2xi16_from_v2i16() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
-; GFX1250-TRUE16:       ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    s_clause 0x1
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
-; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
-; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_clause 0x1 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; GFX1250-FAKE16-LABEL: spill_2xi16_from_v2i16:
 ; GFX1250-FAKE16:       ; %bb.0: ; %entry
@@ -444,7 +667,7 @@ define void @spill_2xi16_from_v2i16() {
 ; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT:    s_clause 0x1
+; GFX1250-FAKE16-NEXT:    s_clause 0x1 ; 4-byte Folded Spill
 ; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
@@ -461,6 +684,53 @@ define void @spill_2xi16_from_v2i16() {
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_clause 0x1 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    s_clause 0x1
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 entry:
   %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
 
@@ -520,26 +790,26 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
 ; GCN-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
-; GFX1250-TRUE16:       ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
-; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
-; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v7.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; GFX1250-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
 ; GFX1250-FAKE16:       ; %bb.0: ; %entry
@@ -560,6 +830,47 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 entry:
   %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
 
@@ -595,6 +906,53 @@ define void @spill_v2i16() {
 ; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX1250-TRUE16-D16W32-LABEL: spill_v2i16:
+; GFX1250-TRUE16-D16W32:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_v2i16:
+; GFX1250-FAKE16:       ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
+; GFX1250-FAKE16-NEXT:    ;;#ASMEND
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_v2i16:
+; GFX1250-TRUE16-D16W16:       ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT:    ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT:    s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT:    s_set_pc_i64 s[30:31]
 ; GFX1250-LABEL: spill_v2i16:
 ; GFX1250:       ; %bb.0: ; %entry
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0



More information about the llvm-commits mailing list