[llvm] [AMDGPU][CodeGen] enable D16Writes32BitVgpr for gfx12 (PR #165587)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 7 12:01:24 PST 2025
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/165587
From 8039d1856749ec713eb9374c394af3855f2bd7b0 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 28 Oct 2025 14:35:17 -0400
Subject: [PATCH] set D16 HW fix for gfx12
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 2 +
llvm/test/CodeGen/AMDGPU/spillv16.ll | 528 ++++++++++++++++++++++-----
2 files changed, 445 insertions(+), 85 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 54d94b1f8682e..0b61adf409948 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2069,6 +2069,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
+ FeatureD16Writes32BitVgpr
]>;
def FeatureISAVersion12_50 : FeatureSet<
@@ -2143,6 +2144,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureSupportsXNACK,
FeatureXNACK,
FeatureClusters,
+ FeatureD16Writes32BitVgpr,
]>;
def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll
index 2d54ac8283a3a..16a7bf9bc91dd 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.ll
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-TRUE16
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250,GFX1250-FAKE16
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16,+d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-TRUE16-D16W32
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-FAKE16
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16,-d16-write-vgpr32 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GFX1250-TRUE16-D16W16
+
define void @spill_i16_alu() {
; GCN-TRUE16-LABEL: spill_i16_alu:
@@ -35,23 +37,23 @@ define void @spill_i16_alu() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-TRUE16-LABEL: spill_i16_alu:
-; GFX1250-TRUE16: ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
-; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT: ;;#ASMSTART
-; GFX1250-TRUE16-NEXT: ;;#ASMEND
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_alu:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: spill_i16_alu:
; GFX1250-FAKE16: ; %bb.0: ; %entry
@@ -69,6 +71,41 @@ define void @spill_i16_alu() {
; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_alu:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_i16_alu:
+; GFX1250-TRUE16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
entry:
%alloca = alloca i16, i32 1, align 4, addrspace(5)
@@ -126,28 +163,28 @@ define void @spill_i16_alu_two_vals() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
-; GFX1250-TRUE16: ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
-; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT: ;;#ASMSTART
-; GFX1250-TRUE16-NEXT: ;;#ASMEND
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
-; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
-; GFX1250-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: spill_i16_alu_two_vals:
; GFX1250-FAKE16: ; %bb.0: ; %entry
@@ -170,6 +207,51 @@ define void @spill_i16_alu_two_vals() {
; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_i16_alu_two_vals:
+; GFX1250-TRUE16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
entry:
%alloca = alloca i16, i32 1, align 4, addrspace(5)
%alloca2 = alloca i16, i32 1, align 4, addrspace(5)
@@ -223,6 +305,53 @@ define void @spill_i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_i16:
+; GFX1250-FAKE16: ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT: ;;#ASMSTART
+; GFX1250-FAKE16-NEXT: ;;#ASMEND
+; GFX1250-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-LABEL: spill_i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -282,6 +411,53 @@ define void @spill_half() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-TRUE16-D16W32-LABEL: spill_half:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_half:
+; GFX1250-FAKE16: ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT: ;;#ASMSTART
+; GFX1250-FAKE16-NEXT: ;;#ASMEND
+; GFX1250-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_half:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-LABEL: spill_half:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -341,6 +517,53 @@ define void @spill_i16_from_v2i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-TRUE16-D16W32-LABEL: spill_i16_from_v2i16:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_i16_from_v2i16:
+; GFX1250-FAKE16: ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT: ;;#ASMSTART
+; GFX1250-FAKE16-NEXT: ;;#ASMEND
+; GFX1250-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_i16_from_v2i16:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-LABEL: spill_i16_from_v2i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -414,29 +637,29 @@ define void @spill_2xi16_from_v2i16() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
-; GFX1250-TRUE16: ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: s_clause 0x1
-; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
-; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT: ;;#ASMSTART
-; GFX1250-TRUE16-NEXT: ;;#ASMEND
-; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_clause 0x1 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: spill_2xi16_from_v2i16:
; GFX1250-FAKE16: ; %bb.0: ; %entry
@@ -444,7 +667,7 @@ define void @spill_2xi16_from_v2i16() {
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8
; GFX1250-FAKE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
@@ -461,6 +684,53 @@ define void @spill_2xi16_from_v2i16() {
; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_clause 0x1 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16:
+; GFX1250-TRUE16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
@@ -520,26 +790,26 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
-; GFX1250-TRUE16: ; %bb.0: ; %entry
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
-; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
-; GFX1250-TRUE16-NEXT: ;;#ASMSTART
-; GFX1250-TRUE16-NEXT: ;;#ASMEND
-; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.l
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
-; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
-; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-D16W32-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
;
; GFX1250-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
; GFX1250-FAKE16: ; %bb.0: ; %entry
@@ -560,6 +830,47 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
; GFX1250-FAKE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg:
+; GFX1250-TRUE16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, v7.l
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
+; GFX1250-TRUE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
entry:
%alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5)
@@ -595,6 +906,53 @@ define void @spill_v2i16() {
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
+; GFX1250-TRUE16-D16W32-LABEL: spill_v2i16:
+; GFX1250-TRUE16-D16W32: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W32-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W32-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W32-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W32-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: spill_v2i16:
+; GFX1250-FAKE16: ; %bb.0: ; %entry
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-FAKE16-NEXT: ;;#ASMSTART
+; GFX1250-FAKE16-NEXT: ;;#ASMEND
+; GFX1250-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-FAKE16-NEXT: s_wait_storecnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-TRUE16-D16W16-LABEL: spill_v2i16:
+; GFX1250-TRUE16-D16W16: ; %bb.0: ; %entry
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_xcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMSTART
+; GFX1250-TRUE16-D16W16-NEXT: ;;#ASMEND
+; GFX1250-TRUE16-D16W16-NEXT: scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
+; GFX1250-TRUE16-D16W16-NEXT: s_wait_storecnt 0x0
+; GFX1250-TRUE16-D16W16-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-LABEL: spill_v2i16:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
More information about the llvm-commits mailing list