[llvm] e264cff - [NFC][AMDGPU] Update tests to use autogened CHECKs (#140648)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 19 19:47:45 PDT 2025
Author: Chinmay Deshpande
Date: 2025-05-19T19:47:40-07:00
New Revision: e264cff6fd676784adf1e436747259548ba708f8
URL: https://github.com/llvm/llvm-project/commit/e264cff6fd676784adf1e436747259548ba708f8
DIFF: https://github.com/llvm/llvm-project/commit/e264cff6fd676784adf1e436747259548ba708f8.diff
LOG: [NFC][AMDGPU] Update tests to use autogened CHECKs (#140648)
Added:
Modified:
llvm/test/CodeGen/AMDGPU/fceil64.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
llvm/test/CodeGen/AMDGPU/scratch-simple.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll
index f34a64c470c4e..367bbe7eb68e9 100644
--- a/llvm/test/CodeGen/AMDGPU/fceil64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=CI -check-prefix=FUNC %s
@@ -9,36 +10,98 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
-; FUNC-LABEL: {{^}}fceil_f64:
-; CI: v_ceil_f64_e32
-; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01
-; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]]
-; SI-DAG: s_andn2_b64
-; SI-DAG: cmp_gt_i32
-; SI-DAG: s_cselect_b32
-; SI-DAG: s_cselect_b32
-; SI-DAG: cmp_lt_i32
-; SI-DAG: s_cselect_b32
-; SI-DAG: s_cselect_b32
-; SI-DAG: v_cmp_gt_f64_e64 [[FCMP:s[[0-9]+:[0-9]+]]]
-; SI-DAG: v_cmp_lg_f64_e32 vcc
-; SI-DAG: s_and_b64 [[AND1:s[[0-9]+:[0-9]+]]], [[FCMP]], vcc
-; SI-DAG: s_and_b64 [[AND1]], [[AND1]], exec
-; SI-DAG: s_cselect_b32 s{{[0-9]+}}, 0x3ff00000, 0
-; SI: v_add_f64
-; SI: s_endpgm
define amdgpu_kernel void @fceil_f64(ptr addrspace(1) %out, double %x) {
+; SI-LABEL: fceil_f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s5, 0xfffff
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfe_u32 s8, s3, 0xb0014
+; SI-NEXT: s_and_b32 s9, s3, 0x80000000
+; SI-NEXT: s_addk_i32 s8, 0xfc01
+; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
+; SI-NEXT: s_andn2_b64 s[4:5], s[2:3], s[4:5]
+; SI-NEXT: s_cmp_lt_i32 s8, 0
+; SI-NEXT: s_cselect_b32 s4, 0, s4
+; SI-NEXT: s_cselect_b32 s5, s9, s5
+; SI-NEXT: s_cmp_gt_i32 s8, 51
+; SI-NEXT: s_cselect_b32 s9, s3, s5
+; SI-NEXT: s_cselect_b32 s8, s2, s4
+; SI-NEXT: v_cmp_gt_f64_e64 s[4:5], s[2:3], 0
+; SI-NEXT: v_mov_b32_e32 v0, s8
+; SI-NEXT: v_mov_b32_e32 v1, s9
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT: s_and_b64 s[2:3], s[4:5], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: v_mov_b32_e32 v1, s2
+; SI-NEXT: v_add_f64 v[0:1], s[8:9], v[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT: s_endpgm
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
store double %y, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fceil_v2f64:
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
define amdgpu_kernel void @fceil_v2f64(ptr addrspace(1) %out, <2 x double> %x) {
+; SI-LABEL: fceil_v2f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_mov_b32 s9, 0xfffff
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfe_u32 s10, s7, 0xb0014
+; SI-NEXT: s_and_b32 s12, s7, 0x80000000
+; SI-NEXT: s_add_i32 s13, s10, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[10:11], s[8:9], s13
+; SI-NEXT: s_andn2_b64 s[10:11], s[6:7], s[10:11]
+; SI-NEXT: s_cmp_lt_i32 s13, 0
+; SI-NEXT: s_cselect_b32 s10, 0, s10
+; SI-NEXT: s_cselect_b32 s11, s12, s11
+; SI-NEXT: s_cmp_gt_i32 s13, 51
+; SI-NEXT: s_cselect_b32 s11, s7, s11
+; SI-NEXT: s_cselect_b32 s10, s6, s10
+; SI-NEXT: v_cmp_gt_f64_e64 s[12:13], s[6:7], 0
+; SI-NEXT: v_mov_b32_e32 v1, s10
+; SI-NEXT: v_mov_b32_e32 v2, s11
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[6:7], v[1:2]
+; SI-NEXT: s_and_b64 s[6:7], s[12:13], vcc
+; SI-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; SI-NEXT: s_cselect_b32 s12, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
+; SI-NEXT: s_and_b32 s13, s5, 0x80000000
+; SI-NEXT: s_add_i32 s14, s6, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[6:7], s[8:9], s14
+; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
+; SI-NEXT: s_cmp_lt_i32 s14, 0
+; SI-NEXT: s_cselect_b32 s6, 0, s6
+; SI-NEXT: s_cselect_b32 s7, s13, s7
+; SI-NEXT: s_cmp_gt_i32 s14, 51
+; SI-NEXT: s_cselect_b32 s7, s5, s7
+; SI-NEXT: s_cselect_b32 s6, s4, s6
+; SI-NEXT: v_cmp_gt_f64_e64 s[8:9], s[4:5], 0
+; SI-NEXT: v_mov_b32_e32 v1, s12
+; SI-NEXT: v_mov_b32_e32 v2, s6
+; SI-NEXT: v_mov_b32_e32 v3, s7
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[2:3]
+; SI-NEXT: s_and_b64 s[4:5], s[8:9], vcc
+; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: v_add_f64 v[2:3], s[10:11], v[0:1]
+; SI-NEXT: v_mov_b32_e32 v1, s4
+; SI-NEXT: v_add_f64 v[0:1], s[6:7], v[0:1]
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: s_endpgm
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
store <2 x double> %y, ptr addrspace(1) %out
ret void
@@ -54,51 +117,640 @@ define amdgpu_kernel void @fceil_v2f64(ptr addrspace(1) %out, <2 x double> %x) {
; ret void
; }
-; FUNC-LABEL: {{^}}fceil_v4f64:
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
define amdgpu_kernel void @fceil_v4f64(ptr addrspace(1) %out, <4 x double> %x) {
+; SI-LABEL: fceil_v4f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x11
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s13, 0xfffff
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: s_mov_b32 s12, s10
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfe_u32 s18, s3, 0xb0014
+; SI-NEXT: s_and_b32 s20, s3, 0x80000000
+; SI-NEXT: v_cmp_gt_f64_e64 s[14:15], s[2:3], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[16:17], s[0:1], 0
+; SI-NEXT: s_add_i32 s21, s18, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[18:19], s[12:13], s21
+; SI-NEXT: s_andn2_b64 s[18:19], s[2:3], s[18:19]
+; SI-NEXT: s_cmp_lt_i32 s21, 0
+; SI-NEXT: s_cselect_b32 s18, 0, s18
+; SI-NEXT: s_cselect_b32 s19, s20, s19
+; SI-NEXT: s_cmp_gt_i32 s21, 51
+; SI-NEXT: s_cselect_b32 s19, s3, s19
+; SI-NEXT: s_cselect_b32 s18, s2, s18
+; SI-NEXT: v_cmp_gt_f64_e64 s[20:21], s[6:7], 0
+; SI-NEXT: v_mov_b32_e32 v1, s18
+; SI-NEXT: v_mov_b32_e32 v2, s19
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[2:3], v[1:2]
+; SI-NEXT: s_and_b64 s[2:3], s[14:15], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s22, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s2, s1, 0xb0014
+; SI-NEXT: s_and_b32 s14, s1, 0x80000000
+; SI-NEXT: s_add_i32 s15, s2, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[12:13], s15
+; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s15, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s14, s3
+; SI-NEXT: s_cmp_gt_i32 s15, 51
+; SI-NEXT: s_cselect_b32 s3, s1, s3
+; SI-NEXT: s_cselect_b32 s2, s0, s2
+; SI-NEXT: v_cmp_gt_f64_e64 s[14:15], s[4:5], 0
+; SI-NEXT: v_mov_b32_e32 v1, s22
+; SI-NEXT: v_mov_b32_e32 v2, s2
+; SI-NEXT: v_mov_b32_e32 v3, s3
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[0:1], v[2:3]
+; SI-NEXT: s_and_b64 s[0:1], s[16:17], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s16, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014
+; SI-NEXT: s_and_b32 s17, s7, 0x80000000
+; SI-NEXT: s_add_i32 s22, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[12:13], s22
+; SI-NEXT: s_andn2_b64 s[0:1], s[6:7], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s22, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s17, s1
+; SI-NEXT: s_cmp_gt_i32 s22, 51
+; SI-NEXT: s_cselect_b32 s1, s7, s1
+; SI-NEXT: s_cselect_b32 s0, s6, s0
+; SI-NEXT: v_add_f64 v[4:5], s[18:19], v[0:1]
+; SI-NEXT: v_mov_b32_e32 v1, s16
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: v_mov_b32_e32 v2, s0
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[6:7], v[2:3]
+; SI-NEXT: s_and_b64 s[6:7], s[20:21], vcc
+; SI-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; SI-NEXT: s_cselect_b32 s16, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s6, s5, 0xb0014
+; SI-NEXT: s_and_b32 s17, s5, 0x80000000
+; SI-NEXT: s_add_i32 s18, s6, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[6:7], s[12:13], s18
+; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
+; SI-NEXT: s_cmp_lt_i32 s18, 0
+; SI-NEXT: s_cselect_b32 s6, 0, s6
+; SI-NEXT: s_cselect_b32 s7, s17, s7
+; SI-NEXT: s_cmp_gt_i32 s18, 51
+; SI-NEXT: s_cselect_b32 s7, s5, s7
+; SI-NEXT: s_cselect_b32 s6, s4, s6
+; SI-NEXT: v_add_f64 v[2:3], s[2:3], v[0:1]
+; SI-NEXT: v_mov_b32_e32 v1, s16
+; SI-NEXT: v_mov_b32_e32 v6, s6
+; SI-NEXT: v_mov_b32_e32 v7, s7
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[6:7]
+; SI-NEXT: s_and_b64 s[2:3], s[14:15], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: v_add_f64 v[8:9], s[0:1], v[0:1]
+; SI-NEXT: v_mov_b32_e32 v1, s2
+; SI-NEXT: v_add_f64 v[6:7], s[6:7], v[0:1]
+; SI-NEXT: buffer_store_dwordx4 v[6:9], off, s[8:11], 0 offset:16
+; SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[8:11], 0
+; SI-NEXT: s_endpgm
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
store <4 x double> %y, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fceil_v8f64:
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
define amdgpu_kernel void @fceil_v8f64(ptr addrspace(1) %out, <8 x double> %x) {
+; SI-LABEL: fceil_v8f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx16 s[0:15], s[4:5], 0x19
+; SI-NEXT: s_mov_b32 s19, 0xf000
+; SI-NEXT: s_mov_b32 s18, -1
+; SI-NEXT: s_mov_b32 s21, 0xfffff
+; SI-NEXT: v_mov_b32_e32 v4, 0
+; SI-NEXT: s_mov_b32 s20, s18
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfe_u32 s33, s3, 0xb0014
+; SI-NEXT: s_and_b32 s40, s3, 0x80000000
+; SI-NEXT: v_cmp_gt_f64_e64 s[22:23], s[2:3], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[26:27], s[0:1], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[30:31], s[6:7], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[36:37], s[4:5], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[24:25], s[10:11], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[28:29], s[8:9], 0
+; SI-NEXT: v_cmp_gt_f64_e64 s[34:35], s[14:15], 0
+; SI-NEXT: s_addk_i32 s33, 0xfc01
+; SI-NEXT: s_lshr_b64 s[38:39], s[20:21], s33
+; SI-NEXT: s_andn2_b64 s[38:39], s[2:3], s[38:39]
+; SI-NEXT: s_cmp_lt_i32 s33, 0
+; SI-NEXT: s_cselect_b32 s38, 0, s38
+; SI-NEXT: s_cselect_b32 s39, s40, s39
+; SI-NEXT: s_cmp_gt_i32 s33, 51
+; SI-NEXT: s_cselect_b32 s41, s3, s39
+; SI-NEXT: s_cselect_b32 s40, s2, s38
+; SI-NEXT: v_cmp_gt_f64_e64 s[38:39], s[12:13], 0
+; SI-NEXT: v_mov_b32_e32 v0, s40
+; SI-NEXT: v_mov_b32_e32 v1, s41
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT: s_and_b64 s[2:3], s[22:23], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s3, s1, 0xb0014
+; SI-NEXT: s_and_b32 s22, s1, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s2
+; SI-NEXT: s_add_i32 s23, s3, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s23
+; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s23, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s22, s3
+; SI-NEXT: s_cmp_gt_i32 s23, 51
+; SI-NEXT: s_cselect_b32 s3, s1, s3
+; SI-NEXT: s_cselect_b32 s2, s0, s2
+; SI-NEXT: v_add_f64 v[2:3], s[40:41], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[0:1], v[0:1]
+; SI-NEXT: s_and_b64 s[0:1], s[26:27], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s0, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s1, s7, 0xb0014
+; SI-NEXT: s_and_b32 s22, s7, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s0
+; SI-NEXT: s_add_i32 s23, s1, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[20:21], s23
+; SI-NEXT: s_andn2_b64 s[0:1], s[6:7], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s23, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s22, s1
+; SI-NEXT: s_cmp_gt_i32 s23, 51
+; SI-NEXT: s_cselect_b32 s1, s7, s1
+; SI-NEXT: s_cselect_b32 s0, s6, s0
+; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v6, s1
+; SI-NEXT: v_mov_b32_e32 v5, s0
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[6:7], v[5:6]
+; SI-NEXT: s_and_b64 s[2:3], s[30:31], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s3, s5, 0xb0014
+; SI-NEXT: s_and_b32 s6, s5, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s2
+; SI-NEXT: s_add_i32 s7, s3, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s7
+; SI-NEXT: s_andn2_b64 s[2:3], s[4:5], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s7, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s6, s3
+; SI-NEXT: s_cmp_gt_i32 s7, 51
+; SI-NEXT: s_cselect_b32 s3, s5, s3
+; SI-NEXT: s_cselect_b32 s2, s4, s2
+; SI-NEXT: v_add_f64 v[7:8], s[0:1], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v6, s3
+; SI-NEXT: v_mov_b32_e32 v5, s2
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[5:6]
+; SI-NEXT: s_and_b64 s[0:1], s[36:37], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s0, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s1, s11, 0xb0014
+; SI-NEXT: s_and_b32 s4, s11, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s0
+; SI-NEXT: v_add_f64 v[5:6], s[2:3], v[4:5]
+; SI-NEXT: s_add_i32 s2, s1, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[20:21], s2
+; SI-NEXT: s_andn2_b64 s[0:1], s[10:11], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s2, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s4, s1
+; SI-NEXT: s_cmp_gt_i32 s2, 51
+; SI-NEXT: s_cselect_b32 s1, s11, s1
+; SI-NEXT: s_cselect_b32 s0, s10, s0
+; SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[16:19], 0 offset:16
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v6, s1
+; SI-NEXT: v_mov_b32_e32 v5, s0
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[10:11], v[5:6]
+; SI-NEXT: s_and_b64 s[2:3], s[24:25], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s3, s9, 0xb0014
+; SI-NEXT: s_and_b32 s4, s9, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s2
+; SI-NEXT: s_add_i32 s5, s3, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s5
+; SI-NEXT: s_andn2_b64 s[2:3], s[8:9], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s5, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s4, s3
+; SI-NEXT: s_cmp_gt_i32 s5, 51
+; SI-NEXT: s_cselect_b32 s3, s9, s3
+; SI-NEXT: s_cselect_b32 s2, s8, s2
+; SI-NEXT: v_add_f64 v[7:8], s[0:1], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v6, s3
+; SI-NEXT: v_mov_b32_e32 v5, s2
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[8:9], v[5:6]
+; SI-NEXT: s_and_b64 s[0:1], s[28:29], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s0, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s1, s15, 0xb0014
+; SI-NEXT: s_and_b32 s4, s15, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v5, s0
+; SI-NEXT: s_add_i32 s5, s1, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[20:21], s5
+; SI-NEXT: s_andn2_b64 s[0:1], s[14:15], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s5, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s4, s1
+; SI-NEXT: s_cmp_gt_i32 s5, 51
+; SI-NEXT: s_cselect_b32 s1, s15, s1
+; SI-NEXT: s_cselect_b32 s0, s14, s0
+; SI-NEXT: v_add_f64 v[5:6], s[2:3], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v10, s1
+; SI-NEXT: v_mov_b32_e32 v9, s0
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[14:15], v[9:10]
+; SI-NEXT: s_and_b64 s[2:3], s[34:35], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s2, s13, 0xb0014
+; SI-NEXT: s_and_b32 s5, s13, 0x80000000
+; SI-NEXT: s_add_i32 s6, s2, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[20:21], s6
+; SI-NEXT: s_andn2_b64 s[2:3], s[12:13], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s6, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s5, s3
+; SI-NEXT: s_cmp_gt_i32 s6, 51
+; SI-NEXT: s_cselect_b32 s3, s13, s3
+; SI-NEXT: s_cselect_b32 s2, s12, s2
+; SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[16:19], 0 offset:32
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, s4
+; SI-NEXT: v_mov_b32_e32 v7, s3
+; SI-NEXT: v_mov_b32_e32 v6, s2
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[12:13], v[6:7]
+; SI-NEXT: s_and_b64 s[4:5], s[38:39], vcc
+; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: v_add_f64 v[6:7], s[0:1], v[4:5]
+; SI-NEXT: v_mov_b32_e32 v5, s4
+; SI-NEXT: v_add_f64 v[4:5], s[2:3], v[4:5]
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:48
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0
+; SI-NEXT: s_endpgm
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
store <8 x double> %y, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fceil_v16f64:
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
-; CI: v_ceil_f64_e32
define amdgpu_kernel void @fceil_v16f64(ptr addrspace(1) %out, <16 x double> %x) {
+; SI-LABEL: fceil_v16f64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x29
+; SI-NEXT: s_mov_b32 s26, -1
+; SI-NEXT: s_mov_b32 s29, 0xfffff
+; SI-NEXT: s_load_dwordx2 s[24:25], s[4:5], 0x9
+; SI-NEXT: v_mov_b32_e32 v8, 0
+; SI-NEXT: s_mov_b32 s28, s26
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfe_u32 s0, s11, 0xb0014
+; SI-NEXT: s_and_b32 s2, s11, 0x80000000
+; SI-NEXT: s_add_i32 s3, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s3
+; SI-NEXT: s_andn2_b64 s[0:1], s[10:11], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s3, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s2, s1
+; SI-NEXT: s_cmp_gt_i32 s3, 51
+; SI-NEXT: s_cselect_b32 s3, s11, s1
+; SI-NEXT: s_cselect_b32 s2, s10, s0
+; SI-NEXT: v_cmp_gt_f64_e64 s[0:1], s[10:11], 0
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[10:11], v[0:1]
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s10, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s9, 0xb0014
+; SI-NEXT: s_and_b32 s6, s9, 0x80000000
+; SI-NEXT: s_add_i32 s7, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s7
+; SI-NEXT: s_andn2_b64 s[0:1], s[8:9], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s7, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s6, s1
+; SI-NEXT: s_cmp_gt_i32 s7, 51
+; SI-NEXT: s_cselect_b32 s7, s9, s1
+; SI-NEXT: s_cselect_b32 s6, s8, s0
+; SI-NEXT: v_cmp_gt_f64_e64 s[0:1], s[8:9], 0
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: v_mov_b32_e32 v1, s7
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[8:9], v[0:1]
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s27, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s15, 0xb0014
+; SI-NEXT: s_and_b32 s8, s15, 0x80000000
+; SI-NEXT: s_add_i32 s9, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s9
+; SI-NEXT: s_andn2_b64 s[0:1], s[14:15], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s9, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s8, s1
+; SI-NEXT: s_cmp_gt_i32 s9, 51
+; SI-NEXT: s_cselect_b32 s9, s15, s1
+; SI-NEXT: s_cselect_b32 s8, s14, s0
+; SI-NEXT: v_cmp_gt_f64_e64 s[0:1], s[14:15], 0
+; SI-NEXT: v_mov_b32_e32 v9, s10
+; SI-NEXT: v_mov_b32_e32 v0, s8
+; SI-NEXT: v_mov_b32_e32 v1, s9
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[14:15], v[0:1]
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s14, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s13, 0xb0014
+; SI-NEXT: s_and_b32 s10, s13, 0x80000000
+; SI-NEXT: s_add_i32 s15, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s15
+; SI-NEXT: s_andn2_b64 s[0:1], s[12:13], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s15, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s10, s1
+; SI-NEXT: v_cmp_gt_f64_e64 s[10:11], s[12:13], 0
+; SI-NEXT: s_cmp_gt_i32 s15, 51
+; SI-NEXT: s_cselect_b32 s1, s13, s1
+; SI-NEXT: s_cselect_b32 s0, s12, s0
+; SI-NEXT: v_add_f64 v[2:3], s[2:3], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s27
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[12:13], v[0:1]
+; SI-NEXT: s_and_b64 s[2:3], s[10:11], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s10, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s2, s19, 0xb0014
+; SI-NEXT: s_and_b32 s11, s19, 0x80000000
+; SI-NEXT: s_add_i32 s12, s2, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[28:29], s12
+; SI-NEXT: s_andn2_b64 s[2:3], s[18:19], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s12, 0
+; SI-NEXT: s_cselect_b32 s13, 0, s2
+; SI-NEXT: s_cselect_b32 s11, s11, s3
+; SI-NEXT: v_cmp_gt_f64_e64 s[2:3], s[18:19], 0
+; SI-NEXT: s_cmp_gt_i32 s12, 51
+; SI-NEXT: s_cselect_b32 s31, s19, s11
+; SI-NEXT: s_cselect_b32 s30, s18, s13
+; SI-NEXT: v_add_f64 v[0:1], s[6:7], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s14
+; SI-NEXT: v_mov_b32_e32 v4, s30
+; SI-NEXT: v_mov_b32_e32 v5, s31
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[18:19], v[4:5]
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s33, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s2, s17, 0xb0014
+; SI-NEXT: s_and_b32 s6, s17, 0x80000000
+; SI-NEXT: s_add_i32 s7, s2, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[28:29], s7
+; SI-NEXT: s_andn2_b64 s[2:3], s[16:17], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s7, 0
+; SI-NEXT: s_cselect_b32 s11, 0, s2
+; SI-NEXT: s_cselect_b32 s6, s6, s3
+; SI-NEXT: v_cmp_gt_f64_e64 s[2:3], s[16:17], 0
+; SI-NEXT: s_cmp_gt_i32 s7, 51
+; SI-NEXT: s_cselect_b32 s19, s17, s6
+; SI-NEXT: s_cselect_b32 s18, s16, s11
+; SI-NEXT: v_add_f64 v[6:7], s[8:9], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s10
+; SI-NEXT: v_mov_b32_e32 v4, s18
+; SI-NEXT: v_mov_b32_e32 v5, s19
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[16:17], v[4:5]
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s36, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s2, s23, 0xb0014
+; SI-NEXT: s_and_b32 s6, s23, 0x80000000
+; SI-NEXT: s_add_i32 s7, s2, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[28:29], s7
+; SI-NEXT: s_andn2_b64 s[2:3], s[22:23], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s7, 0
+; SI-NEXT: s_cselect_b32 s6, s6, s3
+; SI-NEXT: s_cselect_b32 s8, 0, s2
+; SI-NEXT: v_cmp_gt_f64_e64 s[2:3], s[22:23], 0
+; SI-NEXT: s_cmp_gt_i32 s7, 51
+; SI-NEXT: s_cselect_b32 s35, s23, s6
+; SI-NEXT: s_cselect_b32 s34, s22, s8
+; SI-NEXT: v_add_f64 v[4:5], s[0:1], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s34
+; SI-NEXT: v_mov_b32_e32 v10, s35
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[22:23], v[9:10]
+; SI-NEXT: s_and_b64 s[0:1], s[2:3], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s37, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s21, 0xb0014
+; SI-NEXT: s_and_b32 s2, s21, 0x80000000
+; SI-NEXT: s_add_i32 s3, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s3
+; SI-NEXT: s_andn2_b64 s[0:1], s[20:21], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s3, 0
+; SI-NEXT: s_cselect_b32 s1, s2, s1
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cmp_gt_i32 s3, 51
+; SI-NEXT: s_cselect_b32 s17, s21, s1
+; SI-NEXT: s_cselect_b32 s16, s20, s0
+; SI-NEXT: v_cmp_gt_f64_e64 s[22:23], s[20:21], 0
+; SI-NEXT: v_mov_b32_e32 v9, s16
+; SI-NEXT: v_mov_b32_e32 v10, s17
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[20:21], v[9:10]
+; SI-NEXT: s_load_dwordx16 s[0:15], s[4:5], 0x39
+; SI-NEXT: s_mov_b32 s27, 0xf000
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:16
+; SI-NEXT: v_mov_b32_e32 v9, s33
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_add_f64 v[6:7], s[30:31], v[8:9]
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_cmp_gt_f64_e64 s[20:21], s[2:3], 0
+; SI-NEXT: v_mov_b32_e32 v9, s36
+; SI-NEXT: v_cmp_gt_f64_e64 s[30:31], s[0:1], 0
+; SI-NEXT: v_add_f64 v[4:5], s[18:19], v[8:9]
+; SI-NEXT: v_cmp_gt_f64_e64 s[18:19], s[6:7], 0
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:32
+; SI-NEXT: v_mov_b32_e32 v9, s37
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_add_f64 v[6:7], s[34:35], v[8:9]
+; SI-NEXT: v_cmp_gt_f64_e64 s[34:35], s[4:5], 0
+; SI-NEXT: s_and_b64 s[22:23], s[22:23], vcc
+; SI-NEXT: s_and_b64 s[22:23], s[22:23], exec
+; SI-NEXT: s_cselect_b32 s22, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s23, s3, 0xb0014
+; SI-NEXT: s_and_b32 s33, s3, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s22
+; SI-NEXT: s_add_i32 s36, s23, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[22:23], s[28:29], s36
+; SI-NEXT: s_andn2_b64 s[22:23], s[2:3], s[22:23]
+; SI-NEXT: s_cmp_lt_i32 s36, 0
+; SI-NEXT: s_cselect_b32 s38, 0, s22
+; SI-NEXT: s_cselect_b32 s33, s33, s23
+; SI-NEXT: v_cmp_gt_f64_e64 s[22:23], s[10:11], 0
+; SI-NEXT: s_cmp_gt_i32 s36, 51
+; SI-NEXT: s_cselect_b32 s37, s3, s33
+; SI-NEXT: s_cselect_b32 s36, s2, s38
+; SI-NEXT: v_add_f64 v[4:5], s[16:17], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s36
+; SI-NEXT: v_mov_b32_e32 v10, s37
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[2:3], v[9:10]
+; SI-NEXT: s_and_b64 s[2:3], s[20:21], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s3, s1, 0xb0014
+; SI-NEXT: s_and_b32 s16, s1, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s2
+; SI-NEXT: s_add_i32 s17, s3, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[28:29], s17
+; SI-NEXT: s_andn2_b64 s[2:3], s[0:1], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s17, 0
+; SI-NEXT: s_cselect_b32 s20, 0, s2
+; SI-NEXT: s_cselect_b32 s16, s16, s3
+; SI-NEXT: v_cmp_gt_f64_e64 s[2:3], s[8:9], 0
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:48
+; SI-NEXT: s_cmp_gt_i32 s17, 51
+; SI-NEXT: s_cselect_b32 s17, s1, s16
+; SI-NEXT: s_cselect_b32 s16, s0, s20
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_add_f64 v[6:7], s[36:37], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v4, s16
+; SI-NEXT: v_mov_b32_e32 v5, s17
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[0:1], v[4:5]
+; SI-NEXT: s_and_b64 s[0:1], s[30:31], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s0, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s1, s7, 0xb0014
+; SI-NEXT: s_and_b32 s20, s7, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s0
+; SI-NEXT: s_add_i32 s21, s1, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s21
+; SI-NEXT: s_andn2_b64 s[0:1], s[6:7], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s21, 0
+; SI-NEXT: s_cselect_b32 s30, 0, s0
+; SI-NEXT: s_cselect_b32 s20, s20, s1
+; SI-NEXT: v_cmp_gt_f64_e64 s[0:1], s[14:15], 0
+; SI-NEXT: s_cmp_gt_i32 s21, 51
+; SI-NEXT: s_cselect_b32 s21, s7, s20
+; SI-NEXT: s_cselect_b32 s20, s6, s30
+; SI-NEXT: v_add_f64 v[4:5], s[16:17], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s20
+; SI-NEXT: v_mov_b32_e32 v10, s21
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[6:7], v[9:10]
+; SI-NEXT: s_and_b64 s[6:7], s[18:19], vcc
+; SI-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; SI-NEXT: s_cselect_b32 s6, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s7, s5, 0xb0014
+; SI-NEXT: s_and_b32 s16, s5, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s6
+; SI-NEXT: s_add_i32 s17, s7, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[6:7], s[28:29], s17
+; SI-NEXT: s_andn2_b64 s[6:7], s[4:5], s[6:7]
+; SI-NEXT: s_cmp_lt_i32 s17, 0
+; SI-NEXT: s_cselect_b32 s18, 0, s6
+; SI-NEXT: s_cselect_b32 s16, s16, s7
+; SI-NEXT: v_cmp_gt_f64_e64 s[6:7], s[12:13], 0
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:64
+; SI-NEXT: s_cmp_gt_i32 s17, 51
+; SI-NEXT: s_cselect_b32 s17, s5, s16
+; SI-NEXT: s_cselect_b32 s16, s4, s18
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_add_f64 v[6:7], s[20:21], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v4, s16
+; SI-NEXT: v_mov_b32_e32 v5, s17
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[4:5], v[4:5]
+; SI-NEXT: s_and_b64 s[4:5], s[34:35], vcc
+; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s5, s11, 0xb0014
+; SI-NEXT: s_and_b32 s18, s11, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s4
+; SI-NEXT: v_add_f64 v[4:5], s[16:17], v[8:9]
+; SI-NEXT: s_add_i32 s16, s5, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[4:5], s[28:29], s16
+; SI-NEXT: s_andn2_b64 s[4:5], s[10:11], s[4:5]
+; SI-NEXT: s_cmp_lt_i32 s16, 0
+; SI-NEXT: s_cselect_b32 s4, 0, s4
+; SI-NEXT: s_cselect_b32 s5, s18, s5
+; SI-NEXT: s_cmp_gt_i32 s16, 51
+; SI-NEXT: s_cselect_b32 s5, s11, s5
+; SI-NEXT: s_cselect_b32 s4, s10, s4
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:80
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v4, s4
+; SI-NEXT: v_mov_b32_e32 v5, s5
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[10:11], v[4:5]
+; SI-NEXT: s_and_b64 s[10:11], s[22:23], vcc
+; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec
+; SI-NEXT: s_cselect_b32 s10, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s11, s9, 0xb0014
+; SI-NEXT: s_and_b32 s16, s9, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s10
+; SI-NEXT: s_add_i32 s17, s11, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[10:11], s[28:29], s17
+; SI-NEXT: s_andn2_b64 s[10:11], s[8:9], s[10:11]
+; SI-NEXT: s_cmp_lt_i32 s17, 0
+; SI-NEXT: s_cselect_b32 s10, 0, s10
+; SI-NEXT: s_cselect_b32 s11, s16, s11
+; SI-NEXT: s_cmp_gt_i32 s17, 51
+; SI-NEXT: s_cselect_b32 s11, s9, s11
+; SI-NEXT: s_cselect_b32 s10, s8, s10
+; SI-NEXT: v_add_f64 v[6:7], s[4:5], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v4, s10
+; SI-NEXT: v_mov_b32_e32 v5, s11
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[8:9], v[4:5]
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
+; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
+; SI-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s3, s15, 0xb0014
+; SI-NEXT: s_and_b32 s4, s15, 0x80000000
+; SI-NEXT: v_mov_b32_e32 v9, s2
+; SI-NEXT: s_add_i32 s5, s3, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[2:3], s[28:29], s5
+; SI-NEXT: s_andn2_b64 s[2:3], s[14:15], s[2:3]
+; SI-NEXT: s_cmp_lt_i32 s5, 0
+; SI-NEXT: s_cselect_b32 s2, 0, s2
+; SI-NEXT: s_cselect_b32 s3, s4, s3
+; SI-NEXT: s_cmp_gt_i32 s5, 51
+; SI-NEXT: s_cselect_b32 s3, s15, s3
+; SI-NEXT: s_cselect_b32 s2, s14, s2
+; SI-NEXT: v_add_f64 v[4:5], s[10:11], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v10, s3
+; SI-NEXT: v_mov_b32_e32 v9, s2
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[14:15], v[9:10]
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: s_bfe_u32 s0, s13, 0xb0014
+; SI-NEXT: s_and_b32 s5, s13, 0x80000000
+; SI-NEXT: s_add_i32 s8, s0, 0xfffffc01
+; SI-NEXT: s_lshr_b64 s[0:1], s[28:29], s8
+; SI-NEXT: s_andn2_b64 s[0:1], s[12:13], s[0:1]
+; SI-NEXT: s_cmp_lt_i32 s8, 0
+; SI-NEXT: s_cselect_b32 s0, 0, s0
+; SI-NEXT: s_cselect_b32 s1, s5, s1
+; SI-NEXT: s_cmp_gt_i32 s8, 51
+; SI-NEXT: s_cselect_b32 s1, s13, s1
+; SI-NEXT: s_cselect_b32 s0, s12, s0
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:96
+; SI-NEXT: v_mov_b32_e32 v9, s4
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, s1
+; SI-NEXT: v_mov_b32_e32 v4, s0
+; SI-NEXT: v_cmp_lg_f64_e32 vcc, s[12:13], v[4:5]
+; SI-NEXT: s_and_b64 s[4:5], s[6:7], vcc
+; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; SI-NEXT: s_cselect_b32 s4, 0x3ff00000, 0
+; SI-NEXT: v_add_f64 v[6:7], s[2:3], v[8:9]
+; SI-NEXT: v_mov_b32_e32 v9, s4
+; SI-NEXT: v_add_f64 v[4:5], s[0:1], v[8:9]
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[24:27], 0 offset:112
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[24:27], 0
+; SI-NEXT: s_endpgm
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
store <16 x double> %y, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CI: {{.*}}
+; FUNC: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
index 5b4e2e4ce171b..cf1425cd8ffee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@@ -1,75 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
-; GCN-LABEL: {{^}}buffer_store_format_d16_x:
-; GCN: s_load_dword s[[LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]]
-; GCN: buffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_x(<4 x i32> %rsrc, [8 x i32], half %data, [8 x i32], i32 %voffset) {
+; GCN-LABEL: buffer_store_format_d16_x:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x30
+; GCN-NEXT: s_load_dword s5, s[8:9], 0x54
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: buffer_store_format_d16_x v0, v1, s[0:3], 0 offen
+; GCN-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xy:
-
-; UNPACKED: s_load_dwordx2 s[[[S_DATA:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-; UNPACKED-DAG: s_lshr_b32 [[SHR:s[0-9]+]], s[[S_DATA]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], s[[S_DATA]], 0xffff{{$}}
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xy:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s6, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s6
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: buffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xy:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: buffer_store_format_d16_xy v0, v1, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-
-; UNPACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-
-; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyz:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s7, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyz:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_lshr_b32 [[SHR1:s[0-9]+]], s[[S_DATA_1]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-
-; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-
-; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyzw:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s7, s5, 16
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s8, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s8
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v4, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyzw:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
index b1a2747cb5784..6c23a87292749 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
@@ -1,75 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
-; GCN-LABEL: {{^}}buffer_store_format_d16_x:
-; GCN: s_load_dword s[[LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]]
-; GCN: buffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_x(ptr addrspace(8) %rsrc, [8 x i32], half %data, [8 x i32], i32 %voffset) {
+; GCN-LABEL: buffer_store_format_d16_x:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x30
+; GCN-NEXT: s_load_dword s5, s[8:9], 0x54
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: buffer_store_format_d16_x v0, v1, s[0:3], 0 offen
+; GCN-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xy:
-
-; UNPACKED: s_load_dwordx2 s[[[S_DATA:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-; UNPACKED-DAG: s_lshr_b32 [[SHR:s[0-9]+]], s[[S_DATA]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], s[[S_DATA]], 0xffff{{$}}
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xy(ptr addrspace(8) %rsrc, <2 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xy:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s6, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s6
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: buffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xy:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: buffer_store_format_d16_xy v0, v1, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half> %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-
-; UNPACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-
-; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyz:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s7, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyz:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_lshr_b32 [[SHR1:s[0-9]+]], s[[S_DATA_1]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-
-; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
-
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-
-; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyzw:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s7, s5, 16
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s8, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s8
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v4, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 offen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyzw:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 offen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
index aa7064dad9e95..70e12ea0625f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@@ -1,85 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
-; GCN-LABEL: {{^}}buffer_store_format_d16_x:
-; GCN: s_load_dword s[[LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]]
-; GCN: buffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_x(<4 x i32> %rsrc, [8 x i32], half %data, [8 x i32], i32 %index) {
+; GCN-LABEL: buffer_store_format_d16_x:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x30
+; GCN-NEXT: s_load_dword s5, s[8:9], 0x54
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: buffer_store_format_d16_x v0, v1, s[0:3], 0 idxen
+; GCN-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xy:
-
-; UNPACKED: s_load_dwordx2 s[[[S_DATA:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-; UNPACKED-DAG: s_lshr_b32 [[SHR:s[0-9]+]], s[[S_DATA]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], s[[S_DATA]], 0xffff{{$}}
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: buffer_store_format_d16_xy v[[[V_LO]]:[[V_HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
-
-; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %index) {
+; UNPACKED-LABEL: buffer_store_format_d16_xy:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s6, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s6
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: buffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 idxen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xy:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: buffer_store_format_d16_xy v0, v1, s[0:3], 0 idxen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyz:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED1]]
-
-; UNPACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
-
-; PACKED: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], [[MASKED0]]
-
-; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyz:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s7, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 idxen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyz:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen
+; PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s[[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
-
-; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED0:s[0-9]+]], s[[S_DATA_0]], 0xffff{{$}}
-; UNPACKED-DAG: s_lshr_b32 [[SHR1:s[0-9]+]], s[[S_DATA_1]], 16
-; UNPACKED-DAG: s_and_b32 [[MASKED1:s[0-9]+]], s[[S_DATA_1]], 0xffff{{$}}
-
-; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
-; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-
-; UNPACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
-
-; PACKED: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
-; PACKED: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-
-; PACKED: buffer_store_format_d16_xyzw v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
+; UNPACKED-LABEL: buffer_store_format_d16_xyzw:
+; UNPACKED: ; %bb.0: ; %main_body
+; UNPACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; UNPACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; UNPACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; UNPACKED-NEXT: s_waitcnt lgkmcnt(0)
+; UNPACKED-NEXT: s_lshr_b32 s7, s5, 16
+; UNPACKED-NEXT: s_and_b32 s5, s5, 0xffff
+; UNPACKED-NEXT: s_lshr_b32 s8, s4, 16
+; UNPACKED-NEXT: s_and_b32 s4, s4, 0xffff
+; UNPACKED-NEXT: v_mov_b32_e32 v0, s4
+; UNPACKED-NEXT: v_mov_b32_e32 v1, s8
+; UNPACKED-NEXT: v_mov_b32_e32 v2, s5
+; UNPACKED-NEXT: v_mov_b32_e32 v3, s7
+; UNPACKED-NEXT: v_mov_b32_e32 v4, s6
+; UNPACKED-NEXT: buffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 idxen
+; UNPACKED-NEXT: s_endpgm
+;
+; PACKED-LABEL: buffer_store_format_d16_xyzw:
+; PACKED: ; %bb.0: ; %main_body
+; PACKED-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
+; PACKED-NEXT: s_load_dword s6, s[8:9], 0x18
+; PACKED-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; PACKED-NEXT: s_waitcnt lgkmcnt(0)
+; PACKED-NEXT: v_mov_b32_e32 v0, s4
+; PACKED-NEXT: v_mov_b32_e32 v1, s5
+; PACKED-NEXT: v_mov_b32_e32 v2, s6
+; PACKED-NEXT: buffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 idxen
+; PACKED-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}
-; GCN-LABEL: {{^}}buffer_store_format_i16_x:
-; GCN: s_load_dword s[[LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[LO]]
-; GCN: buffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_i16_x(<4 x i32> %rsrc, [8 x i32], i16 %data, [8 x i32], i32 %index) {
+; GCN-LABEL: buffer_store_format_i16_x:
+; GCN: ; %bb.0: ; %main_body
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x30
+; GCN-NEXT: s_load_dword s5, s[8:9], 0x54
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: buffer_store_format_d16_x v0, v1, s[0:3], 0 idxen
+; GCN-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 6f214cd23ed30..8508de491e880 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9PLUS,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s
@@ -17,261 +18,5115 @@
; This used to fail due to a v_add_i32 instruction with an illegal immediate
; operand that was created during Local Stack Slot Allocation. Test case derived
; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
-;
-; GCN-LABEL: {{^}}ps_main:
-
-; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; SIVI-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
-; SIVI-DAG: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
-; SIVI-DAG: s_mov_b32 s6, -1
-
-; GFX9-MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GFX9-MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
-; GFX9-MUBUF-DAG: s_mov_b32 s2, -1
-
-; SI-DAG: s_mov_b32 s7, 0xe8f000
-; VI-DAG: s_mov_b32 s7, 0xe80000
-; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000
-; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000
-; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
-
-; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:
-
-; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
-; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-; GFX11-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
-
-; MUBUF-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x200, [[CLAMP_IDX]]
-; FLATSCR: v_mov_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]]
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, [[CLAMP_IDX]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, [[CLAMP_IDX]], off{{$}}
define amdgpu_ps float @ps_main(i32 %idx) {
+; SI-LABEL: ps_main:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s7, 0xe8f000
+; SI-NEXT: s_add_u32 s4, s4, s0
+; SI-NEXT: s_addc_u32 s5, s5, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: ps_main:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s7, 0xe80000
+; VI-NEXT: s_add_u32 s4, s4, s0
+; VI-NEXT: s_addc_u32 s5, s5, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: ps_main:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: ps_main:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: ps_main:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: ps_main:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: ps_main:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: ps_main:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: ps_main:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: ps_main:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v9, 0xb702e758 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v13 :: v_dual_mov_b32 v33, v22
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%r = fadd float %v1, %v2
ret float %r
}
-; GCN-LABEL: {{^}}vs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; MUBUF-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
-; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
-
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-
define amdgpu_vs float @vs_main(i32 %idx) {
+; SI-LABEL: vs_main:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s7, 0xe8f000
+; SI-NEXT: s_add_u32 s4, s4, s0
+; SI-NEXT: s_addc_u32 s5, s5, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: vs_main:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s7, 0xe80000
+; VI-NEXT: s_add_u32 s4, s4, s0
+; VI-NEXT: s_addc_u32 s5, s5, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: vs_main:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: vs_main:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: vs_main:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: vs_main:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: vs_main:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: vs_main:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: vs_main:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: vs_main:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v9, 0xb702e758 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v13 :: v_dual_mov_b32 v33, v22
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%r = fadd float %v1, %v2
ret float %r
}
-; GCN-LABEL: {{^}}cs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s2
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
-; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
-; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-
-; MUBUF-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-
define amdgpu_cs float @cs_main(i32 %idx) {
+; SI-LABEL: cs_main:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s7, 0xe8f000
+; SI-NEXT: s_add_u32 s4, s4, s0
+; SI-NEXT: s_addc_u32 s5, s5, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: cs_main:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s7, 0xe80000
+; VI-NEXT: s_add_u32 s4, s4, s0
+; VI-NEXT: s_addc_u32 s5, s5, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: cs_main:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: cs_main:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: cs_main:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: cs_main:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: cs_main:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: cs_main:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: cs_main:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: cs_main:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v9, 0xb702e758 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v13 :: v_dual_mov_b32 v33, v22
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%r = fadd float %v1, %v2
ret float %r
}
-; GCN-LABEL: {{^}}hs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; SIVI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
-; SIVI-NOT: s_mov_b32 s4
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
-; GFX9PLUS-NOT: s_mov_b32 s5
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_hs float @hs_main(i32 %idx) {
+; SI-LABEL: hs_main:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s7, 0xe8f000
+; SI-NEXT: s_add_u32 s4, s4, s0
+; SI-NEXT: s_addc_u32 s5, s5, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: hs_main:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s7, 0xe80000
+; VI-NEXT: s_add_u32 s4, s4, s0
+; VI-NEXT: s_addc_u32 s5, s5, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: hs_main:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: hs_main:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: hs_main:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: hs_main:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: hs_main:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: hs_main:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: hs_main:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: hs_main:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v9, 0xb702e758 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v13 :: v_dual_mov_b32 v33, v22
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%r = fadd float %v1, %v2
ret float %r
}
-; GCN-LABEL: {{^}}gs_main:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; SIVI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_gs float @gs_main(i32 %idx) {
+; SI-LABEL: gs_main:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s7, 0xe8f000
+; SI-NEXT: s_add_u32 s4, s4, s0
+; SI-NEXT: s_addc_u32 s5, s5, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: gs_main:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s7, 0xe80000
+; VI-NEXT: s_add_u32 s4, s4, s0
+; VI-NEXT: s_addc_u32 s5, s5, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[4:7], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[4:7], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: gs_main:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: gs_main:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: gs_main:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: gs_main:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: gs_main:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: gs_main:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: gs_main:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: gs_main:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v9, 0xb702e758 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v13 :: v_dual_mov_b32 v33, v22
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%r = fadd float %v1, %v2
@@ -282,55 +5137,1030 @@ define amdgpu_gs float @gs_main(i32 %idx) {
; SGPR5, and the inreg implementation is used to reference it in the IR. The
; following tests confirm the shader and anything inserted after the return
; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset.
-
-; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; SIVI-NOT: s_mov_b32 s6
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; MUBUF-DAG: s_mov_b32 s2, s5
-
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
+; SI-LABEL: hs_ir_uses_scratch_offset:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s11, 0xe8f000
+; SI-NEXT: s_add_u32 s8, s8, s6
+; SI-NEXT: s_addc_u32 s9, s9, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; SI-NEXT: s_mov_b32 s2, s5
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: hs_ir_uses_scratch_offset:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s10, -1
+; VI-NEXT: s_mov_b32 s11, 0xe80000
+; VI-NEXT: s_add_u32 s8, s8, s6
+; VI-NEXT: s_addc_u32 s9, s9, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; VI-NEXT: s_mov_b32 s2, s5
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s11, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX9-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s11, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: hs_ir_uses_scratch_offset:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s11, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v0, 0xbeae29dc :: v_dual_mov_b32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v15, 0x3e319356 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe31934f
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0x3efcd89c :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v33, v22 :: v_dual_mov_b32 v30, v13
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_mov_b32 s2, s5
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%f = fadd float %v1, %v2
@@ -339,53 +6169,1030 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
ret <{i32, i32, i32, float}> %r2
}
-; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
-; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
-; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
-; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
-; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
-; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
-
-; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
-; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
-; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
-; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
-; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
-; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
-; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-
-; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; FLATSCR-NOT: SCRATCH_RSRC_DWORD
-
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
-
-; MUBUF-DAG: s_mov_b32 s2, s5
-
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
-
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
-; GFX11-FLATSCR: scratch_load_b32 {{v[0-9]+}}, {{v[0-9]+}}, off
define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
+; SI-LABEL: gs_ir_uses_scratch_offset:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s11, 0xe8f000
+; SI-NEXT: s_add_u32 s8, s8, s6
+; SI-NEXT: s_addc_u32 s9, s9, 0
+; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; SI-NEXT: s_mov_b32 s0, 0
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x200, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; SI-NEXT: s_waitcnt expcnt(3)
+; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; SI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; SI-NEXT: s_mov_b32 s2, s5
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_add_f32_e32 v0, v0, v1
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: ; return to shader part epilog
+;
+; VI-LABEL: gs_ir_uses_scratch_offset:
+; VI: ; %bb.0:
+; VI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s10, -1
+; VI-NEXT: s_mov_b32 s11, 0xe80000
+; VI-NEXT: s_add_u32 s8, s8, s6
+; VI-NEXT: s_addc_u32 s9, s9, 0
+; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; VI-NEXT: s_mov_b32 s0, 0
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 0x200, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; VI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; VI-NEXT: s_mov_b32 s2, s5
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_add_f32_e32 v0, v0, v1
+; VI-NEXT: ; return to shader part epilog
+;
+; GFX9-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX9-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX9-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX9-MUBUF-NEXT: s_mov_b32 s11, 0xe00000
+; GFX9-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX9-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:320
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:316
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:312
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:308
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:304
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:300
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:296
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:292
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:288
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:284
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:280
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:276
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:272
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:264
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:260
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:256
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:252
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:236
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:268
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:248
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:240
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:232
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:228
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:224
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:196
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:832
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:828
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:824
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:820
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:800
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:784
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:780
+; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:772
+; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:764
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:760
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:756
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:752
+; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:748
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:744
+; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:740
+; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:736
+; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:732
+; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:728
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:724
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:720
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:716
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:712
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:708
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen
+; GFX9-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W32-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX10_W32-MUBUF: ; %bb.0:
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s11, 0x31c16000
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W32-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W32-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:320
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:316
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:300
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:296
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:292
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:288
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:284
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:280
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:276
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:272
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:268
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:264
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:260
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:256
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:252
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:248
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:240
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:236
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:232
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:228
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:224
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:200
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:196
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:832
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:828
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:824
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:820
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:816
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:812
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:808
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:804
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:800
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:796
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:792
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:788
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:784
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:780
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:772
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:764
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:760
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:756
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:752
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:748
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:744
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:740
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:736
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:732
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:728
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:724
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:720
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:716
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:712
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:708
+; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX10_W64-MUBUF-LABEL: gs_ir_uses_scratch_offset:
+; GFX10_W64-MUBUF: ; %bb.0:
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s10, -1
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s11, 0x31e16000
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
+; GFX10_W64-MUBUF-NEXT: s_add_u32 s8, s8, s5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
+; GFX10_W64-MUBUF-NEXT: s_addc_u32 s9, s9, 0
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:320
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:316
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:300
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:296
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:292
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:288
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:284
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:280
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:276
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:272
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:268
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:264
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:260
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:256
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:252
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:248
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:244
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:240
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:236
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:232
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:228
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:224
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:220
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:216
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:212
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:208
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:204
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:200
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:196
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499
+; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x200, v0
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:832
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:828
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:824
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:820
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:816
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:812
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:808
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:804
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:800
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:796
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:792
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:788
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:784
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:780
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:776
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:772
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:768
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:764
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:760
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:756
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:752
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:748
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:744
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:740
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:736
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:732
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:728
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:724
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:720
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:716
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:712
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:708
+; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen
+; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, s5
+; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
+; GFX10-FLATSCR: ; %bb.0:
+; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: ; return to shader part epilog
+;
+; GFX9-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
+; GFX9-FLATSCR-PAL: ; %bb.0:
+; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
+; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX10-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
+; GFX10-FLATSCR-PAL: ; %bb.0:
+; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1]
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
+; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
+; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5
+; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
+;
+; GFX11-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
+; GFX11-FLATSCR: ; %bb.0:
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v0, 0xbeae29dc :: v_dual_mov_b32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v15, 0x3e319356 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xb7043519
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe31934f
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0x3efcd89c :: v_dual_mov_b32 v29, v15
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v33, v22 :: v_dual_mov_b32 v30, v13
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
+; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
+; GFX11-FLATSCR-NEXT: s_clause 0x2
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v30, v7 :: v_dual_mov_b32 v31, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v6, v13
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, v2
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: s_mov_b32 s2, s5
+; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
%f = fadd float %v1, %v2
@@ -393,3 +7200,10 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
%r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
ret <{i32, i32, i32, float}> %r2
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FLATSCR: {{.*}}
+; GCN: {{.*}}
+; GFX9PLUS: {{.*}}
+; GFX9_10-MUBUF: {{.*}}
+; MUBUF: {{.*}}
+; SIVI: {{.*}}
More information about the llvm-commits
mailing list