[llvm] r363990 - AMDGPU: Always use s33 for global scratch wave offset
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 14:58:25 PDT 2019
Author: arsenm
Date: Thu Jun 20 14:58:24 2019
New Revision: 363990
URL: http://llvm.org/viewvc/llvm-project?rev=363990&view=rev
Log:
AMDGPU: Always use s33 for global scratch wave offset
Every called function could possibly need this to calculate the
absolute address of stack objects, and this avoids inserting a copy
around every call site in the kernel. It's also somewhat cleaner to
keep this in a callee saved SGPR.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/bswap.ll
llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
llvm/trunk/test/CodeGen/AMDGPU/call-waitcnt.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
llvm/trunk/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
llvm/trunk/test/CodeGen/AMDGPU/known-never-snan.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll
llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
llvm/trunk/test/CodeGen/AMDGPU/spill-offset-calculation.ll
llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll
llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll
llvm/trunk/test/CodeGen/AMDGPU/wave32.ll
llvm/trunk/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jun 20 14:58:24 2019
@@ -2621,20 +2621,12 @@ SDValue SITargetLowering::LowerCall(Call
SmallVector<SDValue, 4> CopyFromChains;
- unsigned OffsetReg = Info->getScratchWaveOffsetReg();
-
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
- // TODO: Don't hardcode these registers and get from the callee function.
- SDValue ScratchWaveOffsetReg
- = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
- RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
- CopyFromChains.push_back(ScratchWaveOffsetReg.getValue(1));
-
if (!Info->isEntryFunction()) {
// Avoid clobbering this function's FP value. In the current convention
// callee will overwrite this, so do save/restore around the call site.
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Thu Jun 20 14:58:24 2019
@@ -69,7 +69,7 @@ SIMachineFunctionInfo::SIMachineFunction
// Non-entry functions have no special inputs for now, other registers
// required for scratch access.
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- ScratchWaveOffsetReg = AMDGPU::SGPR4;
+ ScratchWaveOffsetReg = AMDGPU::SGPR33;
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
Modified: llvm/trunk/test/CodeGen/AMDGPU/bswap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/bswap.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/bswap.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/bswap.ll Thu Jun 20 14:58:24 2019
@@ -723,8 +723,8 @@ define float @missing_truncate_promote_b
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_alignbit_b32 v1, v0, v0, 8
; SI-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; SI-NEXT: s_mov_b32 s6, 0xff00ff
-; SI-NEXT: v_bfi_b32 v0, s6, v0, v1
+; SI-NEXT: s_mov_b32 s4, 0xff00ff
+; SI-NEXT: v_bfi_b32 v0, s4, v0, v1
; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
@@ -735,8 +735,8 @@ define float @missing_truncate_promote_b
; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; VI-NEXT: v_alignbit_b32 v1, v0, v0, 8
; VI-NEXT: v_alignbit_b32 v0, v0, v0, 24
-; VI-NEXT: s_mov_b32 s6, 0xff00ff
-; VI-NEXT: v_bfi_b32 v0, s6, v0, v1
+; VI-NEXT: s_mov_b32 s4, 0xff00ff
+; VI-NEXT: v_bfi_b32 v0, s4, v0, v1
; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NEXT: s_setpc_b64 s[30:31]
bb:
Modified: llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll Thu Jun 20 14:58:24 2019
@@ -80,8 +80,8 @@ define void @undef_lo_op_v2i16(i16 %arg0
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT: s_movk_i32 s6, 0x63
-; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
+; GFX9-NEXT: s_movk_i32 s4, 0x63
+; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
@@ -273,8 +273,8 @@ define void @undef_hi_op_v2i16(i16 %arg0
; GFX9-LABEL: undef_hi_op_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_movk_i32 s6, 0x63
-; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0]
+; GFX9-NEXT: s_movk_i32 s4, 0x63
+; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Thu Jun 20 14:58:24 2019
@@ -81,14 +81,14 @@ define amdgpu_kernel void @test_call_ext
; MESA: s_mov_b32 s33, s3{{$}}
; HSA: s_mov_b32 s33, s9{{$}}
+; HSA: buffer_load_ubyte [[VAR:v[0-9]+]]
+; HSA: s_mov_b32 s32, s33
+
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4
-; GCN-NEXT: buffer_load_ubyte [[VAR:v[0-9]+]]
-; HSA-NEXT: s_mov_b32 s4, s33
-; HSA-NEXT: s_mov_b32 s32, s33
-; MESA-DAG: s_mov_b32 s4, s33{{$}}
+; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
; MESA-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_waitcnt vmcnt(0)
@@ -105,13 +105,15 @@ define amdgpu_kernel void @test_call_ext
; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext:
; MESA: s_mov_b32 s33, s3{{$}}
+; HSA: buffer_load_ubyte v0
+; HSA-DAG: s_mov_b32 s32, s33{{$}}
+
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4
-; GCN-NEXT: buffer_load_ubyte v0
-; GCN-DAG: s_mov_b32 s4, s33{{$}}
-; GCN-DAG: s_mov_b32 s32, s33{{$}}
+; MESA: buffer_load_ubyte v0
+; MESA-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
@@ -131,7 +133,6 @@ define amdgpu_kernel void @test_call_ext
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4
; GCN-DAG: v_mov_b32_e32 v0, 0x7b
-; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
@@ -151,7 +152,6 @@ define amdgpu_kernel void @test_call_ext
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s3
; GCN-NOT: s_waitcnt
@@ -186,7 +186,6 @@ define amdgpu_kernel void @test_call_ext
; GCN-LABEL: {{^}}test_call_external_void_func_i16_imm:
; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}}
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
@@ -240,7 +239,6 @@ define amdgpu_kernel void @test_call_ext
; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4
; GCN-DAG: v_mov_b32_e32 v0, 42
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-graph-register-usage.ll Thu Jun 20 14:58:24 2019
@@ -13,14 +13,14 @@ define void @use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_use_vcc:
-; GCN: v_writelane_b32 v32, s33, 0
-; GCN: v_writelane_b32 v32, s34, 1
-; GCN: v_writelane_b32 v32, s35, 2
+; GCN: v_writelane_b32 v32, s34, 0
+; GCN: v_writelane_b32 v32, s35, 1
+; GCN: v_writelane_b32 v32, s36, 2
; GCN: s_swappc_b64
-; GCN: v_readlane_b32 s35, v32, 2
-; GCN: v_readlane_b32 s34, v32, 1
-; GCN: v_readlane_b32 s33, v32, 0
-; GCN: ; NumSgprs: 38
+; GCN: v_readlane_b32 s36, v32, 2
+; GCN: v_readlane_b32 s35, v32, 1
+; GCN: v_readlane_b32 s34, v32, 0
+; GCN: ; NumSgprs: 39
; GCN: ; NumVgprs: 33
define void @indirect_use_vcc() #1 {
call void @use_vcc()
@@ -29,8 +29,8 @@ define void @indirect_use_vcc() #1 {
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
-; CI: ; NumSgprs: 40
-; VI-NOBUG: ; NumSgprs: 42
+; CI: ; NumSgprs: 41
+; VI-NOBUG: ; NumSgprs: 43
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
@@ -48,8 +48,8 @@ define void @use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
-; CI: ; NumSgprs: 40
-; VI: ; NumSgprs: 42
+; CI: ; NumSgprs: 41
+; VI: ; NumSgprs: 43
; GCN: ; NumVgprs: 33
define void @indirect_use_flat_scratch() #1 {
call void @use_flat_scratch()
@@ -58,8 +58,8 @@ define void @indirect_use_flat_scratch()
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
-; CI: ; NumSgprs: 40
-; VI-NOBUG: ; NumSgprs: 42
+; CI: ; NumSgprs: 41
+; VI-NOBUG: ; NumSgprs: 43
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll Thu Jun 20 14:58:24 2019
@@ -6,14 +6,12 @@ declare hidden void @external_void_func_
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_mov_b32 s33, s7
-; GCN: s_mov_b32 s4, s33
-; GCN-NEXT: s_getpc_b64 s[34:35]
+; GCN: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s[34:35]
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: #ASMSTART
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
@@ -25,25 +23,25 @@ define amdgpu_kernel void @test_kernel_c
}
; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
-; GCN: v_writelane_b32 v32, s33, 0
-; GCN: v_writelane_b32 v32, s34, 1
-; GCN: v_writelane_b32 v32, s35, 2
-; GCN: v_writelane_b32 v32, s36, 3
-; GCN: v_writelane_b32 v32, s37, 4
+; GCN: v_writelane_b32 v32, s34, 0
+; GCN: v_writelane_b32 v32, s35, 1
+; GCN: v_writelane_b32 v32, s36, 2
+; GCN: v_writelane_b32 v32, s37, 3
+; GCN: v_writelane_b32 v32, s38, 4
-; GCN: s_mov_b32 s33, s5
+; GCN: s_mov_b32 [[COPY_FP:s[0-9]+]], s5
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s5, [[COPY_FP]]
+; GCN-NEXT: s_mov_b32 [[COPY_FP]], s5
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
-; GCN-DAG: s_mov_b32 s5, s33
-; GCN-DAG: v_readlane_b32 s37, v32, 4
-; GCN: v_readlane_b32 s36, v32, 3
-; GCN: v_readlane_b32 s35, v32, 2
-; GCN: v_readlane_b32 s34, v32, 1
-; GCN: v_readlane_b32 s33, v32, 0
+; GCN-DAG: s_mov_b32 s5, [[COPY_FP]]
+; GCN-DAG: v_readlane_b32 s38, v32, 4
+; GCN: v_readlane_b32 s37, v32, 3
+; GCN: v_readlane_b32 s36, v32, 2
+; GCN: v_readlane_b32 s35, v32, 1
+; GCN: v_readlane_b32 s34, v32, 0
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
call void @external_void_func_void()
@@ -54,12 +52,12 @@ define void @test_func_call_external_voi
; FIXME: Avoid extra restore of FP in between calls.
; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
-; GCN: s_mov_b32 s33, s5
+; GCN: s_mov_b32 [[COPY_FP:s[0-9]+]], s5
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s5, [[COPY_FP]]
+; GCN-NEXT: s_mov_b32 [[COPY_FP]], s5
; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN: s_mov_b32 s5, [[COPY_FP]]
define void @test_func_call_external_void_funcx2() #0 {
call void @external_void_func_void()
call void @external_void_func_void()
@@ -127,19 +125,23 @@ define amdgpu_kernel void @test_call_voi
ret void
}
+; FIXME: What is the expected behavior for reserved registers here?
+
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_mov_b32 s34, s9
-; GCN: s_mov_b32 s4, s34
-; GCN-DAG: s_mov_b32 s32, s34
-; GCN-DAG: ; def s33
-; GCN-DAG: #ASMEND
-; GCN-DAG: s_getpc_b64 s[6:7]
-; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
-; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: ;;#ASMSTART
+; GCN-NOT: s33
+; GCN: #ASMSTART
+; GCN-NEXT: ; def s33
+; GCN-NEXT: #ASMEND
+; GCN: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
+; GCN: s_mov_b32 s32, s34
+; GCN: s_swappc_b64 s[30:31], s[4:5]
+; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-NEXT: ;;#ASMEND
+; GCN-NOT: s33
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
@@ -148,17 +150,54 @@ define amdgpu_kernel void @test_call_voi
ret void
}
+; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34:
+; GCN: s_mov_b32 s33, s9
+; GCN-NOT: s34
+; GCN: ;;#ASMSTART
+; GCN-NEXT: ; def s34
+; GCN-NEXT: ;;#ASMEND
+
+; GCN-NOT: s34
+
+; GCN: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
+
+; GCN-NOT: s34
+; GCN: s_swappc_b64 s[30:31], s[4:5]
+
+; GCN-NOT: s34
+
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s34
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
+ %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
+ call void @external_void_func_void()
+ call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32:
; GCN: s_mov_b32 s33, s9
-; GCN: s_mov_b32 s4, s33
+
+; GCN: ;;#ASMSTART
+; GCN-NEXT: ; def v32
+; GCN-NEXT: ;;#ASMEND
+
+; GCN-NOT: v32
+; GCN: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
+; GCN-NOT: v32
; GCN-DAG: s_mov_b32 s32, s33
-; GCN-DAG: ; def v32
-; GCN-DAG: #ASMEND
-; GCN-DAG: s_getpc_b64 s[6:7]
-; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
-; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: ;;#ASMSTART
+
+; GCN: s_swappc_b64 s[30:31], s[4:5]
+
+; GCN-NOT: v32
+
+; GCN: ;;#ASMSTART
; GCN-NEXT: ; use v32
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
@@ -181,10 +220,22 @@ define hidden void @void_func_void_clobb
ret void
}
+; GCN-LABEL: {{^}}void_func_void_clobber_s34:
+; GCN: v_writelane_b32 v0, s34, 0
+; GCN-NEXT: #ASMSTART
+; GCN-NEXT: ; clobber
+; GCN-NEXT: #ASMEND
+; GCN-NEXT: v_readlane_b32 s34, v0, 0
+; GCN: s_setpc_b64
+define hidden void @void_func_void_clobber_s34() #2 {
+ call void asm sideeffect "; clobber", "~{s34}"() #0
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_mov_b32 s33, s7
-; GCN: s_mov_b32 s4, s33
-; GCN-NEXT: s_getpc_b64
+
+; GCN: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_mov_b32 s32, s33
@@ -195,6 +246,19 @@ define amdgpu_kernel void @test_call_voi
ret void
}
+; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s34:
+; GCN: s_mov_b32 s33, s7
+; GCN: s_getpc_b64
+; GCN-NEXT: s_add_u32
+; GCN-NEXT: s_addc_u32
+; GCN-NEXT: s_mov_b32 s32, s33
+; GCN: s_swappc_b64
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
+ call void @void_func_void_clobber_s34()
+ ret void
+}
+
; GCN-LABEL: {{^}}callee_saved_sgpr_func:
; GCN-NOT: s40
; GCN: v_writelane_b32 v32, s40
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-waitcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-waitcnt.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-waitcnt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-waitcnt.ll Thu Jun 20 14:58:24 2019
@@ -13,11 +13,10 @@ define amdgpu_kernel void @call_memory_a
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: ds_read_b32 v0, v0
-; GCN-NEXT: s_mov_b32 s4, s33
-; GCN-NEXT: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+4
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: s_endpgm
%vgpr = load volatile i32, i32 addrspace(3)* %ptr
call void @func(i32 %vgpr)
@@ -40,7 +39,6 @@ define amdgpu_kernel void @call_memory_n
; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4
; GCN-NEXT: global_store_dword v[0:1], v2, off
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -58,14 +56,13 @@ define amdgpu_kernel void @call_no_wait_
; GCN-NEXT: s_mov_b32 s33, s9
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, func@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, func@rel32@hi+4
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: v_mov_b32_e32 v32, 0
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, s34
; GCN-NEXT: v_mov_b32_e32 v1, s35
; GCN-NEXT: global_store_dword v[0:1], v32, off
@@ -82,13 +79,12 @@ define amdgpu_kernel void @call_no_wait_
; GCN-NEXT: s_mov_b32 s33, s9
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, func.return@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, func.return@rel32@hi+4
+; GCN-NEXT: s_getpc_b64 s[4:5]
+; GCN-NEXT: s_add_u32 s4, s4, func.return@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s5, s5, func.return@rel32@hi+4
; GCN-NEXT: s_mov_b32 s32, s33
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v1, s34
; GCN-NEXT: v_mov_b32_e32 v2, s35
; GCN-NEXT: global_store_dword v[1:2], v0, off
@@ -108,12 +104,11 @@ define amdgpu_kernel void @call_got_load
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, got.func@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, got.func@gotpcrel32@hi+4
-; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
-; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: s_endpgm
call void @got.func(i32 0)
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll Thu Jun 20 14:58:24 2019
@@ -75,19 +75,19 @@ define void @callee_with_stack_no_fp_eli
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
-; GCN-DAG: v_writelane_b32 v32, s33,
; GCN-DAG: v_writelane_b32 v32, s34,
; GCN-DAG: v_writelane_b32 v32, s35,
+; GCN-DAG: v_writelane_b32 v32, s36,
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5{{$}}
-; GCN-DAG: s_mov_b32 s33, s5
+; GCN-DAG: s_mov_b32 [[COPY_FP:s[0-9]+]], s5
; GCN: s_swappc_b64
-; GCN-DAG: s_mov_b32 s5, s33
-; GCN-DAG: v_readlane_b32 s35,
+; GCN-DAG: s_mov_b32 s5, [[COPY_FP]]
; GCN-DAG: v_readlane_b32 s34,
-; GCN-DAG: v_readlane_b32 s33,
+; GCN-DAG: v_readlane_b32 s35,
+; GCN-DAG: v_readlane_b32 s36,
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -110,14 +110,16 @@ define void @callee_with_stack_and_call(
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN-DAG: v_writelane_b32 v32, s33, 0
-; GCN-DAG: v_writelane_b32 v32, s34, 1
-; GCN: s_mov_b32 s33, s5
+; GCN-DAG: v_writelane_b32 v32, s34, 0
+; GCN-DAG: v_writelane_b32 v32, s35, 1
+; GCN-DAG: v_writelane_b32 v32, s36, 2
+; GCN-DAG: s_mov_b32 [[COPY_FP:s[0-9]+]], s5
; GCN: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN: s_mov_b32 s5, [[COPY_FP]]
-; GCN-DAG: v_readlane_b32 s34, v32, 1
-; GCN-DAG: v_readlane_b32 s33, v32, 0
+; GCN-DAG: v_readlane_b32 s34, v32, 0
+; GCN-DAG: v_readlane_b32 s35, v32, 1
+; GCN-DAG: v_readlane_b32 s36, v32, 2
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll Thu Jun 20 14:58:24 2019
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kern_indirect
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @use_workgroup_id_x() #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -117,7 +117,7 @@ define hidden void @use_workgroup_id_x()
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; GCN: ; use s6
+; GCN: ; use s4
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
%alloca = alloca i32, addrspace(5)
@@ -129,7 +129,7 @@ define hidden void @use_stack_workgroup_
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @use_workgroup_id_y() #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -138,7 +138,7 @@ define hidden void @use_workgroup_id_y()
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @use_workgroup_id_z() #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -146,8 +146,8 @@ define hidden void @use_workgroup_id_z()
}
; GCN-LABEL: {{^}}use_workgroup_id_xy:
+; GCN: ; use s4
; GCN: ; use s6
-; GCN: ; use s7
define hidden void @use_workgroup_id_xy() #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -157,9 +157,9 @@ define hidden void @use_workgroup_id_xy(
}
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
+; GCN: ; use s4
; GCN: ; use s6
; GCN: ; use s7
-; GCN: ; use s8
define hidden void @use_workgroup_id_xyz() #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -171,8 +171,8 @@ define hidden void @use_workgroup_id_xyz
}
; GCN-LABEL: {{^}}use_workgroup_id_xz:
+; GCN: ; use s4
; GCN: ; use s6
-; GCN: ; use s7
define hidden void @use_workgroup_id_xz() #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -182,8 +182,8 @@ define hidden void @use_workgroup_id_xz(
}
; GCN-LABEL: {{^}}use_workgroup_id_yz:
+; GCN: ; use s4
; GCN: ; use s6
-; GCN: ; use s7
define hidden void @use_workgroup_id_yz() #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -198,12 +198,13 @@ define hidden void @use_workgroup_id_yz(
; GCN: enable_sgpr_workgroup_id_z = 0
; GCN-NOT: s6
-; GCN: s_mov_b32 s33, s7
-; GCN-NOT: s6
-; GCN: s_mov_b32 s4, s33
-; GCN-NOT: s6
+; GCN: s_mov_b32 s4, s6
+; GCN-NEXT: s_getpc_b64 s[6:7]
+; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+4
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
+; GCN-NEXT: s_endpgm
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
call void @use_workgroup_id_x()
ret void
@@ -215,8 +216,7 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_z = 0
; GCN: s_mov_b32 s33, s8
-; GCN-DAG: s_mov_b32 s4, s33
-; GCN-DAG: s_mov_b32 s6, s7
+; GCN-DAG: s_mov_b32 s4, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
@@ -228,10 +228,9 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1
-
; GCN: s_mov_b32 s33, s8
-; GCN-DAG: s_mov_b32 s4, s33
-; GCN-DAG: s_mov_b32 s6, s7
+; GCN: s_mov_b32 s4, s7
+
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
call void @use_workgroup_id_z()
@@ -244,14 +243,10 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_z = 0
; GCN: s_mov_b32 s33, s8
-; GCN-NOT: s6
-; GCN-NOT: s7
-; GCN: s_mov_b32 s4, s33
-; GCN-NOT: s6
-; GCN-NOT: s7
+
+; GCN: s_mov_b32 s4, s6
+; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
-; GCN-NOT: s6
-; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
call void @use_workgroup_id_xy()
@@ -265,22 +260,11 @@ define amdgpu_kernel void @kern_indirect
; GCN: s_mov_b32 s33, s9
-; GCN-NOT: s6
-; GCN-NOT: s7
-; GCN-NOT: s8
-
-; GCN: s_mov_b32 s4, s33
-
-; GCN-NOT: s6
-; GCN-NOT: s7
-; GCN-NOT: s8
+; GCN: s_mov_b32 s4, s6
+; GCN: s_mov_b32 s6, s7
+; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
-
-; GCN-NOT: s6
-; GCN-NOT: s7
-; GCN-NOT: s8
-
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
call void @use_workgroup_id_xyz()
@@ -293,16 +277,10 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: s_mov_b32 s33, s8
-; GCN-NOT: s6
-; GCN-NOT: s7
-
-; GCN: s_mov_b32 s4, s33
-; GCN-NOT: s6
-; GCN-NOT: s7
+; GCN: s_mov_b32 s4, s6
+; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
-; GCN-NOT: s6
-; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
@@ -316,9 +294,8 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: s_mov_b32 s33, s9
-; GCN: s_mov_b32 s6, s7
-; GCN: s_mov_b32 s4, s33
-; GCN: s_mov_b32 s7, s8
+; GCN: s_mov_b32 s6, s8
+; GCN: s_mov_b32 s4, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
@@ -328,21 +305,21 @@ define amdgpu_kernel void @kern_indirect
; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
-; GCN-NOT: s6
+; GCN-NOT: s4
define hidden void @func_indirect_use_workgroup_id_x() #1 {
call void @use_workgroup_id_x()
ret void
}
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
-; GCN-NOT: s6
+; GCN-NOT: s4
define hidden void @func_indirect_use_workgroup_id_y() #1 {
call void @use_workgroup_id_y()
ret void
}
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
-; GCN-NOT: s6
+; GCN-NOT: s4
define hidden void @func_indirect_use_workgroup_id_z() #1 {
call void @use_workgroup_id_z()
ret void
@@ -350,7 +327,7 @@ define hidden void @func_indirect_use_wo
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %arg0, i32 addrspace(1)* undef
@@ -360,7 +337,7 @@ define hidden void @other_arg_use_workgr
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %arg0, i32 addrspace(1)* undef
@@ -370,7 +347,7 @@ define hidden void @other_arg_use_workgr
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
-; GCN: ; use s6
+; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %arg0, i32 addrspace(1)* undef
@@ -383,12 +360,11 @@ define hidden void @other_arg_use_workgr
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0
-; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; GCN-DAG: s_mov_b32 s4, s33
+; GCN-DAG: s_mov_b32 s4, s6
; GCN-DAG: s_mov_b32 s32, s33
-; GCN-NOT: s6
+; GCN-NOT: s4
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
call void @other_arg_use_workgroup_id_x(i32 555)
@@ -402,8 +378,8 @@ define amdgpu_kernel void @kern_indirect
; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; GCN-DAG: s_mov_b32 s4, s33
-; GCN-DAG: s_mov_b32 s6, s7
+; GCN-DAG: s_mov_b32 s4, s7
+
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
@@ -416,10 +392,8 @@ define amdgpu_kernel void @kern_indirect
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1
-; GCN: s_mov_b32 s33, s8
+; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
-; GCN-DAG: s_mov_b32 s4, s33
-; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
@@ -440,9 +414,9 @@ define amdgpu_kernel void @kern_indirect
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
+; GCN: ; use s4
; GCN: ; use s14
; GCN: ; use s15
-; GCN: ; use s16
define hidden void @use_every_sgpr_input() #1 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@@ -492,7 +466,9 @@ define hidden void @use_every_sgpr_input
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
-; GCN: s_mov_b32 s4, s33
+; GCN: s_mov_b32 s4, s14
+; GCN: s_mov_b32 s14, s15
+; GCN: s_mov_b32 s15, s16
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
@@ -519,9 +495,11 @@ define hidden void @func_indirect_use_ev
}
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
-; GCN-DAG: s_mov_b32 s6, s14
-; GCN-DAG: s_mov_b32 s7, s15
-; GCN-DAG: s_mov_b32 s8, s16
+; GCN-NOT: s_mov_b32 s4
+; GCN: s_mov_b32 s6, s14
+; GCN-NEXT: s_mov_b32 s7, s15
+; GCN-NOT: s_mov_b32 s4
+
; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
%alloca = alloca i32, align 4, addrspace(5)
@@ -560,20 +538,18 @@ define hidden void @func_use_every_sgpr_
; GCN-DAG: s_add_u32 s32, s32, 0x400
-; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
-; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
-; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
-; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
-; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
-; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
-
+; GCN: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
+; GCN-NOT: s_mov_b32 s4,
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
-; GCN-DAG: s_mov_b32 s8, s16
-; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
-; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
-; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]
+; GCN: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
+
+; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s4
+; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s14
+; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s15
+
+; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]
; GCN: s_swappc_b64
Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll Thu Jun 20 14:58:24 2019
@@ -290,7 +290,6 @@ define void @too_many_args_use_workitem_
; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
-; GCN: s_mov_b32 s4, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
call void @too_many_args_use_workitem_id_x(
Modified: llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/chain-hi-to-lo.ll Thu Jun 20 14:58:24 2019
@@ -220,10 +220,10 @@ bb:
}
; GCN-LABEL: {{^}}chain_hi_to_lo_private_other_dep:
-; GFX900: buffer_load_short_d16_hi v1, v0, s[0:3], s4 offen
+; GFX900: buffer_load_short_d16_hi v1, v0, s[0:3], s33 offen
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
-; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:2
+; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s33 offen offset:2
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64
Modified: llvm/trunk/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll Thu Jun 20 14:58:24 2019
@@ -32,20 +32,20 @@ define float @call_split_type_used_outsi
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_writelane_b32 v32, s33, 0
-; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: v_writelane_b32 v32, s35, 2
+; GCN-NEXT: v_writelane_b32 v32, s34, 0
+; GCN-NEXT: v_writelane_b32 v32, s35, 1
+; GCN-NEXT: v_writelane_b32 v32, s36, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v2f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func_v2f32@rel32@hi+4
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s36, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_mov_b32 s5, s36
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
-; GCN-NEXT: v_readlane_b32 s35, v32, 2
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: v_readlane_b32 s34, v32, 1
-; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: v_readlane_b32 s36, v32, 2
+; GCN-NEXT: v_readlane_b32 s35, v32, 1
+; GCN-NEXT: v_readlane_b32 s34, v32, 0
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -70,20 +70,20 @@ define float @call_split_type_used_outsi
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_writelane_b32 v32, s33, 0
-; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: v_writelane_b32 v32, s35, 2
+; GCN-NEXT: v_writelane_b32 v32, s34, 0
+; GCN-NEXT: v_writelane_b32 v32, s35, 1
+; GCN-NEXT: v_writelane_b32 v32, s36, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v3f32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func_v3f32@rel32@hi+4
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s36, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_mov_b32 s5, s36
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
-; GCN-NEXT: v_readlane_b32 s35, v32, 2
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: v_readlane_b32 s34, v32, 1
-; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: v_readlane_b32 s36, v32, 2
+; GCN-NEXT: v_readlane_b32 s35, v32, 1
+; GCN-NEXT: v_readlane_b32 s34, v32, 0
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -108,20 +108,20 @@ define half @call_split_type_used_outsid
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_writelane_b32 v32, s33, 0
-; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: v_writelane_b32 v32, s35, 2
+; GCN-NEXT: v_writelane_b32 v32, s34, 0
+; GCN-NEXT: v_writelane_b32 v32, s35, 1
+; GCN-NEXT: v_writelane_b32 v32, s36, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v4f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func_v4f16@rel32@hi+4
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s36, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_mov_b32 s5, s36
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
-; GCN-NEXT: v_readlane_b32 s35, v32, 2
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: v_readlane_b32 s34, v32, 1
-; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: v_readlane_b32 s36, v32, 2
+; GCN-NEXT: v_readlane_b32 s35, v32, 1
+; GCN-NEXT: v_readlane_b32 s34, v32, 0
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -146,21 +146,21 @@ define { i32, half } @call_split_type_us
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_writelane_b32 v32, s33, 0
-; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: v_writelane_b32 v32, s35, 2
+; GCN-NEXT: v_writelane_b32 v32, s34, 0
+; GCN-NEXT: v_writelane_b32 v32, s35, 1
+; GCN-NEXT: v_writelane_b32 v32, s36, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_struct@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func_struct@rel32@hi+4
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
-; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_mov_b32 s36, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
-; GCN-NEXT: v_readlane_b32 s35, v32, 2
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN-NEXT: v_readlane_b32 s34, v32, 1
+; GCN-NEXT: s_mov_b32 s5, s36
; GCN-NEXT: v_mov_b32_e32 v1, v4
-; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
+; GCN-NEXT: v_readlane_b32 s36, v32, 2
+; GCN-NEXT: v_readlane_b32 s35, v32, 1
+; GCN-NEXT: v_readlane_b32 s34, v32, 0
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Thu Jun 20 14:58:24 2019
@@ -7,10 +7,10 @@
; Materialize into a mov. Make sure there isn't an unnecessary copy.
; GCN-LABEL: {{^}}func_mov_fi_i32:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 s6, s32, s4
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 v0, s6, 6
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s6
+; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
+; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB]]
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
@@ -24,22 +24,22 @@ define void @func_mov_fi_i32() #0 {
; GCN-LABEL: {{^}}func_mov_fi_i32_offset:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI: s_sub_u32 s6, s32, s4
-; CI-NEXT: v_lshr_b32_e64 v0, s6, 6
+; CI: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
-; CI: s_sub_u32 s6, s32, s4
-; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; CI: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
; CI-NEXT: v_add_i32_e64 v1, s[6:7], 4, [[SCALED]]
; CI-NOT: v_mov
; CI: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
-; GFX9: s_sub_u32 s6, s32, s4
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s6
+; GFX9: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB]]
; GFX9-DAG: ds_write_b32 v0, v0
-; GFX9-DAG: s_sub_u32 s6, s32, s4
-; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6
+; GFX9-DAG: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
; GFX9-NEXT: ds_write_b32 v0, v0
define void @func_mov_fi_i32_offset() #0 {
@@ -55,12 +55,12 @@ define void @func_mov_fi_i32_offset() #0
; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 s6, s32, s4
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
+; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
-; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6
+; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
@@ -77,11 +77,11 @@ define void @func_add_constant_to_fi_i32
; into.
; GCN-LABEL: {{^}}func_other_fi_user_i32:
-; GCN: s_sub_u32 s6, s32, s4
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 v0, s6, 6
+; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s6
+; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB]]
; GCN-NEXT: v_mul_u32_u24_e32 v0, 9, v0
; GCN-NOT: v_mov
@@ -96,7 +96,7 @@ define void @func_other_fi_user_i32() #0
; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr:
; GCN: v_mov_b32_e32 v1, 15{{$}}
-; GCN: buffer_store_dword v1, v0, s[0:3], s4 offen{{$}}
+; GCN: buffer_store_dword v1, v0, s[0:3], s33 offen{{$}}
define void @func_store_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
store volatile i32 15, i32 addrspace(5)* %ptr
ret void
@@ -104,7 +104,7 @@ define void @func_store_private_arg_i32_
; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr:
; GCN: s_waitcnt
-; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s4 offen{{$}}
+; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s33 offen{{$}}
define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
%val = load volatile i32, i32 addrspace(5)* %ptr
ret void
@@ -112,7 +112,7 @@ define void @func_load_private_arg_i32_p
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
; GCN: s_waitcnt
-; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s4
+; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s33
; CI-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]]
@@ -148,7 +148,7 @@ define void @void_func_byval_struct_i8_i
; FrameIndex is hidden behind a CopyFromReg in the second block.
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
-; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s4
+; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s33
; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
@@ -157,10 +157,10 @@ define void @void_func_byval_struct_i8_i
; GCN: s_and_saveexec_b64
; CI: v_add_i32_e32 v0, vcc, 4, [[SHIFT]]
-; CI: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4{{$}}
+; CI: buffer_load_dword v1, v1, s[0:3], s33 offen offset:4{{$}}
; GFX9: v_add_u32_e32 v0, 4, [[SHIFT]]
-; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}}
+; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s33 offen offset:4{{$}}
; GCN: ds_write_b32
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 {
@@ -180,14 +180,14 @@ ret:
; Added offset can't be used with VOP3 add
; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
-; GCN: s_sub_u32 s6, s32, s4
-; GCN-DAG: s_movk_i32 s6, 0x200
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GCN-DAG: s_movk_i32 [[K:s[0-9]+]], 0x200
-; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
-; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s[6:7], s6, [[SCALED]]
+; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
+; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s[6:7], [[K]], [[SCALED]]
-; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6
-; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], s6, [[SCALED]]
+; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
+; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[K]], [[SCALED]]
; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]]
; GCN: ds_write_b32 v0, [[VZ]]
@@ -204,7 +204,7 @@ define void @func_other_fi_user_non_inli
}
; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
-; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s32, s4
+; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s32, s33
; GCN-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x200
; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6
@@ -261,7 +261,7 @@ bb5:
; GCN-LABEL: {{^}}alloca_ptr_nonentry_block:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
-; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s4
+; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s33
; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll Thu Jun 20 14:58:24 2019
@@ -389,8 +389,8 @@ define {i8, i32} @struct_i8_i32_func_voi
; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
-; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}}
-; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}}
+; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s33 offen{{$}}
+; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s33 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
%val0 = load volatile i8, i8 addrspace(1)* undef
%val1 = load volatile i32, i32 addrspace(1)* undef
@@ -406,39 +406,39 @@ define void @void_func_sret_struct_i8_i3
; AssertZext inserted. Not using it introduces the spills.
; GCN-LABEL: {{^}}v33i32_func_void:
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
@@ -448,39 +448,39 @@ define <33 x i32> @v33i32_func_void() #0
}
; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
@@ -490,39 +490,39 @@ define { <32 x i32>, i32 } @struct_v32i3
}
; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}}
-; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:132{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:136{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:140{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:144{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:148{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:152{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:156{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:160{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:164{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:168{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:172{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:176{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:180{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:184{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:188{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:192{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:196{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:200{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:204{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:208{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:212{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:216{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:220{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:224{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:228{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:232{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:236{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:240{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:244{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:248{{$}}
+; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/known-never-snan.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/known-never-snan.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/known-never-snan.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/known-never-snan.ll Thu Jun 20 14:58:24 2019
@@ -67,8 +67,8 @@ define float @v_test_known_not_snan_copy
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f32_e32 v0, v0
-; GCN-NEXT: s_brev_b32 s6, -2
-; GCN-NEXT: v_bfi_b32 v0, s6, v0, v1
+; GCN-NEXT: s_brev_b32 s4, -2
+; GCN-NEXT: v_bfi_b32 v0, s4, v0, v1
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%a.nnan.add = fdiv nnan float 1.0, %a
@@ -455,9 +455,9 @@ define float @v_test_known_not_snan_roun
; GCN-LABEL: v_test_known_not_snan_round_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_brev_b32 s6, -2
+; GCN-NEXT: s_brev_b32 s4, -2
; GCN-NEXT: v_trunc_f32_e32 v2, v0
-; GCN-NEXT: v_bfi_b32 v1, s6, 1.0, v0
+; GCN-NEXT: v_bfi_b32 v1, s4, 1.0, v0
; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, 0.5
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Thu Jun 20 14:58:24 2019
@@ -217,9 +217,10 @@ define void @opencl_func_kernarg_implici
}
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
+; GCN: s_add_u32 s8, s4, 0x70
+; GCN: s_addc_u32 s9, s5, 0
+
; GCN: s_mov_b64 s[6:7], s[4:5]
-; GCN: s_add_u32 s8, s6, 0x70
-; GCN: s_addc_u32 s9, s7, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
call void @func_kernarg_implicitarg_ptr()
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-hi16.ll Thu Jun 20 14:58:24 2019
@@ -532,13 +532,13 @@ entry:
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
-; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s4 offset:4094{{$}}
+; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s33 offset:4094{{$}}
; GFX900: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_hi_v2i16_reglo_vreg_nooff(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
%load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
@@ -550,13 +550,13 @@ entry:
; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_short_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_short_d16_hi v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_hi_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, half %reg) #0 {
entry:
%load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
@@ -650,13 +650,13 @@ entry:
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
%load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
@@ -669,13 +669,13 @@ entry:
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_sbyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_sbyte_d16_hi v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_sbyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i16 %reg) #0 {
entry:
%load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
@@ -688,13 +688,13 @@ entry:
; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_ubyte_d16_hi v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, half %reg) #0 {
entry:
%load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
Modified: llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/load-lo16.ll Thu Jun 20 14:58:24 2019
@@ -650,13 +650,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
@@ -668,13 +668,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
@@ -686,13 +686,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_short_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
@@ -744,13 +744,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
@@ -763,13 +763,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_sbyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x i16>
@@ -782,13 +782,13 @@ entry:
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
+; GFX900-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s33 offset:4094{{$}}
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX900-NEXT: s_waitcnt
; GFX900-NEXT: s_setpc_b64
-; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ubyte v0, off, s[0:3], s33 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
%reg.bc = bitcast i32 %reg to <2 x half>
Modified: llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll Thu Jun 20 14:58:24 2019
@@ -234,7 +234,7 @@ entry:
; W64-O0-LABEL: mubuf_vgpr_outside_entry
; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s4
-; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], [[IDX_S]]
+; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s4
; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s[0:3], s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
Modified: llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll Thu Jun 20 14:58:24 2019
@@ -249,7 +249,7 @@ entry:
; GCN-DAG: v_and_b32_e32 v1, [[U23_MASK]], v1
; GCN-DAG: v_mul_u32_u24_e32 v0, 0xea, v0
; GCN-DAG: v_mul_u32_u24_e32 v1, 0x39b, v1
-; GCN: v_and_b32_e32 v1, s6, v1
+; GCN: v_and_b32_e32 v1, s4, v1
; GCN: v_and_b32_e32 v0, 0x7ffffe, v0
; GCN: v_mul_u32_u24_e32 v0, v0, v1
; GCN: v_and_b32_e32 v0, 0x1fffe, v0
Modified: llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/nested-calls.ll Thu Jun 20 14:58:24 2019
@@ -16,15 +16,15 @@ declare void @external_void_func_i32(i32
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN-DAG: v_writelane_b32 v32, s33, 0
-; GCN-DAG: v_writelane_b32 v32, s34, 1
-; GCN-DAG: v_writelane_b32 v32, s35, 2
+; GCN-DAG: v_writelane_b32 v32, s34, 0
+; GCN-DAG: v_writelane_b32 v32, s35, 1
+; GCN-DAG: v_writelane_b32 v32, s36, 2
; GCN: s_swappc_b64
-; GCN: v_readlane_b32 s35, v32, 2
-; GCN: v_readlane_b32 s34, v32, 1
-; GCN: v_readlane_b32 s33, v32, 0
+; GCN: v_readlane_b32 s36, v32, 2
+; GCN: v_readlane_b32 s35, v32, 1
+; GCN: v_readlane_b32 s34, v32, 0
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir Thu Jun 20 14:58:24 2019
@@ -17,7 +17,7 @@ stack:
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- scratchWaveOffsetReg: $sgpr5
+ scratchWaveOffsetReg: $sgpr33
frameOffsetReg: $sgpr5
stackPtrOffsetReg: $sgpr32
@@ -25,9 +25,9 @@ body: |
; CHECK-LABEL: name: scavenge_register_position
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
- ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
+ ; CHECK: liveins: $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: $sgpr4 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
+ ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_ptr.ll Thu Jun 20 14:58:24 2019
@@ -333,10 +333,10 @@ define void @shl_add_ptr_combine_2use_bo
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_private:
; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 2, v0
-; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:16
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s33 offen offset:16
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 3, v0
-; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s4 offen offset:32
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s33 offen offset:32
define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 4
@@ -352,9 +352,9 @@ define void @shl_add_ptr_combine_2use_pr
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_max_private_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:4088
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s33 offen offset:4088
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
-; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s4 offen{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s33 offen{{$}}
define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 511
@@ -370,8 +370,8 @@ define void @shl_add_ptr_combine_2use_ma
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s4 offen{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s33 offen{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s33 offen{{$}}
define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.arg) #0 {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 256
Modified: llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll Thu Jun 20 14:58:24 2019
@@ -117,7 +117,7 @@ entry:
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_byval_i32:
; GCN-NOT: v0
; GCN-NOT: s32
-; GCN: buffer_load_dword v1, off, s[0:3], s4 offset:16
+; GCN: buffer_load_dword v1, off, s[0:3], s33 offset:16
; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
; GCN-NEXT: s_setpc_b64
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
@@ -211,8 +211,8 @@ entry:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
; GCN: buffer_store_dword v33, off, s[0:3], s5 ; 4-byte Folded Spill
-; GCN-DAG: v_writelane_b32 v34, s33, 0
-; GCN-DAG: v_writelane_b32 v34, s34, 1
+; GCN-DAG: v_writelane_b32 v34, s34, 0
+; GCN-DAG: v_writelane_b32 v34, s35, 1
; GCN-DAG: s_getpc_b64
; GCN: s_swappc_b64
@@ -221,8 +221,8 @@ entry:
; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4
-; GCN-DAG: v_readlane_b32 s33, v34, 0
-; GCN-DAG: v_readlane_b32 s34, v34, 1
+; GCN-DAG: v_readlane_b32 s34, v34, 0
+; GCN-DAG: v_readlane_b32 s35, v34, 1
; GCN: buffer_load_dword v33, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-offset-calculation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-offset-calculation.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-offset-calculation.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-offset-calculation.ll Thu Jun 20 14:58:24 2019
@@ -220,8 +220,8 @@ entry:
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
; 0x40000 / 64 = 4096 (for wave64)
- ; CHECK: s_add_u32 s6, s32, 0x40000
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
+ ; CHECK: s_add_u32 s4, s32, 0x40000
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
; Force %a to spill
@@ -272,9 +272,9 @@ entry:
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
; 0x3ff00 / 64 = 4092 (for wave64)
- ; CHECK: s_add_u32 s6, s32, 0x3ff00
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
+ ; CHECK: s_add_u32 s4, s32, 0x3ff00
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
@@ -293,4 +293,4 @@ entry:
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" }
-attributes #2 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
+attributes #2 = { nounwind "amdgpu-num-sgpr"="15" "amdgpu-num-vgpr"="8" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll Thu Jun 20 14:58:24 2019
@@ -9,18 +9,18 @@
; = 144 bytes with padding between them
; GCN-LABEL: {{^}}needs_align16_default_stack_align:
-; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s4
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, v0
; GCN-DAG: v_lshrrev_b32_e64 [[FRAMEDIFF:v[0-9]+]], 6, [[SUB]]
; GCN: v_add_u32_e32 [[FI:v[0-9]+]], vcc, [[FRAMEDIFF]], [[SCALED_IDX]]
; GCN-NOT: s32
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN-NOT: s32
@@ -34,14 +34,14 @@ define void @needs_align16_default_stack
; GCN-LABEL: {{^}}needs_align16_stack_align4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
-; GCN: s_and_b32 s5, s6, 0xfffffc00
+; GCN: s_and_b32 s5, [[SCRATCH_REG]], 0xfffffc00
; GCN: s_add_u32 s32, s32, 0x2800{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: s_sub_u32 s32, s32, 0x2800
@@ -55,14 +55,14 @@ define void @needs_align16_stack_align4(
; GCN-LABEL: {{^}}needs_align32:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
-; GCN: s_and_b32 s5, s6, 0xfffff800
+; GCN: s_and_b32 s5, [[SCRATCH_REG]], 0xfffff800
; GCN: s_add_u32 s32, s32, 0x3000{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: s_sub_u32 s32, s32, 0x3000
@@ -76,10 +76,10 @@ define void @needs_align32(i32 %idx) #0
; GCN-LABEL: {{^}}force_realign4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
-; GCN: s_and_b32 s5, s6, 0xffffff00
+; GCN: s_and_b32 s5, [[SCRATCH_REG]], 0xffffff00
; GCN: s_add_u32 s32, s32, 0xd00{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s33 offen
; GCN: s_sub_u32 s32, s32, 0xd00
; GCN: ; ScratchSize: 52
Modified: llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store-hi16.ll Thu Jun 20 14:58:24 2019
@@ -389,10 +389,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_v2i16:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
-; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -408,10 +408,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_v2f16:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
-; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -427,10 +427,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -445,10 +445,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s33 offen{{$}}
; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
+; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s33 offen{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -464,10 +464,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s33 offen{{$}}
; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
+; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s33 offen{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -502,10 +502,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}
+; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s33{{$}}
; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}
+; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s33{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
@@ -522,10 +522,10 @@ entry:
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
; GCN: s_waitcnt
-; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}
+; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s33{{$}}
; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
-; NO-D16-HI: buffer_store_byte v0, off, s[0:3], s4{{$}}
+; NO-D16-HI: buffer_store_byte v0, off, s[0:3], s33{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
Modified: llvm/trunk/test/CodeGen/AMDGPU/wave32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wave32.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wave32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wave32.ll Thu Jun 20 14:58:24 2019
@@ -1080,13 +1080,13 @@ declare void @external_void_func_void()
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
-; GCN-DAG: v_writelane_b32 v32, s33, 0
-; GCN-DAG: v_writelane_b32 v32, s34, 1
-; GCN-DAG: s_mov_b32 s33, s5
+; GCN-DAG: v_writelane_b32 v32, s34, 0
+; GCN-DAG: v_writelane_b32 v32, s35, 1
+; GCN-DAG: s_mov_b32 [[COPY_FP:s[0-9]+]], s5
; GCN: s_swappc_b64
-; GCN-DAG: s_mov_b32 s5, s33
-; GCN-DAG: v_readlane_b32 s34, v32, 1
-; GCN-DAG: v_readlane_b32 s33, v32, 0
+; GCN-DAG: s_mov_b32 s5, [[COPY_FP]]
+; GCN-DAG: v_readlane_b32 s35, v32, 1
+; GCN-DAG: v_readlane_b32 s34, v32, 0
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/machine-function-info.ll?rev=363990&r1=363989&r2=363990&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/machine-function-info.ll (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/machine-function-info.ll Thu Jun 20 14:58:24 2019
@@ -54,7 +54,7 @@ define amdgpu_ps void @ps_shader(i32 %ar
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr4'
+; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr33'
; CHECK-NEXT: frameOffsetReg: '$sgpr5'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: body:
@@ -72,7 +72,7 @@ define void @function() {
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr4'
+; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr33'
; CHECK-NEXT: frameOffsetReg: '$sgpr5'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: body:
More information about the llvm-commits mailing list