[llvm] b8b491c - AMDGPU: Add infinite looping testcase after subrange spilling change

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 27 01:42:20 PDT 2023


Author: Matt Arsenault
Date: 2023-10-27T17:42:14+09:00
New Revision: b8b491c9d7b19db00830caea2fd2b7d792d9c8cb

URL: https://github.com/llvm/llvm-project/commit/b8b491c9d7b19db00830caea2fd2b7d792d9c8cb
DIFF: https://github.com/llvm/llvm-project/commit/b8b491c9d7b19db00830caea2fd2b7d792d9c8cb.diff

LOG: AMDGPU: Add infinite looping testcase after subrange spilling change

This infinite looped after d8127b2ba8a87a610851b9a462f2fc2526c36e37

Added: 
    llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
    llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
new file mode 100644
index 000000000000000..27fa6712f9eb429
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s
+
+define void @main(i1 %arg) #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
+; CHECK-NEXT:    v_writelane_b32 v1, s30, 0
+; CHECK-NEXT:    v_writelane_b32 v1, s31, 1
+; CHECK-NEXT:    v_writelane_b32 v1, s36, 2
+; CHECK-NEXT:    v_writelane_b32 v1, s37, 3
+; CHECK-NEXT:    v_writelane_b32 v1, s38, 4
+; CHECK-NEXT:    v_writelane_b32 v1, s39, 5
+; CHECK-NEXT:    v_writelane_b32 v1, s40, 6
+; CHECK-NEXT:    v_writelane_b32 v1, s41, 7
+; CHECK-NEXT:    v_writelane_b32 v1, s42, 8
+; CHECK-NEXT:    v_writelane_b32 v1, s43, 9
+; CHECK-NEXT:    v_writelane_b32 v1, s44, 10
+; CHECK-NEXT:    v_writelane_b32 v1, s45, 11
+; CHECK-NEXT:    v_writelane_b32 v1, s46, 12
+; CHECK-NEXT:    v_writelane_b32 v1, s47, 13
+; CHECK-NEXT:    v_writelane_b32 v1, s48, 14
+; CHECK-NEXT:    v_writelane_b32 v1, s49, 15
+; CHECK-NEXT:    s_getpc_b64 s[24:25]
+; CHECK-NEXT:    v_writelane_b32 v1, s50, 16
+; CHECK-NEXT:    s_movk_i32 s4, 0xf0
+; CHECK-NEXT:    s_mov_b32 s5, s24
+; CHECK-NEXT:    v_writelane_b32 v1, s51, 17
+; CHECK-NEXT:    s_load_dwordx16 s[36:51], s[4:5], 0x0
+; CHECK-NEXT:    ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_load_dwordx4 s[28:31], s[4:5], 0x0
+; CHECK-NEXT:    s_movk_i32 s4, 0x130
+; CHECK-NEXT:    s_mov_b32 s5, s24
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v4, s36, 0
+; CHECK-NEXT:    v_writelane_b32 v4, s37, 1
+; CHECK-NEXT:    v_writelane_b32 v4, s38, 2
+; CHECK-NEXT:    v_writelane_b32 v4, s39, 3
+; CHECK-NEXT:    v_writelane_b32 v4, s40, 4
+; CHECK-NEXT:    v_writelane_b32 v4, s41, 5
+; CHECK-NEXT:    v_writelane_b32 v4, s42, 6
+; CHECK-NEXT:    v_writelane_b32 v4, s43, 7
+; CHECK-NEXT:    v_writelane_b32 v4, s44, 8
+; CHECK-NEXT:    v_writelane_b32 v4, s45, 9
+; CHECK-NEXT:    v_writelane_b32 v4, s46, 10
+; CHECK-NEXT:    s_load_dwordx16 s[4:19], s[4:5], 0x0
+; CHECK-NEXT:    v_writelane_b32 v4, s47, 11
+; CHECK-NEXT:    v_writelane_b32 v4, s48, 12
+; CHECK-NEXT:    v_writelane_b32 v4, s49, 13
+; CHECK-NEXT:    s_mov_b32 s20, 0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_writelane_b32 v4, s50, 14
+; CHECK-NEXT:    v_mov_b32_e32 v5, s28
+; CHECK-NEXT:    v_mov_b32_e32 v6, v2
+; CHECK-NEXT:    s_mov_b32 s21, s20
+; CHECK-NEXT:    s_mov_b32 s22, s20
+; CHECK-NEXT:    s_mov_b32 s23, s20
+; CHECK-NEXT:    v_writelane_b32 v4, s51, 15
+; CHECK-NEXT:    v_mov_b32_e32 v3, v2
+; CHECK-NEXT:    image_sample_lz v5, v[5:6], s[44:51], s[20:23] dmask:0x1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v4, s4, 16
+; CHECK-NEXT:    v_writelane_b32 v1, s52, 18
+; CHECK-NEXT:    v_writelane_b32 v4, s5, 17
+; CHECK-NEXT:    v_writelane_b32 v1, s53, 19
+; CHECK-NEXT:    v_writelane_b32 v4, s6, 18
+; CHECK-NEXT:    v_writelane_b32 v1, s54, 20
+; CHECK-NEXT:    image_sample_lz v6, v[2:3], s[4:11], s[20:23] dmask:0x1
+; CHECK-NEXT:    v_writelane_b32 v4, s7, 19
+; CHECK-NEXT:    v_writelane_b32 v1, s55, 21
+; CHECK-NEXT:    v_writelane_b32 v4, s8, 20
+; CHECK-NEXT:    v_writelane_b32 v1, s56, 22
+; CHECK-NEXT:    v_writelane_b32 v4, s9, 21
+; CHECK-NEXT:    v_writelane_b32 v1, s57, 23
+; CHECK-NEXT:    v_writelane_b32 v4, s10, 22
+; CHECK-NEXT:    v_writelane_b32 v1, s58, 24
+; CHECK-NEXT:    v_writelane_b32 v4, s11, 23
+; CHECK-NEXT:    v_writelane_b32 v1, s59, 25
+; CHECK-NEXT:    v_writelane_b32 v4, s12, 24
+; CHECK-NEXT:    v_writelane_b32 v1, s60, 26
+; CHECK-NEXT:    v_writelane_b32 v4, s13, 25
+; CHECK-NEXT:    v_writelane_b32 v1, s61, 27
+; CHECK-NEXT:    v_writelane_b32 v4, s14, 26
+; CHECK-NEXT:    v_writelane_b32 v1, s62, 28
+; CHECK-NEXT:    v_writelane_b32 v4, s15, 27
+; CHECK-NEXT:    v_writelane_b32 v1, s63, 29
+; CHECK-NEXT:    v_writelane_b32 v4, s16, 28
+; CHECK-NEXT:    v_writelane_b32 v1, s64, 30
+; CHECK-NEXT:    v_writelane_b32 v4, s17, 29
+; CHECK-NEXT:    v_writelane_b32 v1, s65, 31
+; CHECK-NEXT:    v_writelane_b32 v4, s18, 30
+; CHECK-NEXT:    v_writelane_b32 v1, s66, 32
+; CHECK-NEXT:    v_writelane_b32 v4, s19, 31
+; CHECK-NEXT:    s_mov_b32 s4, 48
+; CHECK-NEXT:    s_movk_i32 s28, 0x2f0
+; CHECK-NEXT:    s_mov_b32 s5, s24
+; CHECK-NEXT:    s_mov_b32 s29, s24
+; CHECK-NEXT:    v_writelane_b32 v1, s67, 33
+; CHECK-NEXT:    s_movk_i32 s26, 0x1f0
+; CHECK-NEXT:    s_mov_b32 s27, s24
+; CHECK-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
+; CHECK-NEXT:    s_nop 0
+; CHECK-NEXT:    s_load_dwordx16 s[36:51], s[26:27], 0x0
+; CHECK-NEXT:    s_load_dwordx16 s[52:67], s[28:29], 0x0
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT:    s_xor_b64 s[24:25], vcc, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v0, v6, v5
+; CHECK-NEXT:    s_and_saveexec_b64 s[26:27], s[24:25]
+; CHECK-NEXT:    s_xor_b64 s[26:27], exec, s[26:27]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_3
+; CHECK-NEXT:  ; %bb.1: ; %bb48
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v4, s36, 32
+; CHECK-NEXT:    v_writelane_b32 v4, s37, 33
+; CHECK-NEXT:    v_writelane_b32 v4, s38, 34
+; CHECK-NEXT:    v_writelane_b32 v4, s39, 35
+; CHECK-NEXT:    v_writelane_b32 v4, s40, 36
+; CHECK-NEXT:    v_writelane_b32 v4, s41, 37
+; CHECK-NEXT:    v_writelane_b32 v4, s42, 38
+; CHECK-NEXT:    v_writelane_b32 v4, s43, 39
+; CHECK-NEXT:    v_writelane_b32 v4, s44, 40
+; CHECK-NEXT:    v_writelane_b32 v4, s45, 41
+; CHECK-NEXT:    v_writelane_b32 v4, s46, 42
+; CHECK-NEXT:    v_writelane_b32 v4, s47, 43
+; CHECK-NEXT:    v_writelane_b32 v4, s48, 44
+; CHECK-NEXT:    v_writelane_b32 v4, s49, 45
+; CHECK-NEXT:    v_writelane_b32 v4, s50, 46
+; CHECK-NEXT:    v_writelane_b32 v4, s51, 47
+; CHECK-NEXT:    v_readlane_b32 s36, v4, 0
+; CHECK-NEXT:    v_readlane_b32 s44, v4, 8
+; CHECK-NEXT:    v_readlane_b32 s45, v4, 9
+; CHECK-NEXT:    v_readlane_b32 s46, v4, 10
+; CHECK-NEXT:    v_readlane_b32 s47, v4, 11
+; CHECK-NEXT:    v_readlane_b32 s48, v4, 12
+; CHECK-NEXT:    v_readlane_b32 s49, v4, 13
+; CHECK-NEXT:    v_readlane_b32 s50, v4, 14
+; CHECK-NEXT:    v_readlane_b32 s51, v4, 15
+; CHECK-NEXT:    v_readlane_b32 s37, v4, 1
+; CHECK-NEXT:    v_readlane_b32 s38, v4, 2
+; CHECK-NEXT:    v_readlane_b32 s39, v4, 3
+; CHECK-NEXT:    v_readlane_b32 s40, v4, 4
+; CHECK-NEXT:    v_readlane_b32 s41, v4, 5
+; CHECK-NEXT:    image_sample_lz v5, v[2:3], s[44:51], s[20:23] dmask:0x1
+; CHECK-NEXT:    v_readlane_b32 s42, v4, 6
+; CHECK-NEXT:    v_readlane_b32 s43, v4, 7
+; CHECK-NEXT:    v_readlane_b32 s36, v4, 32
+; CHECK-NEXT:    v_readlane_b32 s37, v4, 33
+; CHECK-NEXT:    v_readlane_b32 s38, v4, 34
+; CHECK-NEXT:    v_readlane_b32 s39, v4, 35
+; CHECK-NEXT:    v_readlane_b32 s40, v4, 36
+; CHECK-NEXT:    v_readlane_b32 s41, v4, 37
+; CHECK-NEXT:    v_readlane_b32 s42, v4, 38
+; CHECK-NEXT:    v_readlane_b32 s43, v4, 39
+; CHECK-NEXT:    v_readlane_b32 s44, v4, 40
+; CHECK-NEXT:    v_readlane_b32 s45, v4, 41
+; CHECK-NEXT:    v_readlane_b32 s46, v4, 42
+; CHECK-NEXT:    v_readlane_b32 s47, v4, 43
+; CHECK-NEXT:    v_readlane_b32 s48, v4, 44
+; CHECK-NEXT:    v_readlane_b32 s49, v4, 45
+; CHECK-NEXT:    v_readlane_b32 s50, v4, 46
+; CHECK-NEXT:    v_readlane_b32 s51, v4, 47
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    s_and_b64 vcc, exec, -1
+; CHECK-NEXT:  .LBB0_2: ; %bb50
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_mov_b32 s21, s20
+; CHECK-NEXT:    s_mov_b32 s22, s20
+; CHECK-NEXT:    s_mov_b32 s23, s20
+; CHECK-NEXT:    image_sample_lz v6, v[2:3], s[44:51], s[8:11] dmask:0x1
+; CHECK-NEXT:    s_nop 0
+; CHECK-NEXT:    image_sample_lz v2, v[2:3], s[60:67], s[20:23] dmask:0x1
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v2, v2, v6
+; CHECK-NEXT:    v_mul_f32_e32 v2, v2, v0
+; CHECK-NEXT:    v_mul_f32_e32 v2, v2, v5
+; CHECK-NEXT:    s_mov_b64 vcc, vcc
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT:  .LBB0_3: ; %Flow14
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[28:29], s[26:27]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_10
+; CHECK-NEXT:  ; %bb.4: ; %bb32
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[24:25]
+; CHECK-NEXT:    s_xor_b64 vcc, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_6
+; CHECK-NEXT:  ; %bb.5: ; %bb43
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    s_mov_b64 s[12:13], s[36:37]
+; CHECK-NEXT:    s_mov_b32 s9, s8
+; CHECK-NEXT:    v_mov_b32_e32 v2, s8
+; CHECK-NEXT:    s_mov_b64 s[14:15], s[38:39]
+; CHECK-NEXT:    s_mov_b64 s[16:17], s[40:41]
+; CHECK-NEXT:    s_mov_b64 s[18:19], s[42:43]
+; CHECK-NEXT:    v_readlane_b32 s36, v4, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s9
+; CHECK-NEXT:    s_mov_b32 s10, s8
+; CHECK-NEXT:    s_mov_b32 s11, s8
+; CHECK-NEXT:    v_readlane_b32 s37, v4, 1
+; CHECK-NEXT:    v_readlane_b32 s38, v4, 2
+; CHECK-NEXT:    v_readlane_b32 s39, v4, 3
+; CHECK-NEXT:    v_readlane_b32 s40, v4, 4
+; CHECK-NEXT:    v_readlane_b32 s41, v4, 5
+; CHECK-NEXT:    v_readlane_b32 s42, v4, 6
+; CHECK-NEXT:    v_readlane_b32 s43, v4, 7
+; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    v_mov_b32_e32 v7, v6
+; CHECK-NEXT:    v_readlane_b32 s44, v4, 8
+; CHECK-NEXT:    v_readlane_b32 s45, v4, 9
+; CHECK-NEXT:    v_readlane_b32 s46, v4, 10
+; CHECK-NEXT:    image_sample_lz v5, v[2:3], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT:    s_mov_b64 s[42:43], s[18:19]
+; CHECK-NEXT:    s_mov_b64 s[40:41], s[16:17]
+; CHECK-NEXT:    s_mov_b64 s[38:39], s[14:15]
+; CHECK-NEXT:    s_mov_b64 s[36:37], s[12:13]
+; CHECK-NEXT:    v_readlane_b32 s12, v4, 16
+; CHECK-NEXT:    v_readlane_b32 s20, v4, 24
+; CHECK-NEXT:    v_readlane_b32 s21, v4, 25
+; CHECK-NEXT:    v_readlane_b32 s22, v4, 26
+; CHECK-NEXT:    v_readlane_b32 s23, v4, 27
+; CHECK-NEXT:    v_readlane_b32 s24, v4, 28
+; CHECK-NEXT:    v_readlane_b32 s25, v4, 29
+; CHECK-NEXT:    v_readlane_b32 s26, v4, 30
+; CHECK-NEXT:    v_readlane_b32 s27, v4, 31
+; CHECK-NEXT:    v_readlane_b32 s47, v4, 11
+; CHECK-NEXT:    v_readlane_b32 s48, v4, 12
+; CHECK-NEXT:    v_readlane_b32 s49, v4, 13
+; CHECK-NEXT:    v_readlane_b32 s50, v4, 14
+; CHECK-NEXT:    v_readlane_b32 s51, v4, 15
+; CHECK-NEXT:    image_sample_lz v2, v[2:3], s[20:27], s[4:7] dmask:0x1
+; CHECK-NEXT:    v_readlane_b32 s13, v4, 17
+; CHECK-NEXT:    v_readlane_b32 s14, v4, 18
+; CHECK-NEXT:    v_readlane_b32 s15, v4, 19
+; CHECK-NEXT:    v_readlane_b32 s16, v4, 20
+; CHECK-NEXT:    v_readlane_b32 s17, v4, 21
+; CHECK-NEXT:    v_readlane_b32 s18, v4, 22
+; CHECK-NEXT:    v_readlane_b32 s19, v4, 23
+; CHECK-NEXT:    ; implicit-def: $vgpr0
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    buffer_store_dwordx3 v[5:7], off, s[8:11], 0
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    buffer_store_dwordx4 v[2:5], off, s[8:11], 0
+; CHECK-NEXT:  .LBB0_6: ; %Flow12
+; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT:    s_cbranch_execz .LBB0_9
+; CHECK-NEXT:  ; %bb.7: ; %bb33.preheader
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    s_mov_b32 s6, s8
+; CHECK-NEXT:    s_mov_b32 s7, s8
+; CHECK-NEXT:    v_mov_b32_e32 v2, s6
+; CHECK-NEXT:    s_mov_b32 s9, s8
+; CHECK-NEXT:    s_mov_b32 s10, s8
+; CHECK-NEXT:    s_mov_b32 s11, s8
+; CHECK-NEXT:    v_mov_b32_e32 v3, s7
+; CHECK-NEXT:    image_sample_lz v5, v[2:3], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT:    image_sample_lz v6, v[2:3], s[52:59], s[8:11] dmask:0x1
+; CHECK-NEXT:    s_and_b64 vcc, exec, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v2, v6, v5
+; CHECK-NEXT:    v_mul_f32_e32 v0, v2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:  .LBB0_8: ; %bb33
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_f32_e32 v3, v2, v0
+; CHECK-NEXT:    v_sub_f32_e32 v2, v2, v3
+; CHECK-NEXT:    s_mov_b64 vcc, vcc
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_8
+; CHECK-NEXT:  .LBB0_9: ; %Flow13
+; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:  .LBB0_10: ; %UnifiedReturnBlock
+; CHECK-NEXT:    s_or_b64 exec, exec, s[28:29]
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_readlane_b32 s67, v1, 33
+; CHECK-NEXT:    v_readlane_b32 s66, v1, 32
+; CHECK-NEXT:    v_readlane_b32 s65, v1, 31
+; CHECK-NEXT:    v_readlane_b32 s64, v1, 30
+; CHECK-NEXT:    v_readlane_b32 s63, v1, 29
+; CHECK-NEXT:    v_readlane_b32 s62, v1, 28
+; CHECK-NEXT:    v_readlane_b32 s61, v1, 27
+; CHECK-NEXT:    v_readlane_b32 s60, v1, 26
+; CHECK-NEXT:    v_readlane_b32 s59, v1, 25
+; CHECK-NEXT:    v_readlane_b32 s58, v1, 24
+; CHECK-NEXT:    v_readlane_b32 s57, v1, 23
+; CHECK-NEXT:    v_readlane_b32 s56, v1, 22
+; CHECK-NEXT:    v_readlane_b32 s55, v1, 21
+; CHECK-NEXT:    v_readlane_b32 s54, v1, 20
+; CHECK-NEXT:    v_readlane_b32 s53, v1, 19
+; CHECK-NEXT:    v_readlane_b32 s52, v1, 18
+; CHECK-NEXT:    v_readlane_b32 s51, v1, 17
+; CHECK-NEXT:    v_readlane_b32 s50, v1, 16
+; CHECK-NEXT:    v_readlane_b32 s49, v1, 15
+; CHECK-NEXT:    v_readlane_b32 s48, v1, 14
+; CHECK-NEXT:    v_readlane_b32 s47, v1, 13
+; CHECK-NEXT:    v_readlane_b32 s46, v1, 12
+; CHECK-NEXT:    v_readlane_b32 s45, v1, 11
+; CHECK-NEXT:    v_readlane_b32 s44, v1, 10
+; CHECK-NEXT:    v_readlane_b32 s43, v1, 9
+; CHECK-NEXT:    v_readlane_b32 s42, v1, 8
+; CHECK-NEXT:    v_readlane_b32 s41, v1, 7
+; CHECK-NEXT:    v_readlane_b32 s40, v1, 6
+; CHECK-NEXT:    v_readlane_b32 s39, v1, 5
+; CHECK-NEXT:    v_readlane_b32 s38, v1, 4
+; CHECK-NEXT:    v_readlane_b32 s37, v1, 3
+; CHECK-NEXT:    v_readlane_b32 s36, v1, 2
+; CHECK-NEXT:    v_readlane_b32 s31, v1, 1
+; CHECK-NEXT:    v_readlane_b32 s30, v1, 0
+; CHECK-NEXT:    ; kill: killed $vgpr4
+; CHECK-NEXT:    s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %i = call i64 @llvm.amdgcn.s.getpc()
+  %i1 = trunc i64 %i to i32
+  %i2 = insertelement <2 x i32> zeroinitializer, i32 %i1, i64 1
+  %i3 = bitcast <2 x i32> %i2 to i64
+  %i4 = inttoptr i64 %i3 to ptr addrspace(4)
+  %i5 = getelementptr i8, ptr addrspace(4) %i4, i64 48
+  %i6 = load <4 x i32>, ptr addrspace(4) %i5, align 16
+  %i7 = getelementptr i8, ptr addrspace(4) %i4, i64 64
+  %i8 = load <4 x i32>, ptr addrspace(4) %i7, align 16
+  %i9 = getelementptr i8, ptr addrspace(4) %i4, i64 240
+  %i10 = load <8 x i32>, ptr addrspace(4) %i9, align 32
+  %i11 = getelementptr i8, ptr addrspace(4) %i4, i64 272
+  %i12 = load <8 x i32>, ptr addrspace(4) %i11, align 32
+  %i13 = getelementptr i8, ptr addrspace(4) %i4, i64 304
+  %i14 = load <8 x i32>, ptr addrspace(4) %i13, align 32
+  %i15 = getelementptr i8, ptr addrspace(4) %i4, i64 336
+  %i16 = load <8 x i32>, ptr addrspace(4) %i15, align 32
+  %i17 = getelementptr i8, ptr addrspace(4) %i4, i64 496
+  %i18 = load <8 x i32>, ptr addrspace(4) %i17, align 32
+  %i19 = getelementptr i8, ptr addrspace(4) %i4, i64 528
+  %i20 = load <8 x i32>, ptr addrspace(4) %i19, align 32
+  %i21 = getelementptr i8, ptr addrspace(4) %i4, i64 752
+  %i22 = load <8 x i32>, ptr addrspace(4) %i21, align 32
+  %i23 = getelementptr i8, ptr addrspace(4) %i4, i64 784
+  %i24 = load <8 x i32>, ptr addrspace(4) %i23, align 32
+  %i25 = load <4 x float>, ptr addrspace(4) null, align 16
+  %i26 = extractelement <4 x float> %i25, i64 0
+  %i27 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float %i26, float 0.000000e+00, <8 x i32> %i12, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i28 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i14, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i29 = extractelement <4 x float> %i28, i64 0
+  %i30 = fmul float %i29, %i27
+  %i31 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i16, <4 x i32> %i6, i1 false, i32 0, i32 0)
+  br i1 %arg, label %bb32, label %bb48
+
+bb32:                                             ; preds = %bb
+  br i1 %arg, label %bb33, label %bb43
+
+bb33:                                             ; preds = %bb33, %bb32
+  %i34 = phi float [ %i42, %bb33 ], [ 0.000000e+00, %bb32 ]
+  %i35 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i18, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i36 = extractelement <2 x float> %i35, i64 0
+  %i37 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i22, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i38 = extractelement <2 x float> %i37, i64 0
+  %i39 = fsub float %i38, %i36
+  %i40 = fmul float %i39, %i30
+  %i41 = fadd float %i34, %i40
+  %i42 = fsub float %i34, %i41
+  br label %bb33
+
+bb43:                                             ; preds = %bb32
+  %i44 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i10, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i45 = bitcast float %i44 to i32
+  %i46 = insertelement <3 x i32> zeroinitializer, i32 %i45, i64 0
+  call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %i46, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
+  %i47 = bitcast <4 x float> %i31 to <4 x i32>
+  call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %i47, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
+  ret void
+
+bb48:                                             ; preds = %bb
+  %i49 = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, <8 x i32> %i12, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  br label %bb50
+
+bb50:                                             ; preds = %bb50, %bb48
+  %i51 = phi float [ 0.000000e+00, %bb48 ], [ %i58, %bb50 ]
+  %i52 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float %i51, float 0.000000e+00, <8 x i32> %i20, <4 x i32> %i8, i1 false, i32 0, i32 0)
+  %i53 = extractelement <2 x float> %i52, i64 0
+  %i54 = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 1, float %i51, float 0.000000e+00, <8 x i32> %i24, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0)
+  %i55 = extractelement <2 x float> %i54, i64 0
+  %i56 = fsub float %i55, %i53
+  %i57 = fmul float %i56, %i30
+  %i58 = fmul float %i57, %i49
+  br label %bb50
+}
+
+declare i64 @llvm.amdgcn.s.getpc() #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
+declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
+declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
+declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
+declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
+
+attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) }
+attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) }

diff  --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
new file mode 100644
index 000000000000000..1030cdb1b43fc13
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,0 -simplify-mir -o - %s | FileCheck %s
+--- |
+  define void @main() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+
+...
+---
+name:            main
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  sgprForEXECCopy: '$sgpr58_sgpr59'
+body:             |
+  ; CHECK-LABEL: name: main
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead undef [[DEF:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_1:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   SI_RETURN
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr12 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead undef [[IMAGE_SAMPLE_LZ_V1_V2_2:%[0-9]+]].sub0:vreg_96 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], undef renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_4:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+  ; CHECK-NEXT:   S_BRANCH %bb.6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, undef renamable $sgpr4_sgpr5, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.7, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:sreg_64 = IMPLICIT_DEF
+    %1:sgpr_512 = IMPLICIT_DEF
+    undef %2.sub0:vreg_64 = IMPLICIT_DEF
+    %3:sgpr_256 = IMPLICIT_DEF
+    undef %4.sub1:sgpr_64 = IMPLICIT_DEF
+    %5:vgpr_32 = IMPLICIT_DEF
+    undef %6.sub0:vreg_64 = IMPLICIT_DEF
+    undef %7.sub1:sreg_64 = IMPLICIT_DEF
+    undef %8.sub1:vreg_64 = IMPLICIT_DEF
+    undef %9.sub1:sreg_64 = IMPLICIT_DEF
+    %10:sgpr_512 = S_LOAD_DWORDX16_IMM undef %0, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
+    undef %11.sub0:sgpr_128 = IMPLICIT_DEF
+    %12:sgpr_512 = S_LOAD_DWORDX16_IMM undef %7, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
+    $exec = S_MOV_B64_term undef %9
+    S_CBRANCH_EXECZ %bb.5, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.1:
+    $exec = S_MOV_B64_term undef %9
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+  bb.2:
+    %13:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef %8, %10.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, undef %11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    %14:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef %8, %12.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, %11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    SI_RETURN
+
+  bb.3:
+    undef %15.sub0:sgpr_128 = IMPLICIT_DEF
+    undef %16.sub0:vreg_96 = IMAGE_SAMPLE_LZ_V1_V2 undef %8, %1.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, %15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    undef %17.sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef %8, undef %10.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, %3.sub0_sub1_sub2_sub3, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    S_BRANCH %bb.2
+
+  bb.4:
+    %18:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef %2, %1.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, undef %11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    S_BRANCH %bb.6
+
+  bb.5:
+    $exec = S_XOR_B64_term $exec, undef %9, implicit-def $scc
+    S_CBRANCH_EXECZ %bb.7, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.6:
+    %19:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef %2, %10.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, undef %3.sub4_sub5_sub6_sub7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+    undef %11.sub1:sgpr_128 = COPY undef %11.sub0
+    S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
+    S_BRANCH %bb.5
+
+  bb.7:
+    SI_RETURN
+
+...


        


More information about the llvm-commits mailing list