[llvm] [AMDGPU] Fix end() iterator dereference in SILowerSGPRSpills (PR #88828)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 01:37:38 PDT 2024


================
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s
+
+; Test that the si-lower-sgpr-spills pass does not dereference a
+; machine instruction iterator that is equal to end().
+
+define amdgpu_kernel void @test(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %6, ptr addrspace(1) %7, ptr addrspace(1) %8, ptr addrspace(1) %9, ptr addrspace(1) %10, ptr addrspace(1) %11, ptr addrspace(1) %12, ptr addrspace(1) %13, ptr addrspace(1) %14, ptr addrspace(1) %15, ptr addrspace(1) %16, ptr addrspace(1) %17, ptr addrspace(1) %18, ptr addrspace(1) %19, ptr addrspace(1) %20, ptr addrspace(1) %21, ptr addrspace(1) %22, ptr addrspace(1) %23, ptr addrspace(1) %24, ptr addrspace(1) %25, ptr addrspace(1) %26, ptr addrspace(1) %27, ptr addrspace(1) %28, ptr addrspace(1) %29, ptr addrspace(1) %30, ptr addrspace(1) %31, ptr addrspace(1) %32, ptr addrspace(1) %33, ptr addrspace(1) %34, ptr addrspace(1) %35, ptr addrspace(1) %36, ptr addrspace(1) %37, ptr addrspace(1) %38, ptr addrspace(1) %39, ptr addrspace(1) %40, ptr addrspace(1) %41, ptr addrspace(1) %42, ptr addrspace(1) %43, ptr addrspace(1) %44, ptr addrspace(1) %45, ptr addrspace(1) %46, i32 %47) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dwordx8 s[36:43], s[0:1], 0x0
+; CHECK-NEXT:    s_load_dwordx8 s[52:59], s[0:1], 0x20
+; CHECK-NEXT:    s_load_dwordx8 s[84:91], s[0:1], 0x60
+; CHECK-NEXT:    s_load_dwordx8 s[68:75], s[0:1], 0x40
+; CHECK-NEXT:    s_load_dwordx8 s[92:99], s[0:1], 0xa0
+; CHECK-NEXT:    s_load_dword s28, s[0:1], 0x178
+; CHECK-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x80
+; CHECK-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; CHECK-NEXT:    s_load_dwordx8 s[44:51], s[0:1], 0xe0
+; CHECK-NEXT:    s_load_dwordx8 s[20:27], s[0:1], 0xc0
+; CHECK-NEXT:    s_load_dwordx8 s[76:83], s[0:1], 0x120
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 16
+; CHECK-NEXT:    v_writelane_b32 v0, s4, 0
+; CHECK-NEXT:    s_nop 1
+; CHECK-NEXT:    v_writelane_b32 v0, s5, 1
+; CHECK-NEXT:    v_writelane_b32 v0, s6, 2
+; CHECK-NEXT:    v_writelane_b32 v0, s7, 3
+; CHECK-NEXT:    v_writelane_b32 v0, s8, 4
+; CHECK-NEXT:    v_writelane_b32 v0, s9, 5
+; CHECK-NEXT:    v_writelane_b32 v0, s10, 6
+; CHECK-NEXT:    v_writelane_b32 v0, s11, 7
+; CHECK-NEXT:    s_load_dwordx8 s[60:67], s[0:1], 0x100
+; CHECK-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x160
+; CHECK-NEXT:    s_load_dwordx8 s[12:19], s[0:1], 0x140
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_7
+; CHECK-NEXT:  ; %bb.1: ; %NodeBlock172
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x4d
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_8
+; CHECK-NEXT:  ; %bb.2: ; %NodeBlock170
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x50
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_9
+; CHECK-NEXT:  ; %bb.3: ; %NodeBlock168
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x51
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_10
+; CHECK-NEXT:  ; %bb.4: ; %NodeBlock166
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x52
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_11
+; CHECK-NEXT:  ; %bb.5: ; %LeafBlock164
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[10:11], 0
+; CHECK-NEXT:    s_cmpk_eq_i32 s28, 0x52
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[22:23]
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_12
+; CHECK-NEXT:  ; %bb.6:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[20:21]
+; CHECK-NEXT:    s_branch .LBB0_12
+; CHECK-NEXT:  .LBB0_7:
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_46
+; CHECK-NEXT:  .LBB0_8:
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_27
+; CHECK-NEXT:  .LBB0_9:
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_17
+; CHECK-NEXT:  .LBB0_10:
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_15
+; CHECK-NEXT:  .LBB0_11:
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[10:11], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_12: ; %Flow176
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[10:11]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_14
+; CHECK-NEXT:  ; %bb.13:
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; CHECK-NEXT:  .LBB0_14: ; %Flow177
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_16
+; CHECK-NEXT:  .LBB0_15:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[96:97]
+; CHECK-NEXT:  .LBB0_16: ; %Flow179
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_26
+; CHECK-NEXT:  .LBB0_17: ; %NodeBlock162
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x4e
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_20
+; CHECK-NEXT:  ; %bb.18: ; %NodeBlock160
+; CHECK-NEXT:    s_cmpk_gt_i32 s28, 0x4e
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_21
+; CHECK-NEXT:  ; %bb.19:
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[18:19]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[94:95]
+; CHECK-NEXT:    s_branch .LBB0_22
+; CHECK-NEXT:  .LBB0_20:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_25
+; CHECK-NEXT:  .LBB0_21:
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_22: ; %Flow180
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_24
+; CHECK-NEXT:  ; %bb.23:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[16:17]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[92:93]
+; CHECK-NEXT:  .LBB0_24: ; %Flow181
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_26
+; CHECK-NEXT:  .LBB0_25:
+; CHECK-NEXT:    v_readlane_b32 s4, v0, 0
+; CHECK-NEXT:    v_readlane_b32 s10, v0, 6
+; CHECK-NEXT:    v_readlane_b32 s11, v0, 7
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[14:15]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; CHECK-NEXT:    v_readlane_b32 s5, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s6, v0, 2
+; CHECK-NEXT:    v_readlane_b32 s7, v0, 3
+; CHECK-NEXT:    v_readlane_b32 s8, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s9, v0, 5
+; CHECK-NEXT:  .LBB0_26: ; %Flow185
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_45
+; CHECK-NEXT:  .LBB0_27: ; %NodeBlock158
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 18
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_32
+; CHECK-NEXT:  ; %bb.28: ; %NodeBlock156
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 19
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_33
+; CHECK-NEXT:  ; %bb.29: ; %NodeBlock154
+; CHECK-NEXT:    s_cmpk_lt_i32 s28, 0x4b
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_34
+; CHECK-NEXT:  ; %bb.30: ; %LeafBlock152
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_cmpk_eq_i32 s28, 0x4b
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[22:23]
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_35
+; CHECK-NEXT:  ; %bb.31:
+; CHECK-NEXT:    v_readlane_b32 s92, v0, 0
+; CHECK-NEXT:    v_readlane_b32 s94, v0, 2
+; CHECK-NEXT:    v_readlane_b32 s95, v0, 3
+; CHECK-NEXT:    s_mov_b64 s[0:1], 0
+; CHECK-NEXT:    v_readlane_b32 s93, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s96, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s97, v0, 5
+; CHECK-NEXT:    v_readlane_b32 s98, v0, 6
+; CHECK-NEXT:    v_readlane_b32 s99, v0, 7
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[94:95]
+; CHECK-NEXT:    s_branch .LBB0_35
+; CHECK-NEXT:  .LBB0_32:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_41
+; CHECK-NEXT:  .LBB0_33:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_39
+; CHECK-NEXT:  .LBB0_34:
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_35: ; %Flow188
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_38
+; CHECK-NEXT:  ; %bb.36: ; %LeafBlock150
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_cmp_eq_u32 s28, 19
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[22:23]
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_38
+; CHECK-NEXT:  ; %bb.37:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[62:63]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[68:69]
+; CHECK-NEXT:  .LBB0_38: ; %Flow189
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_40
+; CHECK-NEXT:  .LBB0_39:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[52:53]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[88:89]
+; CHECK-NEXT:  .LBB0_40: ; %Flow191
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_45
+; CHECK-NEXT:  .LBB0_41: ; %NodeBlock148
+; CHECK-NEXT:    s_cmp_gt_i32 s28, 16
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_43
+; CHECK-NEXT:  ; %bb.42:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[80:81]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[86:87]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_44
+; CHECK-NEXT:    s_branch .LBB0_45
+; CHECK-NEXT:  .LBB0_43:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_44:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[78:79]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[84:85]
+; CHECK-NEXT:  .LBB0_45: ; %Flow197
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_89
+; CHECK-NEXT:  .LBB0_46: ; %NodeBlock146
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 5
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_51
+; CHECK-NEXT:  ; %bb.47: ; %NodeBlock144
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 13
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_52
+; CHECK-NEXT:  ; %bb.48: ; %NodeBlock142
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 14
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_53
+; CHECK-NEXT:  ; %bb.49: ; %NodeBlock140
+; CHECK-NEXT:    s_cmp_gt_i32 s28, 14
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_54
+; CHECK-NEXT:  ; %bb.50:
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[76:77]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[74:75]
+; CHECK-NEXT:    s_branch .LBB0_55
+; CHECK-NEXT:  .LBB0_51:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_71
+; CHECK-NEXT:  .LBB0_52:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_60
+; CHECK-NEXT:  .LBB0_53:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_58
+; CHECK-NEXT:  .LBB0_54:
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_55: ; %Flow198
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_57
+; CHECK-NEXT:  ; %bb.56:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[66:67]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[72:73]
+; CHECK-NEXT:  .LBB0_57: ; %Flow199
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_59
+; CHECK-NEXT:  .LBB0_58:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[64:65]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[70:71]
+; CHECK-NEXT:  .LBB0_59: ; %Flow201
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_70
+; CHECK-NEXT:  .LBB0_60: ; %NodeBlock138
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 6
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_64
+; CHECK-NEXT:  ; %bb.61: ; %NodeBlock136
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 7
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_65
+; CHECK-NEXT:  ; %bb.62: ; %LeafBlock134
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_cmp_eq_u32 s28, 7
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[22:23]
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_66
+; CHECK-NEXT:  ; %bb.63:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[60:61]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[36:37]
+; CHECK-NEXT:    s_branch .LBB0_66
+; CHECK-NEXT:  .LBB0_64:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_69
+; CHECK-NEXT:  .LBB0_65:
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_66: ; %Flow203
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_68
+; CHECK-NEXT:  ; %bb.67:
+; CHECK-NEXT:    v_readlane_b32 s0, v0, 0
+; CHECK-NEXT:    v_readlane_b32 s2, v0, 2
+; CHECK-NEXT:    v_readlane_b32 s3, v0, 3
+; CHECK-NEXT:    v_readlane_b32 s1, v0, 1
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[56:57]
+; CHECK-NEXT:    v_readlane_b32 s4, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s5, v0, 5
+; CHECK-NEXT:    v_readlane_b32 s6, v0, 6
+; CHECK-NEXT:    v_readlane_b32 s7, v0, 7
+; CHECK-NEXT:  .LBB0_68: ; %Flow204
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_70
+; CHECK-NEXT:  .LBB0_69:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[48:49]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; CHECK-NEXT:  .LBB0_70: ; %Flow208
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_89
+; CHECK-NEXT:  .LBB0_71: ; %NodeBlock132
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 2
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_75
+; CHECK-NEXT:  ; %bb.72: ; %NodeBlock130
+; CHECK-NEXT:    s_cmp_lt_i32 s28, 3
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_76
+; CHECK-NEXT:  ; %bb.73: ; %NodeBlock128
+; CHECK-NEXT:    s_cmp_gt_i32 s28, 3
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_77
+; CHECK-NEXT:  ; %bb.74:
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[46:47]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[40:41]
+; CHECK-NEXT:    s_branch .LBB0_78
+; CHECK-NEXT:  .LBB0_75:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_83
+; CHECK-NEXT:  .LBB0_76:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    s_branch .LBB0_81
+; CHECK-NEXT:  .LBB0_77:
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_78: ; %Flow209
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_80
+; CHECK-NEXT:  ; %bb.79:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[44:45]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[42:43]
+; CHECK-NEXT:  .LBB0_80: ; %Flow210
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_82
+; CHECK-NEXT:  .LBB0_81:
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[26:27]
+; CHECK-NEXT:  .LBB0_82: ; %Flow212
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_89
+; CHECK-NEXT:  .LBB0_83: ; %NodeBlock
+; CHECK-NEXT:    s_cmp_gt_i32 s28, 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_85
+; CHECK-NEXT:  ; %bb.84:
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[24:25]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_86
+; CHECK-NEXT:    s_branch .LBB0_89
+; CHECK-NEXT:  .LBB0_85:
+; CHECK-NEXT:    ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:  .LBB0_86: ; %LeafBlock
+; CHECK-NEXT:    s_cmp_eq_u32 s28, 0
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB0_88
+; CHECK-NEXT:  ; %bb.87:
+; CHECK-NEXT:    v_readlane_b32 s0, v0, 0
+; CHECK-NEXT:    v_readlane_b32 s2, v0, 2
+; CHECK-NEXT:    v_readlane_b32 s3, v0, 3
+; CHECK-NEXT:    v_readlane_b32 s4, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s5, v0, 5
+; CHECK-NEXT:    s_mov_b64 s[22:23], s[12:13]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[4:5]
+; CHECK-NEXT:    v_readlane_b32 s1, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s6, v0, 6
+; CHECK-NEXT:    v_readlane_b32 s7, v0, 7
+; CHECK-NEXT:  .LBB0_88: ; %Flow213
+; CHECK-NEXT:    s_mov_b64 s[0:1], s[22:23]
+; CHECK-NEXT:  .LBB0_89: ; %.sink.split
+; CHECK-NEXT:    s_load_dword s2, s[2:3], 0x0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    ; kill: killed $vgpr0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v2, s2
+; CHECK-NEXT:    global_store_dword v1, v2, s[0:1]
+; CHECK-NEXT:    s_endpgm
+  switch i32 %47, label %.sink.split [
+    i32 75, label %63
+    i32 1, label %49
+    i32 2, label %50
+    i32 3, label %51
+    i32 4, label %52
+    i32 5, label %53
+    i32 6, label %54
+    i32 7, label %55
+    i32 82, label %70
+    i32 81, label %69
+    i32 80, label %68
+    i32 79, label %67
+    i32 78, label %66
+    i32 13, label %56
+    i32 14, label %57
+    i32 15, label %58
+    i32 16, label %59
+    i32 17, label %60
+    i32 18, label %61
+    i32 19, label %62
+    i32 77, label %65
+    i32 0, label %64
+  ]
+
+49:                                               ; preds = %48
+  br label %.sink.split
+
+50:                                               ; preds = %48
+  br label %.sink.split
+
+51:                                               ; preds = %48
+  br label %.sink.split
+
+52:                                               ; preds = %48
+  br label %.sink.split
+
+53:                                               ; preds = %48
+  br label %.sink.split
+
+54:                                               ; preds = %48
+  br label %.sink.split
+
+55:                                               ; preds = %48
+  br label %.sink.split
+
+56:                                               ; preds = %48
+  br label %.sink.split
+
+57:                                               ; preds = %48
+  br label %.sink.split
+
+58:                                               ; preds = %48
+  br label %.sink.split
+
+59:                                               ; preds = %48
+  br label %.sink.split
+
+60:                                               ; preds = %48
+  br label %.sink.split
+
+61:                                               ; preds = %48
+  br label %.sink.split
+
+62:                                               ; preds = %48
+  br label %.sink.split
+
+63:                                               ; preds = %48
+  br label %.sink.split
+
+64:                                               ; preds = %48
+  br label %.sink.split
+
+65:                                               ; preds = %48
+  br label %.sink.split
+
+66:                                               ; preds = %48
+  br label %.sink.split
+
+67:                                               ; preds = %48
+  br label %.sink.split
+
+68:                                               ; preds = %48
+  br label %.sink.split
+
+69:                                               ; preds = %48
+  br label %.sink.split
+
+70:                                               ; preds = %48
----------------
arsenm wrote:

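Use named values instead of numbered ones in this test (e.g. the `%0`–`%47` arguments and the numeric block labels `49:`–`70:`).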

https://github.com/llvm/llvm-project/pull/88828


More information about the llvm-commits mailing list