[llvm] [ASAN][AMDGPU] NFC. Add CodeGen tests. (PR #73857)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 13:30:46 PST 2023


llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

Author: Valery Pykhtin (vpykhtin)

Changes:

Adds baseline CodeGen tests so that upcoming patch changes show up as diffs against the checked-in assertions.

---

Patch is 54.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73857.diff


2 Files Affected:

- (added) llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll (+504) 
- (added) llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll (+669) 


``````````diff
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
new file mode 100644
index 000000000000000..12f99d8e19ddc5c
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
@@ -0,0 +1,504 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: opt  -passes=asan -S < %s | FileCheck %s --check-prefix=OPT
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx90a -o - | FileCheck %s --check-prefix=LLC-W64
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -o - | FileCheck %s --check-prefix=LLC-W32
+
+; This test contains checks for both opt and llc; to update them, use:
+;   utils/update_test_checks.py --force-update
+;   utils/update_llc_test_checks.py --force-update
+;
+; --force-update allows overriding the "Assertions have been autogenerated by" guard.
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+define protected amdgpu_kernel void @uniform_loop_global(i32 %num, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @uniform_loop_global(
+; OPT-SAME: i32 [[NUM:%.*]], ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(1) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:    br label [[WHILE_COND:%.*]]
+; OPT:       while.cond:
+; OPT-NEXT:    [[C:%.*]] = phi i32 [ [[NUM]], [[ENTRY:%.*]] ], [ [[NEXT_C:%.*]], [[TMP25:%.*]] ]
+; OPT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], 0
+; OPT-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY:%.*]]
+; OPT:       while.body:
+; OPT-NEXT:    [[OFFS32:%.*]] = add i32 [[TID]], [[C]]
+; OPT-NEXT:    [[OFFS:%.*]] = zext i32 [[OFFS32]] to i64
+; OPT-NEXT:    [[PP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR1]], i64 [[OFFS]]
+; OPT-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[PP1]] to i64
+; OPT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; OPT-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 2147450880
+; OPT-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; OPT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
+; OPT-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; OPT-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0:![0-9]+]]
+; OPT:       6:
+; OPT-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
+; OPT-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
+; OPT-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
+; OPT-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
+; OPT:       11:
+; OPT-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
+; OPT-NEXT:    unreachable
+; OPT:       12:
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PP1]], align 4
+; OPT-NEXT:    [[SUM:%.*]] = add i32 [[VAL]], 42
+; OPT-NEXT:    [[PP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR2]], i64 [[OFFS]]
+; OPT-NEXT:    [[TMP13:%.*]] = ptrtoint ptr addrspace(1) [[PP2]] to i64
+; OPT-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP13]], 3
+; OPT-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], 2147450880
+; OPT-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; OPT-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; OPT-NEXT:    [[TMP18:%.*]] = icmp ne i8 [[TMP17]], 0
+; OPT-NEXT:    br i1 [[TMP18]], label [[TMP19:%.*]], label [[TMP25]], !prof [[PROF0]]
+; OPT:       19:
+; OPT-NEXT:    [[TMP20:%.*]] = and i64 [[TMP13]], 7
+; OPT-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 3
+; OPT-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i8
+; OPT-NEXT:    [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP17]]
+; OPT-NEXT:    br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP25]]
+; OPT:       24:
+; OPT-NEXT:    call void @__asan_report_store4(i64 [[TMP13]]) #[[ATTR3]]
+; OPT-NEXT:    unreachable
+; OPT:       25:
+; OPT-NEXT:    store i32 [[SUM]], ptr addrspace(1) [[PP2]], align 4
+; OPT-NEXT:    [[NEXT_C]] = sub i32 [[C]], 1
+; OPT-NEXT:    br label [[WHILE_COND]]
+; OPT:       exit:
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: uniform_loop_global:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_load_dword s17, s[8:9], 0x0
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_cmp_eq_u32 s17, 0
+; LLC-W64-NEXT:    s_cbranch_scc1 .LBB0_15
+; LLC-W64-NEXT:  ; %bb.1: ; %while.body.preheader
+; LLC-W64-NEXT:    s_load_dwordx4 s[20:23], s[8:9], 0x8
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], 0
+; LLC-W64-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
+; LLC-W64-NEXT:    v_mov_b32_e32 v3, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    v_mov_b32_e32 v7, s21
+; LLC-W64-NEXT:    ; implicit-def: $sgpr24_sgpr25
+; LLC-W64-NEXT:    ; implicit-def: $sgpr12_sgpr13
+; LLC-W64-NEXT:    ; implicit-def: $sgpr26_sgpr27
+; LLC-W64-NEXT:    ; implicit-def: $sgpr28_sgpr29
+; LLC-W64-NEXT:    s_branch .LBB0_4
+; LLC-W64-NEXT:  .LBB0_2: ; %Flow16
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[42:43]
+; LLC-W64-NEXT:    s_andn2_b64 s[28:29], s[28:29], exec
+; LLC-W64-NEXT:    s_and_b64 s[36:37], s[36:37], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[26:27], s[26:27], exec
+; LLC-W64-NEXT:    s_and_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 s[28:29], s[28:29], s[36:37]
+; LLC-W64-NEXT:    s_or_b64 s[26:27], s[26:27], s[34:35]
+; LLC-W64-NEXT:    s_orn2_b64 s[36:37], s[40:41], exec
+; LLC-W64-NEXT:  .LBB0_3: ; %Flow14
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[30:31]
+; LLC-W64-NEXT:    s_and_b64 s[30:31], exec, s[36:37]
+; LLC-W64-NEXT:    s_or_b64 s[18:19], s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[28:29], exec
+; LLC-W64-NEXT:    s_or_b64 s[12:13], s[12:13], s[30:31]
+; LLC-W64-NEXT:    s_andn2_b64 s[24:25], s[24:25], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[26:27], exec
+; LLC-W64-NEXT:    s_or_b64 s[24:25], s[24:25], s[30:31]
+; LLC-W64-NEXT:    s_andn2_b64 exec, exec, s[18:19]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_11
+; LLC-W64-NEXT:  .LBB0_4: ; %while.body
+; LLC-W64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LLC-W64-NEXT:    v_add_u32_e32 v2, s17, v6
+; LLC-W64-NEXT:    v_lshlrev_b64 v[4:5], 3, v[2:3]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v40, vcc, s20, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v41, vcc, v7, v5, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[0:1], 3, v[40:41]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fff8000, v0
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v0, v[0:1]
+; LLC-W64-NEXT:    ; implicit-def: $sgpr38_sgpr39
+; LLC-W64-NEXT:    ; implicit-def: $sgpr30_sgpr31
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[34:35], 0, v0
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[36:37], vcc
+; LLC-W64-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_and_b32_e32 v1, 7, v40
+; LLC-W64-NEXT:    v_add_u16_e32 v1, 3, v1
+; LLC-W64-NEXT:    v_cmp_lt_i16_e32 vcc, v1, v0
+; LLC-W64-NEXT:    s_andn2_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_and_b64 s[40:41], vcc, exec
+; LLC-W64-NEXT:    s_mov_b64 s[30:31], -1
+; LLC-W64-NEXT:    s_mov_b64 s[38:39], 0
+; LLC-W64-NEXT:    s_or_b64 s[34:35], s[34:35], s[40:41]
+; LLC-W64-NEXT:  ; %bb.6: ; %Flow13
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_andn2_b64 s[28:29], s[28:29], exec
+; LLC-W64-NEXT:    s_and_b64 s[38:39], s[38:39], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[26:27], s[26:27], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[30:31], exec
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], -1
+; LLC-W64-NEXT:    s_or_b64 s[28:29], s[28:29], s[38:39]
+; LLC-W64-NEXT:    s_or_b64 s[26:27], s[26:27], s[30:31]
+; LLC-W64-NEXT:    ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[30:31], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_3
+; LLC-W64-NEXT:  ; %bb.7: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s23
+; LLC-W64-NEXT:    v_add_co_u32_e32 v0, vcc, s22, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v8, vcc, 0x7fff8000, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v5, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v4, v[8:9]
+; LLC-W64-NEXT:    global_load_dword v2, v[40:41], off
+; LLC-W64-NEXT:    ; implicit-def: $sgpr36_sgpr37
+; LLC-W64-NEXT:    ; implicit-def: $sgpr34_sgpr35
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[38:39], 0, v4
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v4
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[40:41], vcc
+; LLC-W64-NEXT:  ; %bb.8: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_and_b32_e32 v5, 7, v0
+; LLC-W64-NEXT:    v_add_u16_e32 v5, 3, v5
+; LLC-W64-NEXT:    v_cmp_lt_i16_e32 vcc, v5, v4
+; LLC-W64-NEXT:    s_andn2_b64 s[38:39], s[38:39], exec
+; LLC-W64-NEXT:    s_and_b64 s[42:43], vcc, exec
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], 0
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], -1
+; LLC-W64-NEXT:    s_or_b64 s[38:39], s[38:39], s[42:43]
+; LLC-W64-NEXT:  ; %bb.9: ; %Flow15
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[40:41]
+; LLC-W64-NEXT:    s_mov_b64 s[40:41], -1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[42:43], s[38:39]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W64-NEXT:  ; %bb.10: ; %while.cond
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_add_i32 s17, s17, -1
+; LLC-W64-NEXT:    s_cmp_eq_u32 s17, 0
+; LLC-W64-NEXT:    s_cselect_b64 s[38:39], -1, 0
+; LLC-W64-NEXT:    v_add_u32_e32 v2, 42, v2
+; LLC-W64-NEXT:    s_andn2_b64 s[36:37], s[36:37], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_orn2_b64 s[40:41], s[38:39], exec
+; LLC-W64-NEXT:    global_store_dword v[0:1], v2, off
+; LLC-W64-NEXT:    s_branch .LBB0_2
+; LLC-W64-NEXT:  .LBB0_11: ; %loop.exit.guard
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[18:19], s[24:25], -1
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], 0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[20:21], s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[40:41], exec, s[20:21]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_16
+; LLC-W64-NEXT:  ; %bb.12: ; %Flow
+; LLC-W64-NEXT:    s_andn2_saveexec_b64 s[36:37], s[40:41]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_19
+; LLC-W64-NEXT:  .LBB0_13: ; %Flow11
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:  .LBB0_14: ; %UnifiedUnreachableBlock
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB0_15: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB0_16: ; %loop.exit.guard8
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], 0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[20:21], s[12:13]
+; LLC-W64-NEXT:    s_xor_b64 s[42:43], exec, s[20:21]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_18
+; LLC-W64-NEXT:  ; %bb.17:
+; LLC-W64-NEXT:    s_add_u32 s12, s8, 24
+; LLC-W64-NEXT:    s_addc_u32 s13, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[18:19]
+; LLC-W64-NEXT:    s_add_u32 s18, s18, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s19, s19, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[18:19], 0x0
+; LLC-W64-NEXT:    s_mov_b64 s[44:45], s[8:9]
+; LLC-W64-NEXT:    s_mov_b64 s[8:9], s[12:13]
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s33, s14
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], s[4:5]
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], s[6:7]
+; LLC-W64-NEXT:    s_mov_b64 s[38:39], s[10:11]
+; LLC-W64-NEXT:    s_mov_b32 s46, s16
+; LLC-W64-NEXT:    s_mov_b32 s47, s15
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_mov_b32 s14, s33
+; LLC-W64-NEXT:    s_mov_b32 s15, s47
+; LLC-W64-NEXT:    s_mov_b32 s16, s46
+; LLC-W64-NEXT:    s_mov_b64 s[8:9], s[44:45]
+; LLC-W64-NEXT:    s_mov_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_mov_b64 s[6:7], s[36:37]
+; LLC-W64-NEXT:    s_mov_b64 s[10:11], s[38:39]
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], exec
+; LLC-W64-NEXT:  .LBB0_18: ; %Flow10
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[42:43]
+; LLC-W64-NEXT:    s_and_b64 s[34:35], s[18:19], exec
+; LLC-W64-NEXT:    ; implicit-def: $vgpr31
+; LLC-W64-NEXT:    s_andn2_saveexec_b64 s[36:37], s[40:41]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_13
+; LLC-W64-NEXT:  .LBB0_19:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 24
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, v40
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, v41
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_or_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_14
+; LLC-W64-NEXT:    s_branch .LBB0_15
+;
+; LLC-W32-LABEL: uniform_loop_global:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_mov_b64 s[34:35], s[6:7]
+; LLC-W32-NEXT:    s_load_b32 s6, s[4:5], 0x0
+; LLC-W32-NEXT:    s_mov_b32 s7, 0
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_cmp_eq_u32 s6, 0
+; LLC-W32-NEXT:    s_cbranch_scc1 .LBB0_15
+; LLC-W32-NEXT:  ; %bb.1: ; %while.body.preheader
+; LLC-W32-NEXT:    s_load_b128 s[8:11], s[4:5], 0x8
+; LLC-W32-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v2, 0
+; LLC-W32-NEXT:    ; implicit-def: $sgpr16
+; LLC-W32-NEXT:    ; implicit-def: $sgpr12
+; LLC-W32-NEXT:    ; implicit-def: $sgpr17
+; LLC-W32-NEXT:    ; implicit-def: $sgpr18
+; LLC-W32-NEXT:    v_and_b32_e32 v5, 0x3ff, v31
+; LLC-W32-NEXT:    s_branch .LBB0_4
+; LLC-W32-NEXT:  .LBB0_2: ; %Flow16
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s24
+; LLC-W32-NEXT:    s_and_not1_b32 s18, s18, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s21, s21, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s17, s17, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s18, s18, s21
+; LLC-W32-NEXT:    s_or_b32 s17, s17, s20
+; LLC-W32-NEXT:    s_or_not1_b32 s21, s23, exec_lo
+; LLC-W32-NEXT:  .LBB0_3: ; %Flow14
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s19
+; LLC-W32-NEXT:    s_and_b32 s19, exec_lo, s21
+; LLC-W32-NEXT:    s_or_b32 s7, s19, s7
+; LLC-W32-NEXT:    s_and_not1_b32 s12, s12, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s19, s18, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s16, s16, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s20, s17, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s12, s12, s19
+; LLC-W32-NEXT:    s_or_b32 s16, s16, s20
+; LLC-W32-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s7
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_11
+; LLC-W32-NEXT:  .LBB0_4: ; %while.body
+; LLC-W32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LLC-W32-NEXT:    v_add_nc_u32_e32 v1, s6, v5
+; LLC-W32-NEXT:    s_mov_b32 s21, exec_lo
+; LLC-W32-NEXT:    ; implicit-def: $sgpr22
+; LLC-W32-NEXT:    ; implicit-def: $sgpr19
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W32-NEXT:    v_lshlrev_b64 v[3:4], 3, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    v_add_co_u32 v40, vcc_lo, s8, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v41, vcc_lo, s9, v4, vcc_lo
+; LLC-W32-NEXT:    v_lshrrev_b64 v[0:1], 3, v[40:41]
+; LLC-W32-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fff8000, v0
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v0, v[0:1]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s20, 0, v0
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v0
+; LLC-W32-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_and_b32_e32 v1, 7, v40
+; LLC-W32-NEXT:    s_and_not1_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s19, -1
+; LLC-W32-NEXT:    s_mov_b32 s22, 0
+; LLC-W32-NEXT:    v_add_nc_u16 v1, v1, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e32 vcc_lo, v1, v0
+; LLC-W32-NEXT:    s_and_b32 s23, vcc_lo, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s20, s20, s23
+; LLC-W32-NEXT:  ; %bb.6: ; %Flow13
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s21
+; LLC-W32-NEXT:    s_and_not1_b32 s18, s18, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s22, s22, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s17, s17, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s19, s19, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s21, -1
+; LLC-W32-NEXT:    s_or_b32 s18, s18, s22
+; LLC-W32-NEXT:    s_or_b32 s17, s17, s19
+; LLC-W32-NEXT:    ; kill: def $vgpr0_vgpr1 killed $sgpr0_sgpr1 killed $exec
+; LLC-W32-NEXT:    s_and_saveexec_b32 s19, s20
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_3
+; LLC-W32-NEXT:  ; %bb.7: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_add_co_u32 v0, vcc_lo, s10, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s11, v4, vcc_lo
+; LLC-W32-NEXT:    s_mov_b32 s23, exec_lo
+; LLC-W32-NEXT:    ; implicit-def: $sgpr21
+; LLC-W32-NEXT:    ; implicit-def: $sgpr20
+; LLC-W32-NEXT:    v_lshrrev_b64 v[3:4], 3, v[0:1]
+; LLC-W32-NEXT:    v_add_co_u32 v3, vcc_lo, 0x7fff8000, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v4, v[3:4]
+; LLC-W32-NEXT:    global_load_b32 v3, v[40:41], off
+; LLC-W32-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s22, 0, v4
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v4
+; LLC-W32-NEXT:  ; %bb.8: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_and_b32_e32 v6, 7, v0
+; LLC-W32-NEXT:    s_and_not1_b32 s22, s22, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s20, 0
+; LLC-W32-NEXT:    s_mov_b32 s21, -1
+; LLC-W32-NEXT:    v_add_nc_u16 v6, v6, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e32 vcc_lo, v6, v4
+; LLC-W32-NEXT:    s_and_b32 s24, vcc_lo, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s22, s22, s24
+; LLC-W32-NEXT:  ; %bb.9: ; %Flow15
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s23
+; LLC-W32-NEXT:    s_mov_b32 s23, -1
+; LLC-W32-NEXT:    s_and_saveexec_b32 s24, s22
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W32-NEXT:  ; %bb.10: ; %while.cond
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_add_i32 s6, s6, -1
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W32-NEXT:    v_add_nc_u32_e32 v3, 42, v3
+; LLC-W32-NEXT:    s_cmp_eq_u32 s6, 0
+; LLC-W32-NEXT:    s_cselect_b32 s22, -1, 0
+; LLC-W32-NEXT:    s_and_not1_b32 s21, s21, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_or_not1_b32 s23, s22, exec_lo
+; LLC-W32-NEXT:    global_store_b32 v[0:1], v3, off
+; LLC-W32-NEXT:    s_branch .LBB0_2
+; LLC-W32-NEXT:  .LBB0_11: ; %loop.exit.guard
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s7
+; LLC-W32-NEXT:    s_xor_b32 s6, s16, -1
+; LLC-W32-NEXT:    s_mov_b32 s33, 0
+; LLC-W32-NE...
[truncated]

``````````
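
For context on the pattern these checks pin down: the OPT lines materialize AddressSanitizer's usual inline shadow check around each global-memory access. Below is a minimal C sketch of that check for a 4-byte access, assuming the shadow mapping visible in the IR above (scale 3, offset 0x7fff8000); the function name and shape are illustrative only, not anything LLVM emits:

```c
#include <stdint.h>

/* Sketch of the inlined ASan check for a 4-byte access, assuming
 * shadow_addr = (addr >> 3) + 0x7fff8000 as in the OPT lines above.
 * Returns nonzero when the access would pass; the instrumented IR
 * instead calls __asan_report_{load,store}4 on the failing path. */
static int asan_would_pass_4byte(uint64_t addr) {
    int8_t shadow = *(int8_t *)(uintptr_t)((addr >> 3) + 0x7fff8000);
    if (shadow == 0)
        return 1;            /* whole 8-byte granule is addressable */
    /* Slow path: a shadow value k (0 < k < 8) means only the first
     * k bytes of the granule are addressable. */
    int8_t last = (int8_t)((addr & 7) + 3);  /* last byte accessed */
    return last < shadow;    /* the IR's 'icmp sge' reports instead */
}
```

The `lshr ... 3` / `add ... 2147450880` / `icmp sge` sequence in the OPT checks is exactly this computation (2147450880 == 0x7fff8000), and the LLC checks then show how the divergent report paths lower to exec-mask manipulation on wave64 (gfx90a) versus wave32 (gfx1100).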



https://github.com/llvm/llvm-project/pull/73857

