[llvm] [ASAN][AMDGPU] NFC. Add CodeGen tests. (PR #73857)

Valery Pykhtin via llvm-commits llvm-commits@lists.llvm.org
Wed Nov 29 13:30:16 PST 2023


https://github.com/vpykhtin created https://github.com/llvm/llvm-project/pull/73857

Adds baseline CodeGen tests so that upcoming patch changes can be shown as diffs against the current output.
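
For context, the OPT checks below pin down ASan's standard shadow check on
AMDGPU: shadow address = (addr >> 3) + 0x7fff8000, a shadow byte load, and a
slow-path recheck of the low address bits before trapping into
__asan_report_load4/__asan_report_store4. A minimal IR sketch of that sequence
(value names are illustrative, mirroring the autogenerated checks):

  %addr   = ptrtoint ptr addrspace(1) %p to i64
  %shadow = lshr i64 %addr, 3
  %saddr  = add i64 %shadow, 2147450880   ; 0x7fff8000, the shadow offset
  %sptr   = inttoptr i64 %saddr to ptr
  %sbyte  = load i8, ptr %sptr, align 1
  %poison = icmp ne i8 %sbyte, 0
  br i1 %poison, label %slowpath, label %ok   ; %slowpath rechecks addr & 7

For generic (flat) pointers the tests additionally check that instrumentation
is guarded by llvm.amdgcn.is.shared/llvm.amdgcn.is.private, so LDS and private
memory are not shadow-checked.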

From 03426fc5e512227ce901ba114d46ccb1d7938837 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin@gmail.com>
Date: Wed, 29 Nov 2023 22:12:09 +0100
Subject: [PATCH] [ASAN][AMDGPU] NFC. Add CodeGen tests.

---
 .../AMDGPU/asan_codegen_loop.ll               | 504 +++++++++++++
 .../AMDGPU/asan_codegen_trivial.ll            | 669 ++++++++++++++++++
 2 files changed, 1173 insertions(+)
 create mode 100644 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
 create mode 100644 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll

diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
new file mode 100644
index 000000000000000..12f99d8e19ddc5c
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
@@ -0,0 +1,504 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: opt  -passes=asan -S < %s | FileCheck %s --check-prefix=OPT
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx90a -o - | FileCheck %s --check-prefix=LLC-W64
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -o - | FileCheck %s --check-prefix=LLC-W32
+
+; This test contains checks for both opt and llc. To update them, use:
+;   utils/update_test_checks.py --force-update
+;   utils/update_llc_test_checks.py --force-update
+;
+; --force-update allows overriding the "Assertions have been autogenerated by" guard.
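+; For example, run from the LLVM source root (invocations are a sketch):
+;   utils/update_test_checks.py --force-update llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll
+;   utils/update_llc_test_checks.py --force-update llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_loop.ll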
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+define protected amdgpu_kernel void @uniform_loop_global(i32 %num, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @uniform_loop_global(
+; OPT-SAME: i32 [[NUM:%.*]], ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(1) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:    br label [[WHILE_COND:%.*]]
+; OPT:       while.cond:
+; OPT-NEXT:    [[C:%.*]] = phi i32 [ [[NUM]], [[ENTRY:%.*]] ], [ [[NEXT_C:%.*]], [[TMP25:%.*]] ]
+; OPT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], 0
+; OPT-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[WHILE_BODY:%.*]]
+; OPT:       while.body:
+; OPT-NEXT:    [[OFFS32:%.*]] = add i32 [[TID]], [[C]]
+; OPT-NEXT:    [[OFFS:%.*]] = zext i32 [[OFFS32]] to i64
+; OPT-NEXT:    [[PP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR1]], i64 [[OFFS]]
+; OPT-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[PP1]] to i64
+; OPT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; OPT-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 2147450880
+; OPT-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; OPT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
+; OPT-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; OPT-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0:![0-9]+]]
+; OPT:       6:
+; OPT-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
+; OPT-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
+; OPT-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
+; OPT-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
+; OPT:       11:
+; OPT-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
+; OPT-NEXT:    unreachable
+; OPT:       12:
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(1) [[PP1]], align 4
+; OPT-NEXT:    [[SUM:%.*]] = add i32 [[VAL]], 42
+; OPT-NEXT:    [[PP2:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR2]], i64 [[OFFS]]
+; OPT-NEXT:    [[TMP13:%.*]] = ptrtoint ptr addrspace(1) [[PP2]] to i64
+; OPT-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP13]], 3
+; OPT-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], 2147450880
+; OPT-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; OPT-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; OPT-NEXT:    [[TMP18:%.*]] = icmp ne i8 [[TMP17]], 0
+; OPT-NEXT:    br i1 [[TMP18]], label [[TMP19:%.*]], label [[TMP25]], !prof [[PROF0]]
+; OPT:       19:
+; OPT-NEXT:    [[TMP20:%.*]] = and i64 [[TMP13]], 7
+; OPT-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 3
+; OPT-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i8
+; OPT-NEXT:    [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP17]]
+; OPT-NEXT:    br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP25]]
+; OPT:       24:
+; OPT-NEXT:    call void @__asan_report_store4(i64 [[TMP13]]) #[[ATTR3]]
+; OPT-NEXT:    unreachable
+; OPT:       25:
+; OPT-NEXT:    store i32 [[SUM]], ptr addrspace(1) [[PP2]], align 4
+; OPT-NEXT:    [[NEXT_C]] = sub i32 [[C]], 1
+; OPT-NEXT:    br label [[WHILE_COND]]
+; OPT:       exit:
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: uniform_loop_global:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_load_dword s17, s[8:9], 0x0
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_cmp_eq_u32 s17, 0
+; LLC-W64-NEXT:    s_cbranch_scc1 .LBB0_15
+; LLC-W64-NEXT:  ; %bb.1: ; %while.body.preheader
+; LLC-W64-NEXT:    s_load_dwordx4 s[20:23], s[8:9], 0x8
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], 0
+; LLC-W64-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
+; LLC-W64-NEXT:    v_mov_b32_e32 v3, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    v_mov_b32_e32 v7, s21
+; LLC-W64-NEXT:    ; implicit-def: $sgpr24_sgpr25
+; LLC-W64-NEXT:    ; implicit-def: $sgpr12_sgpr13
+; LLC-W64-NEXT:    ; implicit-def: $sgpr26_sgpr27
+; LLC-W64-NEXT:    ; implicit-def: $sgpr28_sgpr29
+; LLC-W64-NEXT:    s_branch .LBB0_4
+; LLC-W64-NEXT:  .LBB0_2: ; %Flow16
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[42:43]
+; LLC-W64-NEXT:    s_andn2_b64 s[28:29], s[28:29], exec
+; LLC-W64-NEXT:    s_and_b64 s[36:37], s[36:37], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[26:27], s[26:27], exec
+; LLC-W64-NEXT:    s_and_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 s[28:29], s[28:29], s[36:37]
+; LLC-W64-NEXT:    s_or_b64 s[26:27], s[26:27], s[34:35]
+; LLC-W64-NEXT:    s_orn2_b64 s[36:37], s[40:41], exec
+; LLC-W64-NEXT:  .LBB0_3: ; %Flow14
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[30:31]
+; LLC-W64-NEXT:    s_and_b64 s[30:31], exec, s[36:37]
+; LLC-W64-NEXT:    s_or_b64 s[18:19], s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[28:29], exec
+; LLC-W64-NEXT:    s_or_b64 s[12:13], s[12:13], s[30:31]
+; LLC-W64-NEXT:    s_andn2_b64 s[24:25], s[24:25], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[26:27], exec
+; LLC-W64-NEXT:    s_or_b64 s[24:25], s[24:25], s[30:31]
+; LLC-W64-NEXT:    s_andn2_b64 exec, exec, s[18:19]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_11
+; LLC-W64-NEXT:  .LBB0_4: ; %while.body
+; LLC-W64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LLC-W64-NEXT:    v_add_u32_e32 v2, s17, v6
+; LLC-W64-NEXT:    v_lshlrev_b64 v[4:5], 3, v[2:3]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v40, vcc, s20, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v41, vcc, v7, v5, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[0:1], 3, v[40:41]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7fff8000, v0
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v0, v[0:1]
+; LLC-W64-NEXT:    ; implicit-def: $sgpr38_sgpr39
+; LLC-W64-NEXT:    ; implicit-def: $sgpr30_sgpr31
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[34:35], 0, v0
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[36:37], vcc
+; LLC-W64-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_and_b32_e32 v1, 7, v40
+; LLC-W64-NEXT:    v_add_u16_e32 v1, 3, v1
+; LLC-W64-NEXT:    v_cmp_lt_i16_e32 vcc, v1, v0
+; LLC-W64-NEXT:    s_andn2_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_and_b64 s[40:41], vcc, exec
+; LLC-W64-NEXT:    s_mov_b64 s[30:31], -1
+; LLC-W64-NEXT:    s_mov_b64 s[38:39], 0
+; LLC-W64-NEXT:    s_or_b64 s[34:35], s[34:35], s[40:41]
+; LLC-W64-NEXT:  ; %bb.6: ; %Flow13
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_andn2_b64 s[28:29], s[28:29], exec
+; LLC-W64-NEXT:    s_and_b64 s[38:39], s[38:39], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[26:27], s[26:27], exec
+; LLC-W64-NEXT:    s_and_b64 s[30:31], s[30:31], exec
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], -1
+; LLC-W64-NEXT:    s_or_b64 s[28:29], s[28:29], s[38:39]
+; LLC-W64-NEXT:    s_or_b64 s[26:27], s[26:27], s[30:31]
+; LLC-W64-NEXT:    ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[30:31], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_3
+; LLC-W64-NEXT:  ; %bb.7: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s23
+; LLC-W64-NEXT:    v_add_co_u32_e32 v0, vcc, s22, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v8, vcc, 0x7fff8000, v4
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v5, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v4, v[8:9]
+; LLC-W64-NEXT:    global_load_dword v2, v[40:41], off
+; LLC-W64-NEXT:    ; implicit-def: $sgpr36_sgpr37
+; LLC-W64-NEXT:    ; implicit-def: $sgpr34_sgpr35
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[38:39], 0, v4
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v4
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[40:41], vcc
+; LLC-W64-NEXT:  ; %bb.8: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    v_and_b32_e32 v5, 7, v0
+; LLC-W64-NEXT:    v_add_u16_e32 v5, 3, v5
+; LLC-W64-NEXT:    v_cmp_lt_i16_e32 vcc, v5, v4
+; LLC-W64-NEXT:    s_andn2_b64 s[38:39], s[38:39], exec
+; LLC-W64-NEXT:    s_and_b64 s[42:43], vcc, exec
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], 0
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], -1
+; LLC-W64-NEXT:    s_or_b64 s[38:39], s[38:39], s[42:43]
+; LLC-W64-NEXT:  ; %bb.9: ; %Flow15
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[40:41]
+; LLC-W64-NEXT:    s_mov_b64 s[40:41], -1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[42:43], s[38:39]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W64-NEXT:  ; %bb.10: ; %while.cond
+; LLC-W64-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W64-NEXT:    s_add_i32 s17, s17, -1
+; LLC-W64-NEXT:    s_cmp_eq_u32 s17, 0
+; LLC-W64-NEXT:    s_cselect_b64 s[38:39], -1, 0
+; LLC-W64-NEXT:    v_add_u32_e32 v2, 42, v2
+; LLC-W64-NEXT:    s_andn2_b64 s[36:37], s[36:37], exec
+; LLC-W64-NEXT:    s_andn2_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_orn2_b64 s[40:41], s[38:39], exec
+; LLC-W64-NEXT:    global_store_dword v[0:1], v2, off
+; LLC-W64-NEXT:    s_branch .LBB0_2
+; LLC-W64-NEXT:  .LBB0_11: ; %loop.exit.guard
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[18:19], s[24:25], -1
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], 0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[20:21], s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[40:41], exec, s[20:21]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_16
+; LLC-W64-NEXT:  ; %bb.12: ; %Flow
+; LLC-W64-NEXT:    s_andn2_saveexec_b64 s[36:37], s[40:41]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_19
+; LLC-W64-NEXT:  .LBB0_13: ; %Flow11
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:  .LBB0_14: ; %UnifiedUnreachableBlock
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB0_15: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB0_16: ; %loop.exit.guard8
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], 0
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[20:21], s[12:13]
+; LLC-W64-NEXT:    s_xor_b64 s[42:43], exec, s[20:21]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_18
+; LLC-W64-NEXT:  ; %bb.17:
+; LLC-W64-NEXT:    s_add_u32 s12, s8, 24
+; LLC-W64-NEXT:    s_addc_u32 s13, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[18:19]
+; LLC-W64-NEXT:    s_add_u32 s18, s18, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s19, s19, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[18:19], 0x0
+; LLC-W64-NEXT:    s_mov_b64 s[44:45], s[8:9]
+; LLC-W64-NEXT:    s_mov_b64 s[8:9], s[12:13]
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s33, s14
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    s_mov_b64 s[34:35], s[4:5]
+; LLC-W64-NEXT:    s_mov_b64 s[36:37], s[6:7]
+; LLC-W64-NEXT:    s_mov_b64 s[38:39], s[10:11]
+; LLC-W64-NEXT:    s_mov_b32 s46, s16
+; LLC-W64-NEXT:    s_mov_b32 s47, s15
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_mov_b32 s14, s33
+; LLC-W64-NEXT:    s_mov_b32 s15, s47
+; LLC-W64-NEXT:    s_mov_b32 s16, s46
+; LLC-W64-NEXT:    s_mov_b64 s[8:9], s[44:45]
+; LLC-W64-NEXT:    s_mov_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_mov_b64 s[6:7], s[36:37]
+; LLC-W64-NEXT:    s_mov_b64 s[10:11], s[38:39]
+; LLC-W64-NEXT:    s_mov_b64 s[18:19], exec
+; LLC-W64-NEXT:  .LBB0_18: ; %Flow10
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[42:43]
+; LLC-W64-NEXT:    s_and_b64 s[34:35], s[18:19], exec
+; LLC-W64-NEXT:    ; implicit-def: $vgpr31
+; LLC-W64-NEXT:    s_andn2_saveexec_b64 s[36:37], s[40:41]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_13
+; LLC-W64-NEXT:  .LBB0_19:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 24
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, v40
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, v41
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_or_b64 s[34:35], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_14
+; LLC-W64-NEXT:    s_branch .LBB0_15
+;
+; LLC-W32-LABEL: uniform_loop_global:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_mov_b64 s[34:35], s[6:7]
+; LLC-W32-NEXT:    s_load_b32 s6, s[4:5], 0x0
+; LLC-W32-NEXT:    s_mov_b32 s7, 0
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_cmp_eq_u32 s6, 0
+; LLC-W32-NEXT:    s_cbranch_scc1 .LBB0_15
+; LLC-W32-NEXT:  ; %bb.1: ; %while.body.preheader
+; LLC-W32-NEXT:    s_load_b128 s[8:11], s[4:5], 0x8
+; LLC-W32-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v2, 0
+; LLC-W32-NEXT:    ; implicit-def: $sgpr16
+; LLC-W32-NEXT:    ; implicit-def: $sgpr12
+; LLC-W32-NEXT:    ; implicit-def: $sgpr17
+; LLC-W32-NEXT:    ; implicit-def: $sgpr18
+; LLC-W32-NEXT:    v_and_b32_e32 v5, 0x3ff, v31
+; LLC-W32-NEXT:    s_branch .LBB0_4
+; LLC-W32-NEXT:  .LBB0_2: ; %Flow16
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s24
+; LLC-W32-NEXT:    s_and_not1_b32 s18, s18, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s21, s21, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s17, s17, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s18, s18, s21
+; LLC-W32-NEXT:    s_or_b32 s17, s17, s20
+; LLC-W32-NEXT:    s_or_not1_b32 s21, s23, exec_lo
+; LLC-W32-NEXT:  .LBB0_3: ; %Flow14
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s19
+; LLC-W32-NEXT:    s_and_b32 s19, exec_lo, s21
+; LLC-W32-NEXT:    s_or_b32 s7, s19, s7
+; LLC-W32-NEXT:    s_and_not1_b32 s12, s12, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s19, s18, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s16, s16, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s20, s17, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s12, s12, s19
+; LLC-W32-NEXT:    s_or_b32 s16, s16, s20
+; LLC-W32-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s7
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_11
+; LLC-W32-NEXT:  .LBB0_4: ; %while.body
+; LLC-W32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LLC-W32-NEXT:    v_add_nc_u32_e32 v1, s6, v5
+; LLC-W32-NEXT:    s_mov_b32 s21, exec_lo
+; LLC-W32-NEXT:    ; implicit-def: $sgpr22
+; LLC-W32-NEXT:    ; implicit-def: $sgpr19
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W32-NEXT:    v_lshlrev_b64 v[3:4], 3, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    v_add_co_u32 v40, vcc_lo, s8, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v41, vcc_lo, s9, v4, vcc_lo
+; LLC-W32-NEXT:    v_lshrrev_b64 v[0:1], 3, v[40:41]
+; LLC-W32-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7fff8000, v0
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v0, v[0:1]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s20, 0, v0
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v0
+; LLC-W32-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_and_b32_e32 v1, 7, v40
+; LLC-W32-NEXT:    s_and_not1_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s19, -1
+; LLC-W32-NEXT:    s_mov_b32 s22, 0
+; LLC-W32-NEXT:    v_add_nc_u16 v1, v1, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e32 vcc_lo, v1, v0
+; LLC-W32-NEXT:    s_and_b32 s23, vcc_lo, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s20, s20, s23
+; LLC-W32-NEXT:  ; %bb.6: ; %Flow13
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s21
+; LLC-W32-NEXT:    s_and_not1_b32 s18, s18, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s22, s22, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s17, s17, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s19, s19, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s21, -1
+; LLC-W32-NEXT:    s_or_b32 s18, s18, s22
+; LLC-W32-NEXT:    s_or_b32 s17, s17, s19
+; LLC-W32-NEXT:    ; kill: def $vgpr0_vgpr1 killed $sgpr0_sgpr1 killed $exec
+; LLC-W32-NEXT:    s_and_saveexec_b32 s19, s20
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_3
+; LLC-W32-NEXT:  ; %bb.7: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_add_co_u32 v0, vcc_lo, s10, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, s11, v4, vcc_lo
+; LLC-W32-NEXT:    s_mov_b32 s23, exec_lo
+; LLC-W32-NEXT:    ; implicit-def: $sgpr21
+; LLC-W32-NEXT:    ; implicit-def: $sgpr20
+; LLC-W32-NEXT:    v_lshrrev_b64 v[3:4], 3, v[0:1]
+; LLC-W32-NEXT:    v_add_co_u32 v3, vcc_lo, 0x7fff8000, v3
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v4, v[3:4]
+; LLC-W32-NEXT:    global_load_b32 v3, v[40:41], off
+; LLC-W32-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s22, 0, v4
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v4
+; LLC-W32-NEXT:  ; %bb.8: ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    v_and_b32_e32 v6, 7, v0
+; LLC-W32-NEXT:    s_and_not1_b32 s22, s22, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s20, 0
+; LLC-W32-NEXT:    s_mov_b32 s21, -1
+; LLC-W32-NEXT:    v_add_nc_u16 v6, v6, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e32 vcc_lo, v6, v4
+; LLC-W32-NEXT:    s_and_b32 s24, vcc_lo, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s22, s22, s24
+; LLC-W32-NEXT:  ; %bb.9: ; %Flow15
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s23
+; LLC-W32-NEXT:    s_mov_b32 s23, -1
+; LLC-W32-NEXT:    s_and_saveexec_b32 s24, s22
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W32-NEXT:  ; %bb.10: ; %while.cond
+; LLC-W32-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; LLC-W32-NEXT:    s_add_i32 s6, s6, -1
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W32-NEXT:    v_add_nc_u32_e32 v3, 42, v3
+; LLC-W32-NEXT:    s_cmp_eq_u32 s6, 0
+; LLC-W32-NEXT:    s_cselect_b32 s22, -1, 0
+; LLC-W32-NEXT:    s_and_not1_b32 s21, s21, exec_lo
+; LLC-W32-NEXT:    s_and_not1_b32 s20, s20, exec_lo
+; LLC-W32-NEXT:    s_or_not1_b32 s23, s22, exec_lo
+; LLC-W32-NEXT:    global_store_b32 v[0:1], v3, off
+; LLC-W32-NEXT:    s_branch .LBB0_2
+; LLC-W32-NEXT:  .LBB0_11: ; %loop.exit.guard
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s7
+; LLC-W32-NEXT:    s_xor_b32 s6, s16, -1
+; LLC-W32-NEXT:    s_mov_b32 s33, 0
+; LLC-W32-NEXT:    s_and_saveexec_b32 s7, s6
+; LLC-W32-NEXT:    s_xor_b32 s42, exec_lo, s7
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB0_16
+; LLC-W32-NEXT:  ; %bb.12: ; %Flow
+; LLC-W32-NEXT:    s_and_not1_saveexec_b32 s36, s42
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB0_19
+; LLC-W32-NEXT:  .LBB0_13: ; %Flow11
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:  .LBB0_14: ; %UnifiedUnreachableBlock
+; LLC-W32-NEXT:    ; divergent unreachable
+; LLC-W32-NEXT:  .LBB0_15: ; %UnifiedReturnBlock
+; LLC-W32-NEXT:    s_nop 0
+; LLC-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; LLC-W32-NEXT:    s_endpgm
+; LLC-W32-NEXT:  .LBB0_16: ; %loop.exit.guard8
+; LLC-W32-NEXT:    s_mov_b32 s6, 0
+; LLC-W32-NEXT:    s_and_saveexec_b32 s7, s12
+; LLC-W32-NEXT:    s_xor_b32 s33, exec_lo, s7
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_18
+; LLC-W32-NEXT:  ; %bb.17:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 24
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[6:7]
+; LLC-W32-NEXT:    s_add_u32 s6, s6, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s7, s7, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    s_mov_b64 s[36:37], s[4:5]
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[6:7], 0x0
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s43, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s44, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_mov_b64 s[38:39], s[2:3]
+; LLC-W32-NEXT:    s_mov_b64 s[40:41], s[0:1]
+; LLC-W32-NEXT:    s_mov_b32 s45, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    s_mov_b32 s13, s43
+; LLC-W32-NEXT:    s_mov_b32 s14, s44
+; LLC-W32-NEXT:    s_mov_b32 s15, s45
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[36:37]
+; LLC-W32-NEXT:    s_mov_b64 s[0:1], s[40:41]
+; LLC-W32-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; LLC-W32-NEXT:    s_mov_b32 s6, exec_lo
+; LLC-W32-NEXT:  .LBB0_18: ; %Flow10
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; LLC-W32-NEXT:    s_and_b32 s33, s6, exec_lo
+; LLC-W32-NEXT:    ; implicit-def: $vgpr31
+; LLC-W32-NEXT:    s_and_not1_saveexec_b32 s36, s42
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_13
+; LLC-W32-NEXT:  .LBB0_19:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 24
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[4:5]
+; LLC-W32-NEXT:    s_add_u32 s4, s4, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s5, s5, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    s_or_b32 s33, s33, exec_lo
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB0_14
+; LLC-W32-NEXT:    s_branch .LBB0_15
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  br label %while.cond
+
+while.cond:
+  %c = phi i32 [%num, %entry], [%next_c, %while.body]
+  %cmp = icmp eq i32 %c, 0
+  br i1 %cmp, label %exit, label %while.body
+
+while.body:
+  %offs32 = add i32 %tid, %c
+  %offs = zext i32 %offs32 to i64
+
+  %pp1 = getelementptr inbounds i64, ptr addrspace(1) %ptr1, i64 %offs
+  %val = load i32, ptr addrspace(1) %pp1, align 4
+
+  %sum = add i32 %val, 42
+
+  %pp2 = getelementptr inbounds i64, ptr addrspace(1) %ptr2, i64 %offs
+  store i32 %sum, ptr addrspace(1) %pp2, align 4
+
+  %next_c = sub i32 %c, 1
+  br label %while.cond
+
+exit:
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll
new file mode 100644
index 000000000000000..4935a92d54787a3
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll
@@ -0,0 +1,669 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: opt  -passes=asan -S < %s | FileCheck %s --check-prefix=OPT
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx90a -o - | FileCheck %s --check-prefix=LLC-W64
+; RUN: opt < %s -passes='asan,default<O3>' -o - | llc -O3 -mtriple=amdgcn-hsa-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -o - | FileCheck %s --check-prefix=LLC-W32
+
+; This test contains checks for both opt and llc. To update them, use:
+;   utils/update_test_checks.py --force-update
+;   utils/update_llc_test_checks.py --force-update
+;
+; --force-update allows overriding the "Assertions have been autogenerated by" guard.
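+; For example, run from the LLVM source root (invocations are a sketch):
+;   utils/update_test_checks.py --force-update llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll
+;   utils/update_llc_test_checks.py --force-update llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_codegen_trivial.ll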
+target triple = "amdgcn-amd-amdhsa"
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+define protected amdgpu_kernel void @global_loadstore_uniform(ptr addrspace(1) %ptr) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @global_loadstore_uniform(
+; OPT-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
+; OPT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; OPT-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 2147450880
+; OPT-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; OPT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
+; OPT-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; OPT-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0:![0-9]+]]
+; OPT:       6:
+; OPT-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
+; OPT-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
+; OPT-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
+; OPT-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
+; OPT:       11:
+; OPT-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
+; OPT-NEXT:    unreachable
+; OPT:       12:
+; OPT-NEXT:    [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4
+; OPT-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: global_loadstore_uniform:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_load_dwordx2 s[34:35], s[8:9], 0x0
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_lshr_b64 s[12:13], s[34:35], 3
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s12
+; LLC-W64-NEXT:    v_add_co_u32_e32 v2, vcc, 0x7fff8000, v1
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s13
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v1, v[2:3]
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[36:37], 0, v1
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[38:39], vcc
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_3
+; LLC-W64-NEXT:  ; %bb.1: ; %Flow
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[36:37]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB0_6
+; LLC-W64-NEXT:  .LBB0_2: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB0_3:
+; LLC-W64-NEXT:    v_and_b32_e64 v2, s34, 7
+; LLC-W64-NEXT:    v_add_u16_e32 v2, 3, v2
+; LLC-W64-NEXT:    v_cmp_ge_i16_e32 vcc, v2, v1
+; LLC-W64-NEXT:    s_mov_b64 s[12:13], -1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[40:41], vcc
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_5
+; LLC-W64-NEXT:  ; %bb.4:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 8
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, s34
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s35
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[12:13], exec, -1
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB0_5: ; %Flow2
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[40:41]
+; LLC-W64-NEXT:    s_andn2_b64 s[4:5], s[36:37], exec
+; LLC-W64-NEXT:    s_and_b64 s[6:7], s[12:13], exec
+; LLC-W64-NEXT:    s_or_b64 s[36:37], s[4:5], s[6:7]
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[36:37]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W64-NEXT:  .LBB0_6:
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, 0
+; LLC-W64-NEXT:    global_load_dword v1, v0, s[34:35] glc
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W64-NEXT:    global_store_dword v0, v1, s[34:35]
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W64-NEXT:    s_endpgm
+;
+; LLC-W32-LABEL: global_loadstore_uniform:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_load_b64 s[34:35], s[4:5], 0x0
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; LLC-W32-NEXT:    s_mov_b32 s36, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_lshr_b64 s[6:7], s[34:35], 3
+; LLC-W32-NEXT:    v_add_co_u32 v1, s6, 0x7fff8000, s6
+; LLC-W32-NEXT:    v_add_co_ci_u32_e64 v2, null, 0, s7, s6
+; LLC-W32-NEXT:    flat_load_i8 v1, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s33, 0, v1
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v1
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB0_3
+; LLC-W32-NEXT:  ; %bb.1: ; %Flow
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB0_6
+; LLC-W32-NEXT:  .LBB0_2: ; %UnifiedReturnBlock
+; LLC-W32-NEXT:    s_endpgm
+; LLC-W32-NEXT:  .LBB0_3:
+; LLC-W32-NEXT:    v_and_b32_e64 v2, s34, 7
+; LLC-W32-NEXT:    s_mov_b32 s6, -1
+; LLC-W32-NEXT:    s_mov_b32 s37, exec_lo
+; LLC-W32-NEXT:    v_add_nc_u16 v2, v2, 3
+; LLC-W32-NEXT:    v_cmpx_ge_i16_e64 v2, v1
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_5
+; LLC-W32-NEXT:  ; %bb.4:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 8
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[4:5]
+; LLC-W32-NEXT:    s_add_u32 s4, s4, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s5, s5, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, s34
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
+; LLC-W32-NEXT:    v_mov_b32_e32 v1, s35
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    ; divergent unreachable
+; LLC-W32-NEXT:    s_xor_b32 s6, exec_lo, -1
+; LLC-W32-NEXT:  .LBB0_5: ; %Flow2
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s37
+; LLC-W32-NEXT:    s_and_not1_b32 s0, s33, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s1, s6, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s33, s0, s1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:    s_cbranch_execz .LBB0_2
+; LLC-W32-NEXT:  .LBB0_6:
+; LLC-W32-NEXT:    v_mov_b32_e32 v0, 0
+; LLC-W32-NEXT:    global_load_b32 v1, v0, s[34:35] glc dlc
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W32-NEXT:    global_store_b32 v0, v1, s[34:35] dlc
+; LLC-W32-NEXT:    s_waitcnt_vscnt null, 0x0
+; LLC-W32-NEXT:    s_nop 0
+; LLC-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; LLC-W32-NEXT:    s_endpgm
+entry:
+  %val = load volatile i32, ptr addrspace(1) %ptr, align 4
+  store volatile i32 %val, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define protected amdgpu_kernel void @generic_loadstore_uniform(ptr addrspace(0) %ptr) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @generic_loadstore_uniform(
+; OPT-SAME: ptr [[PTR:%.*]]) #[[ATTR1]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR]])
+; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
+; OPT-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
+; OPT-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; OPT-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; OPT:       4:
+; OPT-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
+; OPT-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
+; OPT-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
+; OPT-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; OPT-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; OPT-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0]]
+; OPT:       11:
+; OPT-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
+; OPT-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
+; OPT-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
+; OPT-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
+; OPT-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
+; OPT:       16:
+; OPT-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR3]]
+; OPT-NEXT:    unreachable
+; OPT:       17:
+; OPT-NEXT:    br label [[TMP18]]
+; OPT:       18:
+; OPT-NEXT:    [[VAL:%.*]] = load volatile i32, ptr [[PTR]], align 4
+; OPT-NEXT:    store volatile i32 [[VAL]], ptr [[PTR]], align 4
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: generic_loadstore_uniform:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_load_dwordx2 s[36:37], s[8:9], 0x0
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_lshr_b64 s[12:13], s[36:37], 3
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s12
+; LLC-W64-NEXT:    v_add_co_u32_e32 v2, vcc, 0x7fff8000, v1
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s13
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v1, v[2:3]
+; LLC-W64-NEXT:    s_mov_b64 s[12:13], -1
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[38:39], vcc
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB1_3
+; LLC-W64-NEXT:  ; %bb.1: ; %Flow
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[12:13]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB1_6
+; LLC-W64-NEXT:  .LBB1_2: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB1_3:
+; LLC-W64-NEXT:    v_and_b32_e64 v2, s36, 7
+; LLC-W64-NEXT:    v_add_u16_e32 v2, 3, v2
+; LLC-W64-NEXT:    v_cmp_lt_i16_e64 s[34:35], v2, v1
+; LLC-W64-NEXT:    v_cmp_ge_i16_e32 vcc, v2, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[40:41], vcc
+; LLC-W64-NEXT:    s_cbranch_execz .LBB1_5
+; LLC-W64-NEXT:  ; %bb.4:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 8
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, s36
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, s37
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB1_5: ; %Flow2
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[40:41]
+; LLC-W64-NEXT:    s_orn2_b64 s[12:13], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[12:13]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB1_2
+; LLC-W64-NEXT:  .LBB1_6:
+; LLC-W64-NEXT:    v_pk_mov_b32 v[0:1], s[36:37], s[36:37] op_sel:[0,1]
+; LLC-W64-NEXT:    flat_load_dword v2, v[0:1] glc
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    flat_store_dword v[0:1], v2
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0)
+; LLC-W64-NEXT:    s_endpgm
+;
+; LLC-W32-LABEL: generic_loadstore_uniform:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_load_b64 s[34:35], s[4:5], 0x0
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; LLC-W32-NEXT:    s_mov_b32 s36, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_lshr_b64 s[6:7], s[34:35], 3
+; LLC-W32-NEXT:    v_add_co_u32 v1, s6, 0x7fff8000, s6
+; LLC-W32-NEXT:    v_add_co_ci_u32_e64 v2, null, 0, s7, s6
+; LLC-W32-NEXT:    s_mov_b32 s6, -1
+; LLC-W32-NEXT:    flat_load_i8 v1, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v1
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB1_3
+; LLC-W32-NEXT:  ; %bb.1: ; %Flow
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s6
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB1_6
+; LLC-W32-NEXT:  .LBB1_2: ; %UnifiedReturnBlock
+; LLC-W32-NEXT:    s_endpgm
+; LLC-W32-NEXT:  .LBB1_3:
+; LLC-W32-NEXT:    v_and_b32_e64 v2, s34, 7
+; LLC-W32-NEXT:    s_mov_b32 s37, exec_lo
+; LLC-W32-NEXT:    v_add_nc_u16 v2, v2, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e64 s33, v2, v1
+; LLC-W32-NEXT:    v_cmpx_ge_i16_e64 v2, v1
+; LLC-W32-NEXT:    s_cbranch_execz .LBB1_5
+; LLC-W32-NEXT:  ; %bb.4:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 8
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[4:5]
+; LLC-W32-NEXT:    s_add_u32 s4, s4, __asan_report_load4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s5, s5, __asan_report_load4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, s34
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
+; LLC-W32-NEXT:    v_mov_b32_e32 v1, s35
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    ; divergent unreachable
+; LLC-W32-NEXT:  .LBB1_5: ; %Flow2
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s37
+; LLC-W32-NEXT:    s_or_not1_b32 s6, s33, exec_lo
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s36
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s6
+; LLC-W32-NEXT:    s_cbranch_execz .LBB1_2
+; LLC-W32-NEXT:  .LBB1_6:
+; LLC-W32-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v1, s35
+; LLC-W32-NEXT:    flat_load_b32 v2, v[0:1] glc dlc
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    flat_store_b32 v[0:1], v2 dlc
+; LLC-W32-NEXT:    s_waitcnt_vscnt null, 0x0
+; LLC-W32-NEXT:    s_endpgm
+entry:
+  %val = load volatile i32, ptr addrspace(0) %ptr, align 4
+  store volatile i32 %val, ptr addrspace(0) %ptr, align 4
+  ret void
+}
+
+define protected amdgpu_kernel void @global_store_nonuniform(ptr addrspace(1) %ptr) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @global_store_nonuniform(
+; OPT-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:    [[TID64:%.*]] = zext i32 [[TID]] to i64
+; OPT-NEXT:    [[PP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR]], i64 [[TID64]]
+; OPT-NEXT:    [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[PP1]] to i64
+; OPT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; OPT-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 2147450880
+; OPT-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; OPT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
+; OPT-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; OPT-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0]]
+; OPT:       6:
+; OPT-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
+; OPT-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
+; OPT-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
+; OPT-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
+; OPT:       11:
+; OPT-NEXT:    call void @__asan_report_store4(i64 [[TMP0]]) #[[ATTR3]]
+; OPT-NEXT:    unreachable
+; OPT:       12:
+; OPT-NEXT:    store i32 42, ptr addrspace(1) [[PP1]], align 4
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: global_store_nonuniform:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_load_dwordx2 s[12:13], s[8:9], 0x0
+; LLC-W64-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; LLC-W64-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    v_mov_b32_e32 v2, s13
+; LLC-W64-NEXT:    v_add_co_u32_e32 v40, vcc, s12, v1
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v41, vcc, 0, v2, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[2:3], 3, v[40:41]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v2, vcc, 0x7fff8000, v2
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v1, v[2:3]
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_eq_u16_e64 s[34:35], 0, v1
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[36:37], vcc
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB2_3
+; LLC-W64-NEXT:  ; %bb.1: ; %Flow
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB2_6
+; LLC-W64-NEXT:  .LBB2_2: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB2_3:
+; LLC-W64-NEXT:    v_and_b32_e32 v2, 7, v40
+; LLC-W64-NEXT:    v_add_u16_e32 v2, 3, v2
+; LLC-W64-NEXT:    v_cmp_ge_i16_e32 vcc, v2, v1
+; LLC-W64-NEXT:    s_mov_b64 s[12:13], -1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[38:39], vcc
+; LLC-W64-NEXT:    s_cbranch_execz .LBB2_5
+; LLC-W64-NEXT:  ; %bb.4:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 8
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, v40
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, v41
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    s_xor_b64 s[12:13], exec, -1
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB2_5: ; %Flow2
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_andn2_b64 s[4:5], s[34:35], exec
+; LLC-W64-NEXT:    s_and_b64 s[6:7], s[12:13], exec
+; LLC-W64-NEXT:    s_or_b64 s[34:35], s[4:5], s[6:7]
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[34:35]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB2_2
+; LLC-W64-NEXT:  .LBB2_6:
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, 42
+; LLC-W64-NEXT:    global_store_dword v[40:41], v0, off
+; LLC-W64-NEXT:    s_endpgm
+;
+; LLC-W32-LABEL: global_store_nonuniform:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; LLC-W32-NEXT:    s_load_b64 s[6:7], s[4:5], 0x0
+; LLC-W32-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; LLC-W32-NEXT:    s_mov_b32 s34, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    v_add_co_u32 v40, s6, s6, v1
+; LLC-W32-NEXT:    v_add_co_ci_u32_e64 v41, null, s7, 0, s6
+; LLC-W32-NEXT:    v_lshrrev_b64 v[1:2], 3, v[40:41]
+; LLC-W32-NEXT:    v_add_co_u32 v1, vcc_lo, 0x7fff8000, v1
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v1, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmp_eq_u16_e64 s33, 0, v1
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v1
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB2_3
+; LLC-W32-NEXT:  ; %bb.1: ; %Flow
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s34
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB2_6
+; LLC-W32-NEXT:  .LBB2_2: ; %UnifiedReturnBlock
+; LLC-W32-NEXT:    s_endpgm
+; LLC-W32-NEXT:  .LBB2_3:
+; LLC-W32-NEXT:    v_and_b32_e32 v2, 7, v40
+; LLC-W32-NEXT:    s_mov_b32 s6, -1
+; LLC-W32-NEXT:    s_mov_b32 s35, exec_lo
+; LLC-W32-NEXT:    v_add_nc_u16 v2, v2, 3
+; LLC-W32-NEXT:    v_cmpx_ge_i16_e64 v2, v1
+; LLC-W32-NEXT:    s_cbranch_execz .LBB2_5
+; LLC-W32-NEXT:  ; %bb.4:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 8
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[4:5]
+; LLC-W32-NEXT:    s_add_u32 s4, s4, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s5, s5, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
+; LLC-W32-NEXT:    v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    ; divergent unreachable
+; LLC-W32-NEXT:    s_xor_b32 s6, exec_lo, -1
+; LLC-W32-NEXT:  .LBB2_5: ; %Flow2
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s35
+; LLC-W32-NEXT:    s_and_not1_b32 s0, s33, exec_lo
+; LLC-W32-NEXT:    s_and_b32 s1, s6, exec_lo
+; LLC-W32-NEXT:    s_or_b32 s33, s0, s1
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s34
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s33
+; LLC-W32-NEXT:    s_cbranch_execz .LBB2_2
+; LLC-W32-NEXT:  .LBB2_6:
+; LLC-W32-NEXT:    v_mov_b32_e32 v0, 42
+; LLC-W32-NEXT:    global_store_b32 v[40:41], v0, off
+; LLC-W32-NEXT:    s_nop 0
+; LLC-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; LLC-W32-NEXT:    s_endpgm
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid64 = zext i32 %tid to i64
+
+  %pp1 = getelementptr inbounds i64, ptr addrspace(1) %ptr, i64 %tid64
+  store i32 42, ptr addrspace(1) %pp1, align 4
+  ret void
+}
+
+define protected amdgpu_kernel void @generic_store_nonuniform(ptr addrspace(0) %ptr) sanitize_address {
+; OPT-LABEL: define protected amdgpu_kernel void @generic_store_nonuniform(
+; OPT-SAME: ptr [[PTR:%.*]]) #[[ATTR1]] {
+; OPT-NEXT:  entry:
+; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; OPT-NEXT:    [[TID64:%.*]] = zext i32 [[TID]] to i64
+; OPT-NEXT:    [[PP1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TID64]]
+; OPT-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PP1]])
+; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PP1]])
+; OPT-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
+; OPT-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; OPT-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; OPT:       4:
+; OPT-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PP1]] to i64
+; OPT-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
+; OPT-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
+; OPT-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; OPT-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; OPT-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
+; OPT-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0]]
+; OPT:       11:
+; OPT-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
+; OPT-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
+; OPT-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
+; OPT-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
+; OPT-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
+; OPT:       16:
+; OPT-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR3]]
+; OPT-NEXT:    unreachable
+; OPT:       17:
+; OPT-NEXT:    br label [[TMP18]]
+; OPT:       18:
+; OPT-NEXT:    store i32 42, ptr [[PP1]], align 4
+; OPT-NEXT:    ret void
+;
+; LLC-W64-LABEL: generic_store_nonuniform:
+; LLC-W64:       ; %bb.0: ; %entry
+; LLC-W64-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; LLC-W64-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; LLC-W64-NEXT:    s_load_dwordx2 s[12:13], s[8:9], 0x0
+; LLC-W64-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; LLC-W64-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
+; LLC-W64-NEXT:    s_add_u32 s0, s0, s17
+; LLC-W64-NEXT:    s_addc_u32 s1, s1, 0
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    v_mov_b32_e32 v2, s13
+; LLC-W64-NEXT:    v_add_co_u32_e32 v40, vcc, s12, v1
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v41, vcc, 0, v2, vcc
+; LLC-W64-NEXT:    v_lshrrev_b64 v[2:3], 3, v[40:41]
+; LLC-W64-NEXT:    v_add_co_u32_e32 v2, vcc, 0x7fff8000, v2
+; LLC-W64-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; LLC-W64-NEXT:    flat_load_sbyte v1, v[2:3]
+; LLC-W64-NEXT:    s_mov_b64 s[12:13], -1
+; LLC-W64-NEXT:    s_mov_b32 s32, 0
+; LLC-W64-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W64-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[36:37], vcc
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB3_3
+; LLC-W64-NEXT:  ; %bb.1: ; %Flow
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[12:13]
+; LLC-W64-NEXT:    s_cbranch_execnz .LBB3_6
+; LLC-W64-NEXT:  .LBB3_2: ; %UnifiedReturnBlock
+; LLC-W64-NEXT:    s_endpgm
+; LLC-W64-NEXT:  .LBB3_3:
+; LLC-W64-NEXT:    v_and_b32_e32 v2, 7, v40
+; LLC-W64-NEXT:    v_add_u16_e32 v2, 3, v2
+; LLC-W64-NEXT:    v_cmp_lt_i16_e64 s[34:35], v2, v1
+; LLC-W64-NEXT:    v_cmp_ge_i16_e32 vcc, v2, v1
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[38:39], vcc
+; LLC-W64-NEXT:    s_cbranch_execz .LBB3_5
+; LLC-W64-NEXT:  ; %bb.4:
+; LLC-W64-NEXT:    s_add_u32 s8, s8, 8
+; LLC-W64-NEXT:    s_addc_u32 s9, s9, 0
+; LLC-W64-NEXT:    s_getpc_b64 s[12:13]
+; LLC-W64-NEXT:    s_add_u32 s12, s12, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W64-NEXT:    s_addc_u32 s13, s13, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W64-NEXT:    s_load_dwordx2 s[18:19], s[12:13], 0x0
+; LLC-W64-NEXT:    s_mov_b32 s12, s14
+; LLC-W64-NEXT:    s_mov_b32 s13, s15
+; LLC-W64-NEXT:    s_mov_b32 s14, s16
+; LLC-W64-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, v40
+; LLC-W64-NEXT:    v_mov_b32_e32 v1, v41
+; LLC-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W64-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; LLC-W64-NEXT:    ; divergent unreachable
+; LLC-W64-NEXT:  .LBB3_5: ; %Flow2
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[38:39]
+; LLC-W64-NEXT:    s_orn2_b64 s[12:13], s[34:35], exec
+; LLC-W64-NEXT:    s_or_b64 exec, exec, s[36:37]
+; LLC-W64-NEXT:    s_and_saveexec_b64 s[4:5], s[12:13]
+; LLC-W64-NEXT:    s_cbranch_execz .LBB3_2
+; LLC-W64-NEXT:  .LBB3_6:
+; LLC-W64-NEXT:    v_mov_b32_e32 v0, 42
+; LLC-W64-NEXT:    global_store_dword v[40:41], v0, off
+; LLC-W64-NEXT:    s_endpgm
+;
+; LLC-W32-LABEL: generic_store_nonuniform:
+; LLC-W32:       ; %bb.0: ; %entry
+; LLC-W32-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; LLC-W32-NEXT:    s_load_b64 s[6:7], s[4:5], 0x0
+; LLC-W32-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; LLC-W32-NEXT:    s_mov_b32 s34, exec_lo
+; LLC-W32-NEXT:    s_mov_b32 s32, 0
+; LLC-W32-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    v_add_co_u32 v40, s6, s6, v1
+; LLC-W32-NEXT:    v_add_co_ci_u32_e64 v41, null, s7, 0, s6
+; LLC-W32-NEXT:    s_mov_b32 s6, -1
+; LLC-W32-NEXT:    v_lshrrev_b64 v[1:2], 3, v[40:41]
+; LLC-W32-NEXT:    v_add_co_u32 v1, vcc_lo, 0x7fff8000, v1
+; LLC-W32-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
+; LLC-W32-NEXT:    flat_load_i8 v1, v[1:2]
+; LLC-W32-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; LLC-W32-NEXT:    v_cmpx_ne_u16_e32 0, v1
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB3_3
+; LLC-W32-NEXT:  ; %bb.1: ; %Flow
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s34
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s6
+; LLC-W32-NEXT:    s_cbranch_execnz .LBB3_6
+; LLC-W32-NEXT:  .LBB3_2: ; %UnifiedReturnBlock
+; LLC-W32-NEXT:    s_endpgm
+; LLC-W32-NEXT:  .LBB3_3:
+; LLC-W32-NEXT:    v_and_b32_e32 v2, 7, v40
+; LLC-W32-NEXT:    s_mov_b32 s35, exec_lo
+; LLC-W32-NEXT:    v_add_nc_u16 v2, v2, 3
+; LLC-W32-NEXT:    v_cmp_lt_i16_e64 s33, v2, v1
+; LLC-W32-NEXT:    v_cmpx_ge_i16_e64 v2, v1
+; LLC-W32-NEXT:    s_cbranch_execz .LBB3_5
+; LLC-W32-NEXT:  ; %bb.4:
+; LLC-W32-NEXT:    s_add_u32 s8, s4, 8
+; LLC-W32-NEXT:    s_addc_u32 s9, s5, 0
+; LLC-W32-NEXT:    s_getpc_b64 s[4:5]
+; LLC-W32-NEXT:    s_add_u32 s4, s4, __asan_report_store4@gotpcrel32@lo+4
+; LLC-W32-NEXT:    s_addc_u32 s5, s5, __asan_report_store4@gotpcrel32@hi+12
+; LLC-W32-NEXT:    v_mov_b32_e32 v31, v0
+; LLC-W32-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
+; LLC-W32-NEXT:    v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41
+; LLC-W32-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; LLC-W32-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; LLC-W32-NEXT:    s_mov_b32 s12, s13
+; LLC-W32-NEXT:    s_mov_b32 s13, s14
+; LLC-W32-NEXT:    s_mov_b32 s14, s15
+; LLC-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; LLC-W32-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; LLC-W32-NEXT:    ; divergent unreachable
+; LLC-W32-NEXT:  .LBB3_5: ; %Flow2
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s35
+; LLC-W32-NEXT:    s_or_not1_b32 s6, s33, exec_lo
+; LLC-W32-NEXT:    s_or_b32 exec_lo, exec_lo, s34
+; LLC-W32-NEXT:    s_and_saveexec_b32 s0, s6
+; LLC-W32-NEXT:    s_cbranch_execz .LBB3_2
+; LLC-W32-NEXT:  .LBB3_6:
+; LLC-W32-NEXT:    v_mov_b32_e32 v0, 42
+; LLC-W32-NEXT:    global_store_b32 v[40:41], v0, off
+; LLC-W32-NEXT:    s_nop 0
+; LLC-W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; LLC-W32-NEXT:    s_endpgm
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid64 = zext i32 %tid to i64
+
+  %pp1 = getelementptr inbounds i64, ptr addrspace(0) %ptr, i64 %tid64
+  store i32 42, ptr addrspace(0) %pp1, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }


