[llvm] [AMDGPU] Improve detection of non-null addrspacecast operands (PR #82311)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 03:16:01 PST 2024
================
@@ -0,0 +1,272 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
+
+; Tests that we can avoid nullptr checks for addrspacecasts from/to the private/local address spaces.
+;
+; Whenever a testcase is successful, we should see the addrspacecast replaced with the intrinsic
+; and the resulting code should have no select/cndmask null check for the pointer.
+
+; local (addrspace 3) -> flat cast where the source is marked `nonnull` on the
+; argument: the cast should be rewritten to llvm.amdgcn.addrspacecast.nonnull
+; and the ASM should build the flat pointer from src_shared_base with no
+; v_cndmask/select null check.
+define void @local_to_flat_nonnull_arg(ptr addrspace(3) nonnull %ptr) {
+; OPT-LABEL: define void @local_to_flat_nonnull_arg(
+; OPT-SAME: ptr addrspace(3) nonnull [[PTR:%.*]]) {
+; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: local_to_flat_nonnull_arg:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
+; ASM-NEXT: v_mov_b32_e32 v1, s5
+; ASM-NEXT: v_mov_b32_e32 v2, 7
+; ASM-NEXT: flat_store_dword v[0:1], v2
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %x = addrspacecast ptr addrspace(3) %ptr to ptr
+ store volatile i32 7, ptr %x
+ ret void
+}
+
+; private (addrspace 5) -> flat cast with a `nonnull` source argument: same as
+; above but the high half of the flat pointer comes from src_private_base;
+; again no null-check select/cndmask should be emitted.
+define void @private_to_flat_nonnull_arg(ptr addrspace(5) nonnull %ptr) {
+; OPT-LABEL: define void @private_to_flat_nonnull_arg(
+; OPT-SAME: ptr addrspace(5) nonnull [[PTR:%.*]]) {
+; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: private_to_flat_nonnull_arg:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
+; ASM-NEXT: v_mov_b32_e32 v1, s5
+; ASM-NEXT: v_mov_b32_e32 v2, 7
+; ASM-NEXT: flat_store_dword v[0:1], v2
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %x = addrspacecast ptr addrspace(5) %ptr to ptr
+ store volatile i32 7, ptr %x
+ ret void
+}
+
+; Reverse direction: flat -> local (addrspace 3) with a `nonnull` flat source.
+; The cast becomes llvm.amdgcn.addrspacecast.nonnull.p3.p0 and the store can
+; use ds_write directly, with no null compare on the truncated pointer.
+define void @flat_to_local_nonnull_arg(ptr nonnull %ptr) {
+; OPT-LABEL: define void @flat_to_local_nonnull_arg(
+; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
+; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[TMP1]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: flat_to_local_nonnull_arg:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: v_mov_b32_e32 v1, 7
+; ASM-NEXT: ds_write_b32 v0, v1
+; ASM-NEXT: s_waitcnt lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %x = addrspacecast ptr %ptr to ptr addrspace(3)
+ store volatile i32 7, ptr addrspace(3) %x
+ ret void
+}
+
+; flat -> private (addrspace 5) with a `nonnull` flat source: the cast becomes
+; llvm.amdgcn.addrspacecast.nonnull.p5.p0 and the store goes straight to a
+; buffer_store, again without any null-check sequence.
+define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
+; OPT-LABEL: define void @flat_to_private_nonnull_arg(
+; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
+; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: flat_to_private_nonnull_arg:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: v_mov_b32_e32 v1, 7
+; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; ASM-NEXT: s_waitcnt vmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %x = addrspacecast ptr %ptr to ptr addrspace(5)
+ store volatile i32 7, ptr addrspace(5) %x
+ ret void
+}
+
+; Here the non-null source is inferred rather than annotated: the cast operand
+; is a private alloca, which can never be null, so the nonnull intrinsic is
+; still used and the ASM builds the flat pointer without a null check.
+define void @private_alloca_to_flat(ptr %ptr) {
+; OPT-LABEL: define void @private_alloca_to_flat(
+; OPT-SAME: ptr [[PTR:%.*]]) {
+; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
+; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[ALLOCA]])
+; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: private_alloca_to_flat:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
+; ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; ASM-NEXT: v_mov_b32_e32 v1, s5
+; ASM-NEXT: v_mov_b32_e32 v2, 7
+; ASM-NEXT: flat_store_dword v[0:1], v2
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca i8, addrspace(5)
+ %x = addrspacecast ptr addrspace(5) %alloca to ptr
+ store volatile i32 7, ptr %x
+ ret void
+}
+
+ at lds = internal unnamed_addr addrspace(3) global i8 undef, align 4
----------------
arsenm wrote:
```suggestion
@lds = internal unnamed_addr addrspace(3) global i8 poison, align 4
```
https://github.com/llvm/llvm-project/pull/82311
More information about the llvm-commits
mailing list