[llvm] 9060ca0 - [AMDGPU] Check for nonnull loads feeding addrspacecast (#138184)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 04:54:25 PDT 2025
Author: Jay Foad
Date: 2025-05-02T12:54:22+01:00
New Revision: 9060ca01913776cbed78b03e72d7700adc2201b7
URL: https://github.com/llvm/llvm-project/commit/9060ca01913776cbed78b03e72d7700adc2201b7
DIFF: https://github.com/llvm/llvm-project/commit/9060ca01913776cbed78b03e72d7700adc2201b7.diff
LOG: [AMDGPU] Check for nonnull loads feeding addrspacecast (#138184)
Handle nonnull loads just like nonnull arguments when checking for
addrspacecasts that are known never null.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6617373f89c8b..3915028b557d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -2130,6 +2130,11 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())
return true;
+ // Check nonnull loads.
+ if (const auto *Load = dyn_cast<LoadInst>(V);
+ Load && Load->hasMetadata(LLVMContext::MD_nonnull))
+ return true;
+
// getUnderlyingObject may have looked through another addrspacecast, although
// the optimizable situations most likely folded out by now.
if (AS != cast<PointerType>(V->getType())->getAddressSpace())
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index 3216e71e6221a..31c23b94a8de8 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -88,6 +88,122 @@ define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
ret void
}
+define void @local_to_flat_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @local_to_flat_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT: [[PTR:%.*]] = load ptr addrspace(3), ptr [[P]], align 4, !nonnull [[META0:![0-9]+]]
+; OPT-NEXT: [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: local_to_flat_nonnull_load:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: flat_load_dword v0, v[0:1]
+; ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
+; ASM-NEXT: v_mov_b32_e32 v1, s5
+; ASM-NEXT: v_mov_b32_e32 v2, 7
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: flat_store_dword v[0:1], v2
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %ptr = load ptr addrspace(3), ptr %p, !nonnull !{}
+ %x = addrspacecast ptr addrspace(3) %ptr to ptr
+ store volatile i32 7, ptr %x
+ ret void
+}
+
+define void @private_to_flat_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @private_to_flat_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT: [[PTR:%.*]] = load ptr addrspace(5), ptr [[P]], align 4, !nonnull [[META0]]
+; OPT-NEXT: [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
+; OPT-NEXT: ret void
+;
+; ASM-LABEL: private_to_flat_nonnull_load:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: flat_load_dword v0, v[0:1]
+; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
+; ASM-NEXT: v_mov_b32_e32 v1, s5
+; ASM-NEXT: v_mov_b32_e32 v2, 7
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: flat_store_dword v[0:1], v2
+; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %ptr = load ptr addrspace(5), ptr %p, !nonnull !{}
+ %x = addrspacecast ptr addrspace(5) %ptr to ptr
+ store volatile i32 7, ptr %x
+ ret void
+}
+
+define void @flat_to_local_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @flat_to_local_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT: [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
+; OPT-NEXT: [[X:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[X]], align 4
+; OPT-NEXT: ret void
+;
+; DAGISEL-ASM-LABEL: flat_to_local_nonnull_load:
+; DAGISEL-ASM: ; %bb.0:
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: flat_load_dword v0, v[0:1]
+; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: ds_write_b32 v0, v1
+; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: flat_to_local_nonnull_load:
+; GISEL-ASM: ; %bb.0:
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
+; GISEL-ASM-NEXT: ds_write_b32 v0, v1
+; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+ %ptr = load ptr, ptr %p, !nonnull !{}
+ %x = addrspacecast ptr %ptr to ptr addrspace(3)
+ store volatile i32 7, ptr addrspace(3) %x
+ ret void
+}
+
+define void @flat_to_private_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @flat_to_private_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT: [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
+; OPT-NEXT: [[X:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
+; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[X]], align 4
+; OPT-NEXT: ret void
+;
+; DAGISEL-ASM-LABEL: flat_to_private_nonnull_load:
+; DAGISEL-ASM: ; %bb.0:
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: flat_load_dword v0, v[0:1]
+; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
+; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: flat_to_private_nonnull_load:
+; GISEL-ASM: ; %bb.0:
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
+; GISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
+; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+ %ptr = load ptr, ptr %p, !nonnull !{}
+ %x = addrspacecast ptr %ptr to ptr addrspace(5)
+ store volatile i32 7, ptr addrspace(5) %x
+ ret void
+}
+
define void @private_alloca_to_flat(ptr %ptr) {
; OPT-LABEL: define void @private_alloca_to_flat(
; OPT-SAME: ptr [[PTR:%.*]]) {
@@ -218,7 +334,7 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
-; DAGISEL-ASM-NEXT: .LBB7_3: ; %finally
+; DAGISEL-ASM-NEXT: .LBB11_3: ; %finally
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
@@ -226,7 +342,7 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
+; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB11_3
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
@@ -249,14 +365,14 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
-; GISEL-ASM-NEXT: .LBB7_3: ; %finally
+; GISEL-ASM-NEXT: .LBB11_3: ; %finally
; GISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GISEL-ASM-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; GISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
-; GISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
+; GISEL-ASM-NEXT: s_cbranch_execnz .LBB11_3
; GISEL-ASM-NEXT: ; %bb.4: ; %end
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
More information about the llvm-commits mailing list