[llvm] [AMDGPU] Check for nonnull loads feeding addrspacecast (PR #138184)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 1 12:22:06 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes:

Handle loads tagged with !nonnull metadata just like nonnull arguments
when checking for addrspacecasts that are known never null.
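
As an illustration (taken from the new test below), a pointer loaded with !nonnull metadata that feeds an addrspacecast is now rewritten to the llvm.amdgcn.addrspacecast.nonnull intrinsic, letting the backend skip the null check when lowering the cast:

```llvm
; Before AMDGPUCodeGenPrepare: the cast source is a load tagged !nonnull.
define void @local_to_flat_nonnull_load(ptr %p) {
  %ptr = load ptr addrspace(3), ptr %p, !nonnull !{}
  %x = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

; After the pass, the addrspacecast becomes:
;   %x = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
```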


---
Full diff: https://github.com/llvm/llvm-project/pull/138184.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+5) 
- (modified) llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll (+120-4) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6617373f89c8b..3915028b557d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -2130,6 +2130,11 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
   if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())
     return true;
 
+  // A load tagged with !nonnull metadata is known to yield a non-null pointer.
+  if (const auto *Load = dyn_cast<LoadInst>(V);
+      Load && Load->hasMetadata(LLVMContext::MD_nonnull))
+    return true;
+
   // getUnderlyingObject may have looked through another addrspacecast, although
   // the optimizable situations most likely folded out by now.
   if (AS != cast<PointerType>(V->getType())->getAddressSpace())
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index 3216e71e6221a..31c23b94a8de8 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -88,6 +88,122 @@ define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
   ret void
 }
 
+define void @local_to_flat_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @local_to_flat_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT:    [[PTR:%.*]] = load ptr addrspace(3), ptr [[P]], align 4, !nonnull [[META0:![0-9]+]]
+; OPT-NEXT:    [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
+; OPT-NEXT:    store volatile i32 7, ptr [[X]], align 4
+; OPT-NEXT:    ret void
+;
+; ASM-LABEL: local_to_flat_nonnull_load:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    flat_load_dword v0, v[0:1]
+; ASM-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; ASM-NEXT:    v_mov_b32_e32 v1, s5
+; ASM-NEXT:    v_mov_b32_e32 v2, 7
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    flat_store_dword v[0:1], v2
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %ptr = load ptr addrspace(3), ptr %p, !nonnull !{}
+  %x = addrspacecast ptr addrspace(3) %ptr to ptr
+  store volatile i32 7, ptr %x
+  ret void
+}
+
+define void @private_to_flat_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @private_to_flat_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT:    [[PTR:%.*]] = load ptr addrspace(5), ptr [[P]], align 4, !nonnull [[META0]]
+; OPT-NEXT:    [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
+; OPT-NEXT:    store volatile i32 7, ptr [[X]], align 4
+; OPT-NEXT:    ret void
+;
+; ASM-LABEL: private_to_flat_nonnull_load:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    flat_load_dword v0, v[0:1]
+; ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
+; ASM-NEXT:    v_mov_b32_e32 v1, s5
+; ASM-NEXT:    v_mov_b32_e32 v2, 7
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    flat_store_dword v[0:1], v2
+; ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %ptr = load ptr addrspace(5), ptr %p, !nonnull !{}
+  %x = addrspacecast ptr addrspace(5) %ptr to ptr
+  store volatile i32 7, ptr %x
+  ret void
+}
+
+define void @flat_to_local_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @flat_to_local_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT:    [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
+; OPT-NEXT:    [[X:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
+; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) [[X]], align 4
+; OPT-NEXT:    ret void
+;
+; DAGISEL-ASM-LABEL: flat_to_local_nonnull_load:
+; DAGISEL-ASM:       ; %bb.0:
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    flat_load_dword v0, v[0:1]
+; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, 7
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    ds_write_b32 v0, v1
+; DAGISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: flat_to_local_nonnull_load:
+; GISEL-ASM:       ; %bb.0:
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, 7
+; GISEL-ASM-NEXT:    ds_write_b32 v0, v1
+; GISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+  %ptr = load ptr, ptr %p, !nonnull !{}
+  %x = addrspacecast ptr %ptr to ptr addrspace(3)
+  store volatile i32 7, ptr addrspace(3) %x
+  ret void
+}
+
+define void @flat_to_private_nonnull_load(ptr %p) {
+; OPT-LABEL: define void @flat_to_private_nonnull_load(
+; OPT-SAME: ptr [[P:%.*]]) {
+; OPT-NEXT:    [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
+; OPT-NEXT:    [[X:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
+; OPT-NEXT:    store volatile i32 7, ptr addrspace(5) [[X]], align 4
+; OPT-NEXT:    ret void
+;
+; DAGISEL-ASM-LABEL: flat_to_private_nonnull_load:
+; DAGISEL-ASM:       ; %bb.0:
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    flat_load_dword v0, v[0:1]
+; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, 7
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
+; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: flat_to_private_nonnull_load:
+; GISEL-ASM:       ; %bb.0:
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, 7
+; GISEL-ASM-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+  %ptr = load ptr, ptr %p, !nonnull !{}
+  %x = addrspacecast ptr %ptr to ptr addrspace(5)
+  store volatile i32 7, ptr addrspace(5) %x
+  ret void
+}
+
 define void @private_alloca_to_flat(ptr %ptr) {
 ; OPT-LABEL: define void @private_alloca_to_flat(
 ; OPT-SAME: ptr [[PTR:%.*]]) {
@@ -218,7 +334,7 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], 0
 ; DAGISEL-ASM-NEXT:    s_mov_b64 s[8:9], src_private_base
 ; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
-; DAGISEL-ASM-NEXT:  .LBB7_3: ; %finally
+; DAGISEL-ASM-NEXT:  .LBB11_3: ; %finally
 ; DAGISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; DAGISEL-ASM-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
 ; DAGISEL-ASM-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
@@ -226,7 +342,7 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; DAGISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
 ; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
 ; DAGISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; DAGISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
+; DAGISEL-ASM-NEXT:    s_cbranch_execnz .LBB11_3
 ; DAGISEL-ASM-NEXT:  ; %bb.4: ; %end
 ; DAGISEL-ASM-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; DAGISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
@@ -249,14 +365,14 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
 ; GISEL-ASM-NEXT:    s_mov_b64 s[6:7], 0
 ; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, s9
 ; GISEL-ASM-NEXT:    v_mov_b32_e32 v2, 7
-; GISEL-ASM-NEXT:  .LBB7_3: ; %finally
+; GISEL-ASM-NEXT:  .LBB11_3: ; %finally
 ; GISEL-ASM-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GISEL-ASM-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
 ; GISEL-ASM-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
 ; GISEL-ASM-NEXT:    flat_store_dword v[0:1], v2
 ; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0)
 ; GISEL-ASM-NEXT:    s_andn2_b64 exec, exec, s[6:7]
-; GISEL-ASM-NEXT:    s_cbranch_execnz .LBB7_3
+; GISEL-ASM-NEXT:    s_cbranch_execnz .LBB11_3
 ; GISEL-ASM-NEXT:  ; %bb.4: ; %end
 ; GISEL-ASM-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)

``````````



https://github.com/llvm/llvm-project/pull/138184

