[llvm] AMDGPU: Fix assert from wrong address space size assumption (PR #97267)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 02:11:52 PDT 2024


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/97267

This was assuming the source address space was at least as large
as the destination of the cast. I'm not sure why this was truncating
to begin with; the underlying assumption seems to be that the source
address space of the root addrspacecast matches that of the underlying
object, so directly check that instead.
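
As a worked example of the failure mode (a sketch for illustration only,
not part of the patch; the names and widths assume the amdgcn datalayout,
where flat AS 0 pointers are 64 bits and private AS 5 pointers are 32
bits):

  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Visiting `addrspacecast ptr %flat to ptr addrspace(5)`, the source
  // address space AS is 0 (flat). getUnderlyingObject may look through an
  // inner addrspacecast and hand back the original `ptr addrspace(5)`
  // value, whose pointer width is only 32 bits.
  void sketch() {
    KnownBits SrcPtrKB(32);  // computeKnownBits of the 32-bit underlying object
    unsigned FlatBits = 64;  // DL.getPointerSizeInBits(/*AS=*/0)
    // KnownBits::trunc (like APInt::trunc) requires the new width to be no
    // larger than the current one, so truncating 32 -> 64 bits asserts.
    SrcPtrKB = SrcPtrKB.trunc(FlatBits);
  }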

From c89f1c2a18574cd4acd4815086dc20dae7b2fd70 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 30 Jun 2024 11:56:51 +0200
Subject: [PATCH] AMDGPU: Fix assert from wrong address space size assumption

This was assuming the source address space was at least as large
as the destination of the cast. I'm not sure why this was truncating
to begin with; the underlying assumption seems to be that the source
address space of the root addrspacecast matches that of the underlying
object, so directly check that instead.
---
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |  9 +-
 .../codegen-prepare-addrspacecast-non-null.ll | 91 ++++++++++++++++++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6e7d34f5adaa3..052e1140533f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -2035,6 +2035,11 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
   if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())
     return true;
 
+  // getUnderlyingObject may have looked through another addrspacecast,
+  // although the optimizable situations have most likely been folded out by now.
+  if (AS != cast<PointerType>(V->getType())->getAddressSpace())
+    return false;
+
   // TODO: Calls that return nonnull?
 
   // For all other things, use KnownBits.
@@ -2043,8 +2048,10 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
   //
   // TODO: Use ValueTracking's isKnownNeverNull if it becomes aware that some
   // address spaces have non-zero null values.
-  auto SrcPtrKB = computeKnownBits(V, DL).trunc(DL.getPointerSizeInBits(AS));
+  auto SrcPtrKB = computeKnownBits(V, DL);
   const auto NullVal = TM.getNullPointerValue(AS);
+
+  assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
   assert((NullVal == 0 || NullVal == -1) &&
          "don't know how to check for this null value!");
   return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index cddc3161038b0..e2b4865410db8 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
@@ -270,3 +270,92 @@ finally:
 end:
   ret void
 }
+
+; This used to assert due to assuming the source address space was at least
+; as large as the destination of the cast.
+
+define i32 @cast_private_to_flat_to_private(ptr addrspace(5) %private.ptr) {
+; OPT-LABEL: define i32 @cast_private_to_flat_to_private(
+; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
+; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
+; OPT-NEXT:    [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(5)
+; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(5) [[CAST_BACK]], align 4
+; OPT-NEXT:    ret i32 [[LOAD]]
+;
+; ASM-LABEL: cast_private_to_flat_to_private:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; ASM-NEXT:    s_waitcnt vmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
+  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(5)
+  %load = load volatile i32, ptr addrspace(5) %cast.back
+  ret i32 %load
+}
+
+; This is UB but shouldn't assert.
+define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) {
+; OPT-LABEL: define i32 @cast_private_to_flat_to_local(
+; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
+; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
+; OPT-NEXT:    [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[CAST_BACK]], align 4
+; OPT-NEXT:    ret i32 [[LOAD]]
+;
+; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local:
+; DAGISEL-ASM:       ; %bb.0:
+; DAGISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
+; DAGISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
+; DAGISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; DAGISEL-ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; DAGISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; DAGISEL-ASM-NEXT:    ds_read_b32 v0, v0
+; DAGISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: cast_private_to_flat_to_local:
+; GISEL-ASM:       ; %bb.0:
+; GISEL-ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GISEL-ASM-NEXT:    v_mov_b32_e32 v1, s5
+; GISEL-ASM-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; GISEL-ASM-NEXT:    ds_read_b32 v0, v0
+; GISEL-ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-ASM-NEXT:    s_setpc_b64 s[30:31]
+  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
+  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+  %load = load volatile i32, ptr addrspace(3) %cast.back
+  ret i32 %load
+}
+
+; This is UB but shouldn't assert.
+define i32 @cast_const32_to_flat_to_local(ptr addrspace(6) %const32.ptr) {
+; OPT-LABEL: define i32 @cast_const32_to_flat_to_local(
+; OPT-SAME: ptr addrspace(6) [[CONST32_PTR:%.*]]) {
+; OPT-NEXT:    [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(6) [[CONST32_PTR]] to ptr
+; OPT-NEXT:    [[LOCAL_PTR:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
+; OPT-NEXT:    ret i32 [[LOAD]]
+;
+; ASM-LABEL: cast_const32_to_flat_to_local:
+; ASM:       ; %bb.0:
+; ASM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT:    v_mov_b32_e32 v1, 0
+; ASM-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; ASM-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; ASM-NEXT:    ds_read_b32 v0, v0
+; ASM-NEXT:    s_waitcnt lgkmcnt(0)
+; ASM-NEXT:    s_setpc_b64 s[30:31]
+  %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
+  %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+  %load = load volatile i32, ptr addrspace(3) %local.ptr
+  ret i32 %load
+}
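
For reproduction, running the new functions through the first RUN line
above (opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S) against an
assertions-enabled build without this fix should hit the KnownBits
truncation assert described in the commit message.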