[llvm] AMDGPU: Fix assert from wrong address space size assumption (PR #97267)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 02:12:20 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This was assuming the source address space was at least as large
as the destination of the cast, which is not always true, so the
KnownBits truncation could assert. I'm not sure why the code was
truncating to begin with; the assumption seems to be that the source
address space from the root addrspacecast matches the underlying
object, so directly check that instead.
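
For context, here is a minimal standalone sketch (not the patched LLVM code; the address-space numbers and pointer widths follow the amdgcn defaults, and the function names are illustrative only) of why the old truncation asserted and what the new guard checks:

```cpp
// Sketch only: models the pointer widths involved, not the real KnownBits logic.
#include <cassert>
#include <cstdio>
#include <map>

// Pointer widths per address space, as on amdgcn: flat (0) is 64-bit,
// local (3) and private (5) are 32-bit.
const std::map<unsigned, unsigned> PointerBits = {{0, 64}, {3, 32}, {5, 32}};

// Old behaviour: known bits were computed at the width of the underlying
// object's pointer and then truncated to the width of AS. When the underlying
// object came from a 32-bit space and AS was the 64-bit flat space, the
// "truncation" would have to widen, which is what the assert rejected.
bool oldIsPtrKnownNeverNull(unsigned UnderlyingAS, unsigned AS) {
  unsigned SrcWidth = PointerBits.at(UnderlyingAS);
  unsigned DstWidth = PointerBits.at(AS);
  assert(DstWidth <= SrcWidth && "invalid truncate request"); // fired here
  return false; // known-bits reasoning elided
}

// New behaviour: if getUnderlyingObject looked through another addrspacecast
// so the address spaces disagree, conservatively give up; otherwise the
// widths match by construction and no truncation is needed.
bool newIsPtrKnownNeverNull(unsigned UnderlyingAS, unsigned AS) {
  if (AS != UnderlyingAS)
    return false;
  return false; // known-bits reasoning elided
}

int main() {
  // A private (5) object reached through a flat (0) cast, as in the new
  // cast_private_to_flat_to_private test: the old path would have asserted.
  std::printf("%d\n", (int)newIsPtrKnownNeverNull(/*UnderlyingAS=*/5, /*AS=*/0));
}
```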
---
Full diff: https://github.com/llvm/llvm-project/pull/97267.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+8-1)
- (modified) llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll (+90-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 6e7d34f5adaa3..052e1140533f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -2035,6 +2035,11 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())
return true;
+ // getUnderlyingObject may have looked through another addrspacecast, although
+ // the optimizable situations were most likely folded out by now.
+ if (AS != cast<PointerType>(V->getType())->getAddressSpace())
+ return false;
+
// TODO: Calls that return nonnull?
// For all other things, use KnownBits.
@@ -2043,8 +2048,10 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
//
// TODO: Use ValueTracking's isKnownNeverNull if it becomes aware that some
// address spaces have non-zero null values.
- auto SrcPtrKB = computeKnownBits(V, DL).trunc(DL.getPointerSizeInBits(AS));
+ auto SrcPtrKB = computeKnownBits(V, DL);
const auto NullVal = TM.getNullPointerValue(AS);
+
+ assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
assert((NullVal == 0 || NullVal == -1) &&
"don't know how to check for this null value!");
return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index cddc3161038b0..e2b4865410db8 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM
@@ -270,3 +270,92 @@ finally:
end:
ret void
}
+
+; This used to assert due to assuming the size of the source address
+; space was larger than the destination.
+
+define i32 @cast_private_to_flat_to_private(ptr addrspace(5) %private.ptr) {
+; OPT-LABEL: define i32 @cast_private_to_flat_to_private(
+; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
+; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
+; OPT-NEXT: [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(5)
+; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(5) [[CAST_BACK]], align 4
+; OPT-NEXT: ret i32 [[LOAD]]
+;
+; ASM-LABEL: cast_private_to_flat_to_private:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; ASM-NEXT: s_waitcnt vmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
+ %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(5)
+ %load = load volatile i32, ptr addrspace(5) %cast.back
+ ret i32 %load
+}
+
+; This is UB but shouldn't assert.
+define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) {
+; OPT-LABEL: define i32 @cast_private_to_flat_to_local(
+; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
+; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
+; OPT-NEXT: [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[CAST_BACK]], align 4
+; OPT-NEXT: ret i32 [[LOAD]]
+;
+; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local:
+; DAGISEL-ASM: ; %bb.0:
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
+; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
+; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; DAGISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0
+; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-ASM-LABEL: cast_private_to_flat_to_local:
+; GISEL-ASM: ; %bb.0:
+; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
+; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
+; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GISEL-ASM-NEXT: ds_read_b32 v0, v0
+; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
+ %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
+ %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+ %load = load volatile i32, ptr addrspace(3) %cast.back
+ ret i32 %load
+}
+
+; This is UB but shouldn't assert.
+define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) {
+; OPT-LABEL: define i32 @cast_private_to_flat_to_global(
+; OPT-SAME: ptr addrspace(6) [[CONST32_PTR:%.*]]) {
+; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(6) [[CONST32_PTR]] to ptr
+; OPT-NEXT: [[LOCAL_PTR:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
+; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
+; OPT-NEXT: ret i32 [[LOAD]]
+;
+; ASM-LABEL: cast_private_to_flat_to_global:
+; ASM: ; %bb.0:
+; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ASM-NEXT: v_mov_b32_e32 v1, 0
+; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; ASM-NEXT: ds_read_b32 v0, v0
+; ASM-NEXT: s_waitcnt lgkmcnt(0)
+; ASM-NEXT: s_setpc_b64 s[30:31]
+ %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
+ %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
+ %load = load volatile i32, ptr addrspace(3) %local.ptr
+ ret i32 %load
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/97267