[clang] [clang][CodeGen] add addr space cast if needed when storing ptrs (PR #154380)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Aug 19 09:44:26 PDT 2025
https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/154380
Fixes a bug on AMDGPU targets where a pointer value in address space 5 was stored to memory without an address space cast, but was later loaded back as an address space 0 pointer.
The issue was found during [Kokkos](https://github.com/kokkos/kokkos) testing, specifically in `hip.atomics` (see [core/unit_test/TestAtomics.hpp](https://github.com/kokkos/kokkos/blob/develop/core/unit_test/TestAtomics.hpp)).
The issue was introduced by commit [39ec9de7c230](https://github.com/llvm/llvm-project/commit/39ec9de7c230), "[clang][CodeGen] sret args should always point to the alloca AS, so use that" (https://github.com/llvm/llvm-project/pull/114062).
From 1af9c61c2ea4797145fc3e41a4fdebe6548a7dd9 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Mon, 18 Aug 2025 06:26:49 -0500
Subject: [PATCH] [clang][CodeGen] add addr space cast if needed when storing
ptrs
---
clang/lib/CodeGen/CGExpr.cpp | 12 ++++++
clang/test/CodeGenCXX/amdgcn-func-arg.cpp | 7 ++--
clang/test/CodeGenHIP/store-addr-space.hip | 47 ++++++++++++++++++++++
3 files changed, 63 insertions(+), 3 deletions(-)
create mode 100644 clang/test/CodeGenHIP/store-addr-space.hip
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d229d81d6b934..c02e84eb753e9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2209,6 +2209,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
}
}
+ // When storing a pointer, perform address space cast if needed.
+ if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) {
+ if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) {
+ LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace());
+ LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace());
+ if (ValueAS != MemAS) {
+ Value =
+ getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, MemTy);
+ }
+ }
+ }
+
Value = EmitToMemory(Value, Ty);
LValue AtomicLValue =
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index a5f83dc91b038..21945bfc36677 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -24,9 +24,10 @@ void func_with_ref_arg(B &b);
// CHECK-NEXT: [[P:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_INDIRECT_ADDR]] to ptr
// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to ptr
-// CHECK-NEXT: store ptr addrspace(5) [[A:%.*]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8
+// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] to ptr
+// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8
// CHECK-NEXT: ret void
//
void func_with_indirect_arg(A a) {
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
new file mode 100644
index 0000000000000..46ab1157d0704
--- /dev/null
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
+// RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s
+
+struct Foo {
+ unsigned long long val;
+//
+ __attribute__((device)) inline Foo() { val = 0; }
+ __attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
+ __attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; }
+};
+
+// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
+// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
+// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
+// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
+// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0
+// AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
+// AMDGCN-NEXT: ret void
+//
+__attribute__((device)) Foo bar(const Foo *const src_ptr) {
+ Foo result;
+
+ union {
+ Foo* const ptr;
+ unsigned long long * const ptr64;
+ } dst = {&result};
+
+ dst.ptr64[0] = src_ptr->val;
+ return result;
+}
More information about the cfe-commits
mailing list