[clang] [Clang] Fix invalid sret addrspacecast for placement new on HIP (PR #183639)
Vigneshwar Jayakumar via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 3 15:09:59 PST 2026
https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/183639
>From c91d4e3d461ca477131997e22729d6b45f3b53bb Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 26 Feb 2026 16:37:30 -0600
Subject: [PATCH 1/5] [Clang] Fix invalid sret addrspacecast for 'new' on AMDGPU
When a HIP kernel uses placement new with a function returning an
aggregate via sret (e.g. `new (out) T(make_t())`), and the placement
destination is in global memory (addrspace 1), CGCall's EmitCall would
addrspacecast the pointer to the callee's expected sret address space
(addrspace 5 / private). The addrspacecast produces an invalid pointer
that faults at runtime.
Instead of casting the caller's pointer directly, materialise a
temporary alloca in the callee's expected address space, pass that as
the sret argument, and copy the result back to the original destination
after the call.
---
clang/lib/CodeGen/CGCall.cpp | 34 +++++++--
.../CodeGenHIP/placement-new-addrspace.hip | 71 +++++++++++++++++++
2 files changed, 100 insertions(+), 5 deletions(-)
create mode 100644 clang/test/CodeGenHIP/placement-new-addrspace.hip
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 1d950ffed8a0b..91fac02a667f8 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5427,7 +5427,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
+ Address OriginalSRetPtr = Address::invalid();
bool NeedSRetLifetimeEnd = false;
+ bool NeedSRetCopyBack = false;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
// For virtual function pointer thunks and musttail calls, we must always
// forward an incoming SRet pointer to the callee, because a local alloca
@@ -5439,6 +5441,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RetTy, CharUnits::fromQuantity(1));
} else if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getAddress();
+ OriginalSRetPtr = SRetPtr;
} else {
SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp");
if (HaveInsertPoint() && ReturnValue.isUnused())
@@ -5450,12 +5453,26 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// a chain involving stores to / loads from the DefaultAS; we address this
// here, symmetrically with the handling we have for normal pointer args.
if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
- llvm::Value *V = SRetPtr.getBasePointer();
- llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
- RetAI.getIndirectAddrSpace());
+ // If the caller supplied a destination in a different address space,
+ // materialize the call result in a temporary with the callee's expected
+ // sret address space and copy back after the call.
+ if (OriginalSRetPtr.isValid() && !IsVirtualFunctionPointerThunk &&
+ !IsMustTail) {
+ Address TmpSRetPtr = CreateMemTempWithoutCast(RetTy, "tmp.sret");
+ if (TmpSRetPtr.getAddressSpace() == RetAI.getIndirectAddrSpace()) {
+ SRetPtr = TmpSRetPtr;
+ NeedSRetCopyBack = true;
+ }
+ }
+
+ if (!NeedSRetCopyBack) {
+ llvm::Value *V = SRetPtr.getBasePointer();
+ llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
+ RetAI.getIndirectAddrSpace());
- SRetPtr = SRetPtr.withPointer(performAddrSpaceCast(V, Ty),
- SRetPtr.isKnownNonNull());
+ SRetPtr = SRetPtr.withPointer(performAddrSpaceCast(V, Ty),
+ SRetPtr.isKnownNonNull());
+ }
}
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
getAsNaturalPointerTo(SRetPtr, RetTy);
@@ -6271,6 +6288,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// lexical order, so deactivate it and run it manually here.
CallArgs.freeArgumentMemory(*this);
+ if (NeedSRetCopyBack) {
+ LValue DstLV = MakeAddrLValue(OriginalSRetPtr, RetTy);
+ LValue SrcLV = MakeAddrLValue(SRetPtr, RetTy);
+ EmitAggregateCopy(DstLV, SrcLV, RetTy, AggValueSlot::MayOverlap);
+ SRetPtr = OriginalSRetPtr;
+ }
+
// Extract the return value.
RValue Ret;
diff --git a/clang/test/CodeGenHIP/placement-new-addrspace.hip b/clang/test/CodeGenHIP/placement-new-addrspace.hip
new file mode 100644
index 0000000000000..106d9ca4867a4
--- /dev/null
+++ b/clang/test/CodeGenHIP/placement-new-addrspace.hip
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel" --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -std=c++17 %s -o - | FileCheck %s
+// REQUIRES: amdgpu-registered-target
+
+// Verify that when a function returning an aggregate via sret is called with a
+// destination in a different address space (e.g. global pointer from kernel
+// arg), the compiler materialises a temporary in the alloca AS and copies back,
+// rather than emitting an invalid addrspacecast of the destination pointer.
+
+typedef __SIZE_TYPE__ size_t;
+__attribute__((device)) void *operator new(size_t, void *p) noexcept { return p; }
+
+struct Big {
+ int v[32];
+ __attribute__((device)) Big(int x) {
+ for (int i = 0; i < 32; ++i)
+ v[i] = x + i;
+ }
+};
+
+// CHECK-LABEL: define dso_local void @_Z8make_bigv(
+// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+__attribute__((device)) Big make_big() { return Big(7); }
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big(
+// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP_SRET:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP_SRET]]) #[[ATTR3]]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP_SRET]], i64 128, i1 false)
+// CHECK-NEXT: ret void
+//
+__attribute__((global)) void kernel(Big *out) {
+ new (out) Big(make_big());
+}
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z13kernel_assignP3Big(
+// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
+// CHECK-NEXT: [[TMP_SRET:%.*]] = alloca [[STRUCT_BIG]], align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP_SRET]]) #[[ATTR3]]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[REF_TMP_ASCAST]], ptr addrspace(5) align 4 [[TMP_SRET]], i64 128, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[REF_TMP_ASCAST]], i64 128, i1 false)
+// CHECK-NEXT: ret void
+//
+__attribute__((global)) void kernel_assign(Big *out) {
+ *out = make_big();
+}
>From f1760c0fa59e9bc664737e939a31936480eed7d5 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 26 Feb 2026 17:07:21 -0600
Subject: [PATCH 2/5] moved implementation to CGExprAgg
---
clang/lib/CodeGen/CGCall.cpp | 34 +++----------------
clang/lib/CodeGen/CGExprAgg.cpp | 10 ++++--
.../CodeGenHIP/placement-new-addrspace.hip | 29 ++--------------
3 files changed, 16 insertions(+), 57 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 91fac02a667f8..1d950ffed8a0b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5427,9 +5427,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
- Address OriginalSRetPtr = Address::invalid();
bool NeedSRetLifetimeEnd = false;
- bool NeedSRetCopyBack = false;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
// For virtual function pointer thunks and musttail calls, we must always
// forward an incoming SRet pointer to the callee, because a local alloca
@@ -5441,7 +5439,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RetTy, CharUnits::fromQuantity(1));
} else if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getAddress();
- OriginalSRetPtr = SRetPtr;
} else {
SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp");
if (HaveInsertPoint() && ReturnValue.isUnused())
@@ -5453,26 +5450,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// a chain involving stores to / loads from the DefaultAS; we address this
// here, symmetrically with the handling we have for normal pointer args.
if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
- // If the caller supplied a destination in a different address space,
- // materialize the call result in a temporary with the callee's expected
- // sret address space and copy back after the call.
- if (OriginalSRetPtr.isValid() && !IsVirtualFunctionPointerThunk &&
- !IsMustTail) {
- Address TmpSRetPtr = CreateMemTempWithoutCast(RetTy, "tmp.sret");
- if (TmpSRetPtr.getAddressSpace() == RetAI.getIndirectAddrSpace()) {
- SRetPtr = TmpSRetPtr;
- NeedSRetCopyBack = true;
- }
- }
-
- if (!NeedSRetCopyBack) {
- llvm::Value *V = SRetPtr.getBasePointer();
- llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
- RetAI.getIndirectAddrSpace());
+ llvm::Value *V = SRetPtr.getBasePointer();
+ llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
+ RetAI.getIndirectAddrSpace());
- SRetPtr = SRetPtr.withPointer(performAddrSpaceCast(V, Ty),
- SRetPtr.isKnownNonNull());
- }
+ SRetPtr = SRetPtr.withPointer(performAddrSpaceCast(V, Ty),
+ SRetPtr.isKnownNonNull());
}
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
getAsNaturalPointerTo(SRetPtr, RetTy);
@@ -6288,13 +6271,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// lexical order, so deactivate it and run it manually here.
CallArgs.freeArgumentMemory(*this);
- if (NeedSRetCopyBack) {
- LValue DstLV = MakeAddrLValue(OriginalSRetPtr, RetTy);
- LValue SrcLV = MakeAddrLValue(SRetPtr, RetTy);
- EmitAggregateCopy(DstLV, SrcLV, RetTy, AggValueSlot::MayOverlap);
- SRetPtr = OriginalSRetPtr;
- }
-
// Extract the return value.
RValue Ret;
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 0ca6af3def57f..3957904314698 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -286,8 +286,14 @@ void AggExprEmitter::withReturnValueSlot(
// We need to always provide our own temporary if destruction is required.
// Otherwise, EmitCall will emit its own, notice that it's "unused", and end
// its lifetime before we have the chance to emit a proper destructor call.
- bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
- (RequiresDestruction && Dest.isIgnored());
+ //
+ // We also need a temporary if the destination is in a different address space
+ // from the alloca AS, to avoid an invalid addrspacecast on the sret pointer.
+ bool UseTemp =
+ Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
+ (RequiresDestruction && Dest.isIgnored()) ||
+ (!Dest.isIgnored() && Dest.getAddress().getAddressSpace() !=
+ CGF.CGM.getDataLayout().getAllocaAddrSpace());
Address RetAddr = Address::invalid();
diff --git a/clang/test/CodeGenHIP/placement-new-addrspace.hip b/clang/test/CodeGenHIP/placement-new-addrspace.hip
index 106d9ca4867a4..07c7e87c37a60 100644
--- a/clang/test/CodeGenHIP/placement-new-addrspace.hip
+++ b/clang/test/CodeGenHIP/placement-new-addrspace.hip
@@ -32,40 +32,17 @@ __attribute__((device)) Big make_big() { return Big(7); }
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[TMP_SRET:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP_SRET]]) #[[ATTR3]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP_SRET]], i64 128, i1 false)
+// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]]
+// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false)
// CHECK-NEXT: ret void
//
__attribute__((global)) void kernel(Big *out) {
new (out) Big(make_big());
}
-
-// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z13kernel_assignP3Big(
-// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
-// CHECK-NEXT: [[TMP_SRET:%.*]] = alloca [[STRUCT_BIG]], align 4, addrspace(5)
-// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
-// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// CHECK-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
-// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
-// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP_SRET]]) #[[ATTR3]]
-// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[REF_TMP_ASCAST]], ptr addrspace(5) align 4 [[TMP_SRET]], i64 128, i1 false)
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[REF_TMP_ASCAST]], i64 128, i1 false)
-// CHECK-NEXT: ret void
-//
-__attribute__((global)) void kernel_assign(Big *out) {
- *out = make_big();
-}
>From 0667723cccd2e54cc83d53c1fa9ea5a13d674a1e Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 26 Feb 2026 17:49:46 -0600
Subject: [PATCH 3/5] missed test
---
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index 99ca31b78e1fc..8d2c035e23472 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -16,15 +16,13 @@ E::E() noexcept : foo(s()) {}
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5)
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
-// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
-// CHECK-LABEL: declare void @_Z1sv(
-// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1) #[[ATTR1:[0-9]+]]
//
// CHECK-LABEL: define hidden void @_ZN1EC1Ev(
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
>From d0eb6b1f235e3447c8bd77cd1a95e88ca2697b04 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Sun, 1 Mar 2026 18:48:34 -0600
Subject: [PATCH 4/5] review change
---
clang/lib/CodeGen/CGExprAgg.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 3957904314698..efcf570cdda10 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -293,7 +293,8 @@ void AggExprEmitter::withReturnValueSlot(
Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
(RequiresDestruction && Dest.isIgnored()) ||
(!Dest.isIgnored() && Dest.getAddress().getAddressSpace() !=
- CGF.CGM.getDataLayout().getAllocaAddrSpace());
+ CGF.getContext().getTargetAddressSpace(
+ CGF.CGM.getASTAllocaAddressSpace()));
Address RetAddr = Address::invalid();
>From c3bec8f44acab3c0aac8bd9b8e0ae8f5ef63f6af Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Tue, 3 Mar 2026 17:09:33 -0600
Subject: [PATCH 5/5] review comments
---
clang/test/CodeGenHIP/placement-new-addrspace.hip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenHIP/placement-new-addrspace.hip b/clang/test/CodeGenHIP/placement-new-addrspace.hip
index 07c7e87c37a60..27f799b05193b 100644
--- a/clang/test/CodeGenHIP/placement-new-addrspace.hip
+++ b/clang/test/CodeGenHIP/placement-new-addrspace.hip
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel" --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip -std=c++17 %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s
// REQUIRES: amdgpu-registered-target
// Verify that when a function returning an aggregate via sret is called with a
More information about the cfe-commits
mailing list