[clang] [Clang] Fix sret AS for non-trivial-copy returns. (PR #186275)
Vigneshwar Jayakumar via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 14 09:53:43 PDT 2026
https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/186275
>From fa081737217b635c8e5c0c53a1a63cc98cf98a2b Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Thu, 12 Mar 2026 17:57:15 -0500
Subject: [PATCH 1/5] [Clang] Fix sret AS for non-trivially-copyable returns.
ItaniumCXXABI::classifyReturnType used getAllocaAddrSpace() for sret,
forcing callers to pass the sret pointer in the alloca address space.
This is wrong whenever the caller's destination is in the default
address space, in the following cases:
Non-trivially-copyable types cannot be copied out of an alloca
temporary, and types with deleted copy/move constructors make any
temporary-plus-memcpy workaround invalid.
Use LangAS::Default instead so the caller can pass any default-AS
pointer directly.
Fixes issue #185744
---
clang/lib/CodeGen/CGExprAgg.cpp | 7 -----
clang/lib/CodeGen/ItaniumCXXABI.cpp | 11 ++++---
clang/lib/CodeGen/MicrosoftCXXABI.cpp | 7 +++--
.../test/CodeGenCXX/no-elide-constructors.cpp | 3 +-
.../CodeGenHIP/sret-nontrivial-copyable.hip | 29 ++++++++++++++-----
clang/test/CodeGenHIP/store-addr-space.hip | 11 ++++---
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 +-
7 files changed, 39 insertions(+), 32 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 3a4291719da74..8aad0294fb36e 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -309,13 +309,6 @@ void AggExprEmitter::withReturnValueSlot(
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
if (!UseTemp) {
RetAddr = Dest.getAddress();
- if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) {
- llvm::Type *SRetPtrTy =
- llvm::PointerType::get(CGF.getLLVMContext(), SRetAS);
- RetAddr = RetAddr.withPointer(
- CGF.performAddrSpaceCast(RetAddr.getBasePointer(), SRetPtrTy),
- RetAddr.isKnownNonNull());
- }
} else {
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) {
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 8a06051a1c730..f1c3af0f0634d 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -1381,12 +1381,15 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (!RD)
return false;
- // If C++ prohibits us from making a copy, return by address.
+ // If C++ prohibits us from making a copy, return by address using the
+ // language default AS. The alloca AS would force callers to provide a
+ // stack pointer, which is invalid when the destination is a default AS
+ // and prohibited for types with deleted copy/move constructors.
if (!RD->canPassInRegisters()) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
- FI.getReturnInfo() = ABIArgInfo::getIndirect(
- Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
- /*ByVal=*/false);
+ auto DefaultAS = CGM.getContext().getTargetAddressSpace(LangAS::Default);
+ FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS,
+ /*ByVal=*/false);
return true;
}
return false;
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 06fce6171eb28..ac04bd267ce67 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1197,9 +1197,10 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (isIndirectReturn) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
- FI.getReturnInfo() = ABIArgInfo::getIndirect(
- Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
- /*ByVal=*/false);
+ unsigned DefaultAS =
+ CGM.getContext().getTargetAddressSpace(LangAS::Default);
+ FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS,
+ /*ByVal=*/false);
// MSVC always passes `this` before the `sret` parameter.
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp
index 994282debb0d0..66c4a4895035d 100644
--- a/clang/test/CodeGenCXX/no-elide-constructors.cpp
+++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp
@@ -26,8 +26,7 @@ X Test()
// sret argument.
// CHECK-CXX98: call void @_ZN1XC1ERKS_(
// CHECK-CXX11: call void @_ZN1XC1EOS_(
- // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
- // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]
+ // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]]
// CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_(
// CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_(
// CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_(
diff --git a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip
index ee39104470fa1..31ac0f2e4b5c4 100644
--- a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip
+++ b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip
@@ -1,15 +1,15 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck %s
// REQUIRES: amdgpu-registered-target
// Verify that a non-trivially-copyable type returned via sret into a member
-// field does not trigger an EmitAggregateCopy assertion.
-// fix for a buildbot failure
+// field uses addrspace(0) for the sret pointer (not addrspace(5)). So
+// in-place construction through the addrspace(0) is the only legal option.
struct NontrivialPtr {
void *p;
NontrivialPtr() noexcept;
- NontrivialPtr(const NontrivialPtr &) noexcept;
- NontrivialPtr &operator=(const NontrivialPtr &) noexcept;
+ NontrivialPtr(const NontrivialPtr &) = delete;
+ NontrivialPtr(NontrivialPtr &&) = delete;
~NontrivialPtr() noexcept;
};
@@ -21,14 +21,27 @@ struct Wrapper {
virtual ~Wrapper() noexcept;
};
-// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev(
+Wrapper::Wrapper() noexcept : field(make()) {}
+// CHECK-LABEL: define dso_local void @_ZN7WrapperC2Ev(
// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr [[THIS1]], align 8
+// CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK-NEXT: call void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR:%.*]]) align 8 [[FIELD]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2]]
// CHECK-NEXT: ret void
//
-Wrapper::Wrapper() noexcept : field(make()) {}
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
index 6103edba46274..eaca0c76477cb 100644
--- a/clang/test/CodeGenHIP/store-addr-space.hip
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -12,19 +12,18 @@ struct Foo {
};
// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
-// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
-// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
-// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8
// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
-// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
-// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index fc6f7c15eb5e6..5d2f63c61e57d 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -19,8 +19,7 @@ E::E() noexcept : foo(s()) {}
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
-// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
//
>From 88038778da8516fd2696f16c29820516d74203c8 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Wed, 25 Mar 2026 14:01:01 -0500
Subject: [PATCH 2/5] changes
---
clang/lib/CodeGen/CGExprAgg.cpp | 17 ++++++++----
clang/lib/CodeGen/ItaniumCXXABI.cpp | 13 +++++-----
clang/lib/CodeGen/MicrosoftCXXABI.cpp | 10 ++++---
clang/lib/CodeGen/TargetInfo.h | 7 +++++
clang/lib/CodeGen/Targets/AMDGPU.cpp | 26 +++++++++++++++++++
.../test/CodeGenCXX/no-elide-constructors.cpp | 3 ++-
clang/test/CodeGenHIP/store-addr-space.hip | 11 ++++----
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 ++-
8 files changed, 67 insertions(+), 23 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 8aad0294fb36e..acd29f5b7546c 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -288,11 +288,11 @@ void AggExprEmitter::withReturnValueSlot(
// its lifetime before we have the chance to emit a proper destructor call.
//
// We also need a temporary if the destination is in a different address space
- // from the alloca AS, to avoid an invalid addrspacecast on the sret pointer.
- // Look through addrspacecasts to avoid unnecessary temps when the
- // destination is already in the alloca AS.
- unsigned SRetAS = CGF.getContext().getTargetAddressSpace(
- CGF.CGM.getASTAllocaAddressSpace());
+ // from the sret AS. Use the target hook to get the actual sret AS for this
+ // return type.
+ const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl();
+ LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
+ unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS);
bool DestASMismatch = !Dest.isIgnored() &&
RetTy.isTriviallyCopyableType(CGF.getContext()) &&
Dest.getAddress()
@@ -309,6 +309,13 @@ void AggExprEmitter::withReturnValueSlot(
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
if (!UseTemp) {
RetAddr = Dest.getAddress();
+ if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) {
+ llvm::Type *SRetPtrTy =
+ llvm::PointerType::get(CGF.getLLVMContext(), SRetAS);
+ RetAddr = RetAddr.withPointer(
+ CGF.performAddrSpaceCast(RetAddr.getBasePointer(), SRetPtrTy),
+ RetAddr.isKnownNonNull());
+ }
} else {
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) {
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index f1c3af0f0634d..668b20079f0d0 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -1381,15 +1381,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (!RD)
return false;
- // If C++ prohibits us from making a copy, return by address using the
- // language default AS. The alloca AS would force callers to provide a
- // stack pointer, which is invalid when the destination is a default AS
- // and prohibited for types with deleted copy/move constructors.
+ // If C++ prohibits us from making a copy, return by address using the target
+ // hook getSRetAddrSpace to decide the AS.
if (!RD->canPassInRegisters()) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
- auto DefaultAS = CGM.getContext().getTargetAddressSpace(LangAS::Default);
- FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS,
- /*ByVal=*/false);
+ LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
+ unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS);
+ FI.getReturnInfo() =
+ ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false);
return true;
}
return false;
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index ac04bd267ce67..5345d0af4070d 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1197,10 +1197,12 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (isIndirectReturn) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
- unsigned DefaultAS =
- CGM.getContext().getTargetAddressSpace(LangAS::Default);
- FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS,
- /*ByVal=*/false);
+ LangAS SRetAS = !isTrivialForABI
+ ? CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD)
+ : CGM.getTargetCodeGenInfo().getASTAllocaAddressSpace();
+ unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS);
+ FI.getReturnInfo() =
+ ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false);
// MSVC always passes `this` before the `sret` parameter.
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 98ee894fe557f..93997d881d5ad 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -32,6 +32,7 @@ class Value;
}
namespace clang {
+class CXXRecordDecl;
class Decl;
namespace CodeGen {
@@ -321,6 +322,12 @@ class TargetCodeGenInfo {
/// Get the AST address space for alloca.
virtual LangAS getASTAllocaAddressSpace() const { return LangAS::Default; }
+ /// Get the address space for an indirect (sret) return of the given type.
+ /// The default falls back to the alloca AS.
+ virtual LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const {
+ return getASTAllocaAddressSpace();
+ }
+
/// Get address space of pointer parameter for __cxa_atexit.
virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const {
return LangAS::Default;
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..ce374822e0fd2 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -8,6 +8,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "clang/AST/DeclCXX.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
@@ -308,6 +309,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}
+
+ LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override;
+
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
@@ -467,6 +471,28 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}
+static bool hasViableCopyOrMoveConstructor(const CXXRecordDecl *RD) {
+ if ((RD->needsImplicitCopyConstructor() &&
+ !RD->defaultedCopyConstructorIsDeleted()) ||
+ (RD->needsImplicitMoveConstructor() &&
+ !RD->defaultedMoveConstructorIsDeleted()))
+ return true;
+
+ return llvm::any_of(RD->ctors(), [](const CXXConstructorDecl *CD) {
+ return CD->isCopyOrMoveConstructor() && !CD->isDeleted() &&
+ !CD->isIneligibleOrNotSelected();
+ });
+}
+
+LangAS
+AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
+ // Types with no viable copy/move must be constructed in-place , use the
+ // default AS so the sret pointer matches the "this" convention.
+ if (RD && !RD->canPassInRegisters() && !hasViableCopyOrMoveConstructor(RD))
+ return LangAS::Default;
+ return getASTAllocaAddressSpace();
+}
+
LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp
index 66c4a4895035d..994282debb0d0 100644
--- a/clang/test/CodeGenCXX/no-elide-constructors.cpp
+++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp
@@ -26,7 +26,8 @@ X Test()
// sret argument.
// CHECK-CXX98: call void @_ZN1XC1ERKS_(
// CHECK-CXX11: call void @_ZN1XC1EOS_(
- // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]]
+ // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+ // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]
// CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_(
// CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_(
// CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_(
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
index eaca0c76477cb..6103edba46274 100644
--- a/clang/test/CodeGenHIP/store-addr-space.hip
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -12,18 +12,19 @@ struct Foo {
};
// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
-// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
-// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8
+// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
-// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]]
-// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index 5d2f63c61e57d..fc6f7c15eb5e6 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -19,7 +19,8 @@ E::E() noexcept : foo(s()) {}
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
+// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
//
>From f4c65cb20e95c4c425626ad63027b526aaac4c44 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Wed, 25 Mar 2026 16:15:46 -0500
Subject: [PATCH 3/5] fix trivially-copyable check
---
clang/lib/CodeGen/CGExprAgg.cpp | 9 +++++++--
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 4 ++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index acd29f5b7546c..686358877e8e0 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -293,8 +293,13 @@ void AggExprEmitter::withReturnValueSlot(
const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl();
LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS);
- bool DestASMismatch = !Dest.isIgnored() &&
- RetTy.isTriviallyCopyableType(CGF.getContext()) &&
+ bool CanAggregateCopy =
+ RD ? (RD->hasTrivialCopyConstructor() ||
+ RD->hasTrivialMoveConstructor() || RD->hasTrivialCopyAssignment() ||
+ RD->hasTrivialMoveAssignment() || RD->hasAttr<TrivialABIAttr>() ||
+ RD->isUnion())
+ : RetTy.isTriviallyCopyableType(CGF.getContext());
+ bool DestASMismatch = !Dest.isIgnored() && CanAggregateCopy &&
Dest.getAddress()
.getBasePointer()
->stripPointerCasts()
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index fc6f7c15eb5e6..8d2c035e23472 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -16,11 +16,11 @@ E::E() noexcept : foo(s()) {}
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5)
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
-// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
//
>From fd12f16be040b6edfbceca48a97ddf58e6875441 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 13 Apr 2026 10:31:58 -0500
Subject: [PATCH 4/5] changes
---
clang/lib/CodeGen/MicrosoftCXXABI.cpp | 4 +---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 15 +--------------
2 files changed, 2 insertions(+), 17 deletions(-)
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 5345d0af4070d..0373dd042236d 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1197,9 +1197,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (isIndirectReturn) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
- LangAS SRetAS = !isTrivialForABI
- ? CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD)
- : CGM.getTargetCodeGenInfo().getASTAllocaAddressSpace();
+ LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS);
FI.getReturnInfo() =
ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false);
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index ce374822e0fd2..809a210917449 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -471,24 +471,11 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}
-static bool hasViableCopyOrMoveConstructor(const CXXRecordDecl *RD) {
- if ((RD->needsImplicitCopyConstructor() &&
- !RD->defaultedCopyConstructorIsDeleted()) ||
- (RD->needsImplicitMoveConstructor() &&
- !RD->defaultedMoveConstructorIsDeleted()))
- return true;
-
- return llvm::any_of(RD->ctors(), [](const CXXConstructorDecl *CD) {
- return CD->isCopyOrMoveConstructor() && !CD->isDeleted() &&
- !CD->isIneligibleOrNotSelected();
- });
-}
-
LangAS
AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
// Types with no viable copy/move must be constructed in-place , use the
// default AS so the sret pointer matches the "this" convention.
- if (RD && !RD->canPassInRegisters() && !hasViableCopyOrMoveConstructor(RD))
+ if (RD && !RD->canPassInRegisters())
return LangAS::Default;
return getASTAllocaAddressSpace();
}
>From d0ac9c2de28e60b997da8cc6f8a35e8f892ea335 Mon Sep 17 00:00:00 2001
From: vigneshwar jayakumar <vigneshwar.jayakumar at amd.com>
Date: Mon, 13 Apr 2026 10:54:55 -0500
Subject: [PATCH 5/5] update tests
---
clang/test/CodeGenCXX/no-elide-constructors.cpp | 3 +--
clang/test/CodeGenHIP/store-addr-space.hip | 11 +++++------
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 +--
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp
index 994282debb0d0..66c4a4895035d 100644
--- a/clang/test/CodeGenCXX/no-elide-constructors.cpp
+++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp
@@ -26,8 +26,7 @@ X Test()
// sret argument.
// CHECK-CXX98: call void @_ZN1XC1ERKS_(
// CHECK-CXX11: call void @_ZN1XC1EOS_(
- // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
- // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]
+ // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]]
// CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_(
// CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_(
// CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_(
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
index 6103edba46274..eaca0c76477cb 100644
--- a/clang/test/CodeGenHIP/store-addr-space.hip
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -12,19 +12,18 @@ struct Foo {
};
// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
-// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
-// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
-// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8
// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
-// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
-// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index 8d2c035e23472..5d2f63c61e57d 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -16,11 +16,10 @@ E::E() noexcept : foo(s()) {}
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5)
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
//
More information about the cfe-commits
mailing list