[clang] [clang][CodeGen] Additional fixes for #114062 (PR #128166)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 26 08:50:53 PST 2025
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/128166
>From 77db8a15d472e112be93cb7566cf56e26dc80501 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 21 Feb 2025 13:47:42 +0200
Subject: [PATCH 1/5] Handle HLL casts on sret args, remove ill advised cast
strip and replace it with later AS cast.
---
clang/lib/CodeGen/CGCall.cpp | 18 +++++++--
clang/lib/CodeGen/CGExprAgg.cpp | 11 +-----
clang/lib/CodeGen/CGExprScalar.cpp | 17 +++++++++
.../sret_cast_with_nonzero_alloca_as.cpp | 32 ++++++++++++++++
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 38 +++++++++++++++++++
5 files changed, 104 insertions(+), 12 deletions(-)
create mode 100644 clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
create mode 100644 clang/test/OpenMP/amdgcn_sret_ctor.cpp
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 47bfd470dbafb..b5a4435e75ea0 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5163,6 +5163,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
}
if (IRFunctionArgs.hasSRetArg()) {
+ // A mismatch between the allocated return value's AS and the target's
+ // chosen IndirectAS can happen e.g. when passing the this pointer through
+ // a chain involving stores to / loads from the DefaultAS; we address this
+ // here, symmetrically with the handling we have for normal pointer args.
+ if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())
+ SRetPtr = SRetPtr.withPointer(
+ getTargetHooks().performAddrSpaceCast(
+ *this, SRetPtr.getBasePointer(),
+ getLangASFromTargetAS(SRetPtr.getAddressSpace()),
+ getLangASFromTargetAS(RetAI.getIndirectAddrSpace()),
+ llvm::PointerType::get(getLLVMContext(),
+ RetAI.getIndirectAddrSpace()),
+ true),
+ SRetPtr.isKnownNonNull());
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
getAsNaturalPointerTo(SRetPtr, RetTy);
} else if (RetAI.isInAlloca()) {
@@ -5394,9 +5408,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
V->getType()->isIntegerTy())
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
- // The only plausible mismatch here would be for pointer address spaces,
- // which can happen e.g. when passing a sret arg that is in the AllocaAS
- // to a function that takes a pointer to and argument in the DefaultAS.
+ // The only plausible mismatch here would be for pointer address spaces.
// We assume that the target has a reasonable mapping for the DefaultAS
// (it can be casted to from incoming specific ASes), and insert an AS
// cast to address the mismatch.
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 625ca363d9019..200fb7fd756fd 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -302,15 +302,8 @@ void AggExprEmitter::withReturnValueSlot(
llvm::Value *LifetimeSizePtr = nullptr;
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
if (!UseTemp) {
- // It is possible for the existing slot we are using directly to have been
- // allocated in the correct AS for an indirect return, and then cast to
- // the default AS (this is the behaviour of CreateMemTemp), however we know
- // that the return address is expected to point to the uncasted AS, hence we
- // strip possible pointer casts here.
- if (Dest.getAddress().isValid())
- RetAddr = Dest.getAddress().withPointer(
- Dest.getAddress().getBasePointer()->stripPointerCasts(),
- Dest.getAddress().isKnownNonNull());
+ RetAddr = Dest.getAddress();
+ RawAddress RetAllocaAddr = RawAddress::invalid();
} else {
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
llvm::TypeSize Size =
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 5ee8a1bfa8175..7f8d438e36d6a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -30,6 +30,7 @@
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -2352,6 +2353,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Value *Src = Visit(const_cast<Expr*>(E));
llvm::Type *SrcTy = Src->getType();
llvm::Type *DstTy = ConvertType(DestTy);
+
+ // FIXME: this is a gross but seemingly necessary workaround for an issue
+ // manifesting when a target uses a non-default AS for indirect sret args,
+ // but the source HLL is generic, wherein a valid C-cast or reinterpret_cast
+ // on the address of a local struct that gets returned by value yields an
+ // invalid bitcast from the a pointer to the IndirectAS to a pointer to the
+ // DefaultAS. We can only do this subversive thing because sret args are
+ // manufactured and them residing in the IndirectAS is a target specific
+ // detail, and doing an AS cast here still retains the semantics the user
+ // expects. It is desirable to remove this iff a better solution is found.
+ if (SrcTy != DstTy && isa<llvm::Argument>(Src) &&
+ cast<llvm::Argument>(Src)->hasStructRetAttr())
+ return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+ CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(),
+ DstTy);
+
assert(
(!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&
diff --git a/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
new file mode 100644
index 0000000000000..320c712b665de
--- /dev/null
+++ b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
+
+struct X { int z[17]; };
+
+// CHECK-LABEL: define dso_local void @_Z3foocc(
+// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_X:%.*]]) align 4 [[AGG_RESULT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
+// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
+// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
+// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
+// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
+// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
+// CHECK-NEXT: ret void
+//
+X foo(char x, char y) {
+ X r;
+
+ *(((char*)&r) + 1) = x;
+ *(reinterpret_cast<char*>(&r) + 2) = y;
+
+ return r;
+}
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
new file mode 100644
index 0000000000000..0eb4748571d56
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -0,0 +1,38 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s
+
+#pragma omp begin declare target
+struct S {
+ ~S();
+};
+S s();
+struct E {
+ S foo;
+ E() noexcept;
+};
+E::E() noexcept : foo(s()) {}
+#pragma omp end declare target
+// CHECK-LABEL: define hidden void @_ZN1EC2Ev(
+// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
+// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+// CHECK-LABEL: declare void @_Z1sv(
+// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1) #[[ATTR1:[0-9]+]]
+//
+// CHECK-LABEL: define hidden void @_ZN1EC1Ev(
+// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @_ZN1EC2Ev(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: ret void
+//
>From 5b186a3f00b83631476e5c131ab73ec6a4345350 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 21 Feb 2025 14:54:27 +0200
Subject: [PATCH 2/5] Fix test.
---
clang/test/CodeGen/partial-reinitialization2.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c
index 7949a69555031..8d8e04f24541a 100644
--- a/clang/test/CodeGen/partial-reinitialization2.c
+++ b/clang/test/CodeGen/partial-reinitialization2.c
@@ -91,8 +91,8 @@ void test5(void)
// CHECK-LABEL: test6
void test6(void)
{
- // CHECK: [[VAR:%[a-z0-9]+]] = alloca
- // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]])
+ // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0
+ // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[LP]])
// CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
// CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]]
>From 2dd8052e82ad721d059bddf7f356aba25f5fd408 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 21 Feb 2025 14:59:19 +0200
Subject: [PATCH 3/5] Unwrap/expand.
---
clang/lib/CodeGen/CGCall.cpp | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b5a4435e75ea0..88b1a8aebfa50 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5167,16 +5167,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// chosen IndirectAS can happen e.g. when passing the this pointer through
// a chain involving stores to / loads from the DefaultAS; we address this
// here, symmetrically with the handling we have for normal pointer args.
- if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())
+ if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
+ llvm::Value *V = SRetPtr.getBasePointer();
+ LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace());
+ LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace());
+ llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
+ RetAI.getIndirectAddrSpace());
+
SRetPtr = SRetPtr.withPointer(
- getTargetHooks().performAddrSpaceCast(
- *this, SRetPtr.getBasePointer(),
- getLangASFromTargetAS(SRetPtr.getAddressSpace()),
- getLangASFromTargetAS(RetAI.getIndirectAddrSpace()),
- llvm::PointerType::get(getLLVMContext(),
- RetAI.getIndirectAddrSpace()),
- true),
+ getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true),
SRetPtr.isKnownNonNull());
+ }
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
getAsNaturalPointerTo(SRetPtr, RetTy);
} else if (RetAI.isInAlloca()) {
>From b845baaa2cc922f96b315acd35725fdc58c3ba97 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 21 Feb 2025 15:13:18 +0200
Subject: [PATCH 4/5] Clean up `-cc1` invocation in OMP test.
---
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
index 0eb4748571d56..99ca31b78e1fc 100644
--- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s
#pragma omp begin declare target
struct S {
>From 10e75d6255a0f139e3c5c1199073b4954a804ce1 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 26 Feb 2025 16:50:22 +0000
Subject: [PATCH 5/5] Use exciting and modern C++17.
---
clang/lib/CodeGen/CGExprScalar.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 7f8d438e36d6a..6646057b9d772 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2363,8 +2363,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// manufactured and them residing in the IndirectAS is a target specific
// detail, and doing an AS cast here still retains the semantics the user
// expects. It is desirable to remove this iff a better solution is found.
- if (SrcTy != DstTy && isa<llvm::Argument>(Src) &&
- cast<llvm::Argument>(Src)->hasStructRetAttr())
+ if (auto A = dyn_cast<llvm::Argument>(Src); A && A->hasStructRetAttr())
return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(),
DstTy);
More information about the cfe-commits
mailing list