[clang] [clang][CodeGen] `sret` args should always point to the `alloca` AS, so use that (PR #114062)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 29 16:20:25 PDT 2024
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/114062
>From d2d2d3d5db3f639aab178f9ca9a20db2842d2b65 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 29 Oct 2024 14:20:44 +0000
Subject: [PATCH 1/2] `sret` args should always point to the `alloca` AS, so we
can use that.
---
clang/lib/CodeGen/CGCall.cpp | 15 ++++++++-------
clang/test/CodeGen/partial-reinitialization2.c | 4 ++--
clang/test/CodeGen/sret.c | 11 +++++++++++
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8f4f5d3ed81601..56acfae7ae9e51 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1672,8 +1672,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
// Add type for sret argument.
if (IRFunctionArgs.hasSRetArg()) {
- QualType Ret = FI.getReturnType();
- unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
+ unsigned AddressSpace = CGM.getDataLayout().getAllocaAddrSpace();
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
llvm::PointerType::get(getLLVMContext(), AddressSpace);
}
@@ -5145,7 +5144,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
- RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
// For virtual function pointer thunks and musttail calls, we must always
@@ -5159,16 +5157,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getAddress();
} else {
- SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
+ SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp");
if (HaveInsertPoint() && ReturnValue.isUnused()) {
llvm::TypeSize size =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
- UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
+ UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer());
}
}
if (IRFunctionArgs.hasSRetArg()) {
+ // If the caller allocated the return slot, it is possible that the
+ // alloca was AS casted to the default as, so we ensure the cast is
+ // stripped before binding to the sret arg, which is in the allocaAS.
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
- getAsNaturalPointerTo(SRetPtr, RetTy);
+ getAsNaturalPointerTo(SRetPtr, RetTy)->stripPointerCasts();
} else if (RetAI.isInAlloca()) {
Address Addr =
Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
@@ -5740,7 +5741,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// pop this cleanup later on. Being eager about this is OK, since this
// temporary is 'invisible' outside of the callee.
if (UnusedReturnSizePtr)
- pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr,
UnusedReturnSizePtr);
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c
index e709c1d4ad1ee1..7949a69555031e 100644
--- a/clang/test/CodeGen/partial-reinitialization2.c
+++ b/clang/test/CodeGen/partial-reinitialization2.c
@@ -91,8 +91,8 @@ void test5(void)
// CHECK-LABEL: test6
void test6(void)
{
- // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0
- // CHECK: call {{.*}}get456789(ptr {{.*}}[[LP]])
+ // CHECK: [[VAR:%[a-z0-9]+]] = alloca
+ // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]])
// CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
// CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]]
diff --git a/clang/test/CodeGen/sret.c b/clang/test/CodeGen/sret.c
index 6d905e89b2c6fd..3b4914f29d2bfe 100644
--- a/clang/test/CodeGen/sret.c
+++ b/clang/test/CodeGen/sret.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -Wno-strict-prototypes -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -Wno-strict-prototypes -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefix=NONZEROALLOCAAS %s
struct abc {
long a;
@@ -6,18 +7,28 @@ struct abc {
long c;
long d;
long e;
+ long f;
+ long g;
+ long h;
+ long i;
+ long j;
};
struct abc foo1(void);
// CHECK-DAG: declare {{.*}} @foo1(ptr dead_on_unwind writable sret(%struct.abc)
+// NONZEROALLOCAAS-DAG: declare {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc)
struct abc foo2();
// CHECK-DAG: declare {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc)
+// NONZEROALLOCAAS-DAG: declare {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc)
struct abc foo3(void){}
// CHECK-DAG: define {{.*}} @foo3(ptr dead_on_unwind noalias writable sret(%struct.abc)
+// NONZEROALLOCAAS-DAG: define {{.*}} @foo3(ptr addrspace(5) dead_on_unwind noalias writable sret(%struct.abc)
void bar(void) {
struct abc dummy1 = foo1();
// CHECK-DAG: call {{.*}} @foo1(ptr dead_on_unwind writable sret(%struct.abc)
+ // NONZEROALLOCAAS-DAG: call {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc)
struct abc dummy2 = foo2();
// CHECK-DAG: call {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc)
+ // NONZEROALLOCAAS-DAG: call {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc)
}
>From b5a7df0a771cb70d60e58a8727a5d856219dacb3 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 29 Oct 2024 17:16:17 +0000
Subject: [PATCH 2/2] Fix broken tests.
---
clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | 4 ++--
clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 57d056b0ff9d51..4a1db2c3564a57 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -250,7 +250,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @foo_large(
-// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
@@ -335,7 +335,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
-// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
// AMDGCN20-NEXT: ret void
//
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index 084281a8cada46..c2b2e00d15e13f 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -91,7 +91,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
}
// AMDGCN-LABEL: define dso_local void @foo_large(
-// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
@@ -120,7 +120,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
-// AMDGCN-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
+// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
// AMDGCN-NEXT: ret void
//
More information about the cfe-commits
mailing list