[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Mar 22 09:05:15 PDT 2026
https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083
>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 01/22] Create amdgpu-abi-struct-coerce.c
---
.../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+ float f;
+ int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+ return x;
+}
+
+typedef struct int_fp_pair {
+ int i;
+ float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+ return x;
+}
+
+typedef struct two_floats {
+ float a;
+ float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+ return x;
+}
+
+typedef struct two_ints {
+ int a;
+ int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+ return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+ short a;
+ short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+ return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+ char a;
+ char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+ return x;
+}
>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 02/22] Delete
clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
---
.../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 -------------------
1 file changed, 71 deletions(-)
delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
- float f;
- int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
- return x;
-}
-
-typedef struct int_fp_pair {
- int i;
- float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
- return x;
-}
-
-typedef struct two_floats {
- float a;
- float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
- return x;
-}
-
-typedef struct two_ints {
- int a;
- int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
- return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
- short a;
- short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
- return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
- char a;
- char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
- return x;
-}
>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 03/22] fix for clang abi lowering
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+ float f;
+ int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+ return x;
+}
+
+typedef struct int_fp_pair {
+ int i;
+ float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+ return x;
+}
+
+typedef struct two_floats {
+ float a;
+ float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+ return x;
+}
+
+typedef struct two_ints {
+ int a;
+ int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+ return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+ short a;
+ short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+ return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+ char a;
+ char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+ return x;
+}
>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 04/22] Update amdgcn-openmp-device-math-complex.c
---
clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df2..34c05e2974a64 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
// CHECK-LABEL: define {{.*}}test_complex_f32
#pragma omp target
{
- // CHECK: call [2 x i32] @__divsc3
- // CHECK: call [2 x i32] @__mulsc3
+ // CHECK: call { float, float } @__divsc3
+ // CHECK: call { float, float } @__mulsc3
(void)(a * (a / a));
}
}
>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 05/22] Update amdgpu-abi-struct-coerce.cl
---
.../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..a13f8e8bbe119 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
void func_struct_padding_arg(struct_padding_arg arg1) { }
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
void func_struct_char_x8(struct_char_x8 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
void func_struct_char_x4(struct_char_x4 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
void func_struct_char_x3(struct_char_x3 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
void func_struct_char_x2(struct_char_x2 arg) { }
// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
return s;
}
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 06/22] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..f3c4b5ad0837b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- if (Size <= 64) {
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
- }
-
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
}
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
- // Pack aggregates <= 8 bytes into single VGPR or pair.
- uint64_t Size = getContext().getTypeSize(Ty);
- if (Size <= 64) {
- unsigned NumRegs = (Size + 31) / 32;
- NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
- if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
- // XXX: Should this be i64 instead, and should the limit increase?
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
- }
-
if (NumRegsLeft > 0) {
uint64_t NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 07/22] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe119..fb5ba69c86c6d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
// CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 08/22] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d..3e4506b88aac6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 09/22] Update amdgpu-abi-struct-coerce.cl
>From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:43:50 +0530
Subject: [PATCH 10/22] Update amdgpu-variadic-call.c
---
clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c
index 17eda215211a2..22402118d862f 100644
--- a/clang/test/CodeGen/amdgpu-variadic-call.c
+++ b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -217,10 +217,9 @@ typedef union
} union_f32_i32;
// CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0
// CHECK-NEXT: tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
@@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1)
}
// CHECK-LABEL: define {{[^@]+}}@multiple_two
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[V1]] to double
// CHECK-NEXT: [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0
// CHECK-NEXT: tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
>From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:00:33 +0530
Subject: [PATCH 11/22] Update amdgpu-abi-struct-coerce.cl
---
.../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 3e4506b88aac6..06d3cdb01deb2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
void func_struct_padding_arg(struct_padding_arg arg1) { }
-// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
void func_struct_char_x8(struct_char_x8 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
void func_struct_char_x4(struct_char_x4 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
void func_struct_char_x3(struct_char_x3 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
void func_struct_char_x2(struct_char_x2 arg) { }
// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret()
return s;
}
-// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
-// CHECK: ret %struct.struct_char_x8 zeroinitializer
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:14 +0530
Subject: [PATCH 12/22] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++---
1 file changed, 48 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2399630ff797b..f827978a8cd18 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,8 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
+// Check that structs containing floats or full-sized integers (i32, i64) are
+// NOT coerced to integer arrays. They should preserve their original types.
+// However, structs containing only sub-32-bit integer types (char, short)
+// should still be packed into i32 registers.
+
+// === Structs with floats - should NOT be coerced to integers ===
typedef struct fp_int_pair {
float f;
@@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) {
return x;
}
+// === Structs with full-sized integers - should NOT be coerced ===
+
typedef struct two_ints {
int a;
int b;
@@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) {
return x;
}
-// Structs <= 32 bits should still be coerced to i32 for return value
+// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+
+// Structs of small integers <= 32 bits should be coerced to i32
typedef struct small_struct {
short a;
short b;
} small_struct;
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
small_struct return_small_struct(small_struct x) {
return x;
}
-// Structs <= 16 bits should still be coerced to i16 for return value
+// Structs of small integers <= 16 bits should be coerced to i16
typedef struct tiny_struct {
char a;
char b;
} tiny_struct;
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
tiny_struct return_tiny_struct(tiny_struct x) {
return x;
}
+
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+ char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+ return x;
+}
+
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+ char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+ return x;
+}
+
+// === Mixed tests - floats prevent coercion even with small integers ===
+
+typedef struct char_and_float {
+ char c;
+ float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+ return x;
+}
>From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:59 +0530
Subject: [PATCH 13/22] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++--
1 file changed, 90 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f3c4b5ad0837b..9e0ca7b77ecdd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
return Members * NumRegs <= MaxNumRegsForArgsRet;
}
+/// Check if all fields in an aggregate type contain only sub-32-bit integer
+/// types. Such aggregates should be packed into i32 registers rather than
+/// passed as individual elements. Aggregates containing floats or full-sized
+/// integer types (i32, i64) should preserve their original types.
+static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
+ const ASTContext &Context) {
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+
+ // Recursively check nested structs
+ if (const auto *NestedRD = FieldTy->getAsRecordDecl()) {
+ if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+ return false;
+ continue;
+ }
+
+ // Arrays - check the element type
+ if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
+ QualType EltTy = AT->getElementType();
+ if (const auto *NestedRD = EltTy->getAsRecordDecl()) {
+ if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+ return false;
+ continue;
+ }
+ // For non-struct array elements, check if they're packable integers
+ if (!EltTy->isIntegerType())
+ return false;
+ uint64_t EltSize = Context.getTypeSize(EltTy);
+ if (EltSize >= 32)
+ return false;
+ continue;
+ }
+
+ // Floating point types should not be packed into integers
+ if (FieldTy->isFloatingType())
+ return false;
+
+ // Only integer types that are smaller than 32 bits should be packed
+ if (!FieldTy->isIntegerType())
+ return false;
+
+ uint64_t FieldSize = Context.getTypeSize(FieldTy);
+ if (FieldSize >= 32)
+ return false;
+ }
+ return true;
+}
+
/// Estimate number of registers the type will use when passed in registers.
uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
uint64_t NumRegs = 0;
@@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyReturnType(RetTy);
- // Pack aggregates <= 4 bytes into single VGPR or pair.
+ // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+ // contain sub-32-bit integer types. Aggregates with floats or full-sized
+ // integers should preserve their original types.
uint64_t Size = getContext().getTypeSize(RetTy);
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ if (Size <= 64) {
+ const auto *RD = RetTy->getAsRecordDecl();
+ bool ShouldPackToInt =
+ RD && containsOnlyPackableIntegerTypes(RD, getContext());
- if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ if (ShouldPackToInt) {
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+ }
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
@@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
+ // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+ // contain sub-32-bit integer types. Aggregates with floats or full-sized
+ // integers (i32, i64) should preserve their original types.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ const auto *RD = Ty->getAsRecordDecl();
+ bool ShouldPackToInt =
+ RD && containsOnlyPackableIntegerTypes(RD, getContext());
+
+ if (ShouldPackToInt) {
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+ }
+
if (NumRegsLeft > 0) {
uint64_t NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
>From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:15:11 +0530
Subject: [PATCH 14/22] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 829500383a34a..4918bdcd8111b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (ShouldPackToInt) {
if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
@@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
>From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:28:52 +0530
Subject: [PATCH 15/22] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4918bdcd8111b..06b066de59055 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
- return ABIArgInfo::getDirect(
- llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(
- llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
>From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:53:13 +0530
Subject: [PATCH 16/22] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32()
return s;
}
-// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret i32 0
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
transparent_u func_transparent_union_ret()
{
transparent_u u = { 0 };
>From 928aa4ed1558e7e2d52461df83d0f80004d317e0 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:58:43 +0530
Subject: [PATCH 17/22] Update amdgpu-abi-struct-coerce.cl
---
.../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++-----------
1 file changed, 319 insertions(+), 466 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index e9cdb7f5da32a..7857d01f431c8 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,529 +1,382 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck %s
-
-typedef __attribute__(( ext_vector_type(2) )) char char2;
-typedef __attribute__(( ext_vector_type(3) )) char char3;
-typedef __attribute__(( ext_vector_type(4) )) char char4;
-
-typedef __attribute__(( ext_vector_type(2) )) short short2;
-typedef __attribute__(( ext_vector_type(3) )) short short3;
-typedef __attribute__(( ext_vector_type(4) )) short short4;
-
-typedef __attribute__(( ext_vector_type(2) )) int int2;
-typedef __attribute__(( ext_vector_type(3) )) int int3;
-typedef __attribute__(( ext_vector_type(4) )) int int4;
-typedef __attribute__(( ext_vector_type(16) )) int int16;
-typedef __attribute__(( ext_vector_type(32) )) int int32;
-
-// CHECK: %struct.empty_struct = type {}
-typedef struct empty_struct
-{
-} empty_struct;
-
-// CHECK-NOT: %struct.single_element_struct_arg
-typedef struct single_element_struct_arg
-{
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Test AMDGPU ABI struct coercion behavior:
+// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
+// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
+//
+// This tests the fix for the issue where structs like {float, int} were incorrectly
+// coerced to [2 x i32], losing float type information.
+
+// ============================================================================
+// SECTION 1: Structs with floats - should NOT be coerced to integers
+// ============================================================================
+
+typedef struct fp_int_pair {
+ float f;
+ int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+ return x;
+}
+
+typedef struct int_fp_pair {
int i;
-} single_element_struct_arg_t;
-
-// CHECK-NOT: %struct.nested_single_element_struct_arg
-typedef struct nested_single_element_struct_arg
-{
- single_element_struct_arg_t i;
-} nested_single_element_struct_arg_t;
-
-// CHECK: %struct.struct_arg = type { i32, float, i32 }
-typedef struct struct_arg
-{
- int i1;
float f;
- int i2;
-} struct_arg_t;
-
-// CHECK: %struct.struct_padding_arg = type { i8, i64 }
-typedef struct struct_padding_arg
-{
- char i1;
- long f;
-} struct_padding_arg;
-
-// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 }
-typedef struct struct_of_arrays_arg
-{
- int i1[2];
- float f1;
- int i2[4];
- float f2[3];
- int i3;
-} struct_of_arrays_arg_t;
-
-// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 }
-typedef struct struct_of_structs_arg
-{
- int i1;
- float f1;
- struct_arg_t s1;
- int i2;
-} struct_of_structs_arg_t;
-
-typedef union
-{
- int b1;
- float b2;
-} transparent_u __attribute__((__transparent_union__));
-
-// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
-typedef struct single_array_element_struct_arg
-{
- int i[4];
-} single_array_element_struct_arg_t;
-
-// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
-// CHECK: %struct.inner = type { i32, i64 }
-typedef struct single_struct_element_struct_arg
-{
- struct inner {
- int a;
- long b;
- } s;
-} single_struct_element_struct_arg_t;
-
-// CHECK: %struct.different_size_type_pair
-typedef struct different_size_type_pair {
- long l;
- int i;
-} different_size_type_pair;
-
-// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
-typedef struct flexible_array
-{
- int i;
- int flexible[];
-} flexible_array;
-
-// CHECK: %struct.struct_arr16 = type { [16 x i32] }
-typedef struct struct_arr16
-{
- int arr[16];
-} struct_arr16;
-
-// CHECK: %struct.struct_arr32 = type { [32 x i32] }
-typedef struct struct_arr32
-{
- int arr[32];
-} struct_arr32;
-
-// CHECK: %struct.struct_arr33 = type { [33 x i32] }
-typedef struct struct_arr33
-{
- int arr[33];
-} struct_arr33;
-
-// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
-typedef struct struct_char_arr32
-{
- char arr[32];
-} struct_char_arr32;
-
-// CHECK-NOT: %struct.struct_char_x8
-typedef struct struct_char_x8 {
- char x, y, z, w;
- char a, b, c, d;
-} struct_char_x8;
-
-// CHECK-NOT: %struct.struct_char_x4
-typedef struct struct_char_x4 {
- char x, y, z, w;
-} struct_char_x4;
-
-// CHECK-NOT: %struct.struct_char_x3
-typedef struct struct_char_x3 {
- char x, y, z;
-} struct_char_x3;
-
-// CHECK-NOT: %struct.struct_char_x2
-typedef struct struct_char_x2 {
- char x, y;
-} struct_char_x2;
-
-// CHECK-NOT: %struct.struct_char_x1
-typedef struct struct_char_x1 {
- char x;
-} struct_char_x1;
-
-// 4 registers from fields, 5 if padding included.
-// CHECK: %struct.nested = type { i8, i64 }
-// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
-typedef struct num_regs_nested_struct {
- int x;
- struct nested {
- char z;
- long y;
- } inner;
-} num_regs_nested_struct;
-
-// CHECK: %struct.double_nested = type { %struct.inner_inner }
-// CHECK: %struct.inner_inner = type { i8, i32, i8 }
-// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 }
-typedef struct double_nested_struct {
- int x;
- struct double_nested {
- struct inner_inner {
- char y;
- int q;
- char z;
- } inner_inner;
- } inner;
-
- short w;
-} double_nested_struct;
-
-// This is a large struct, but uses fewer registers than the limit.
-// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
-typedef struct large_struct_padding {
- char e0;
- int e1;
- char e2;
- int e3;
- char e4;
- char e5;
- short e6;
- short e7;
- char e8[3];
- long e9;
- int e10;
- char e11;
- int e12;
- short e13;
- char e14;
-} large_struct_padding;
-
-// The number of registers computed should be 6, not 8.
-typedef struct int3_pair {
- int3 dx;
- int3 dy;
-} int3_pair;
-
-// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
-typedef struct struct_4regs
-{
- int x;
- int y;
- int z;
- int w;
-} struct_4regs;
-
-// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
-__kernel void kernel_empty_struct_arg(empty_struct s) { }
-
-// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
-__kernel void kernel_struct_arg(struct_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
-__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
-
-// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4)
-__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3)
-__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
-
-// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
-__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
-
-// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce)
-__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
-__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
-__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
-
-// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
-void func_f32_arg(float arg) { }
-
-// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
-void func_v2i16_arg(short2 arg) { }
-
-// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
-void func_v3i32_arg(int3 arg) { }
-
-// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
-void func_v4i32_arg(int4 arg) { }
-
-// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
-void func_v16i32_arg(int16 arg) { }
-
-// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
-void func_v32i32_arg(int32 arg) { }
+} int_fp_pair;
-// CHECK: define{{.*}} void @func_empty_struct_arg()
-void func_empty_struct_arg(empty_struct empty) { }
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+ return x;
+}
-// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
-void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+typedef struct two_floats {
+ float a;
+ float b;
+} two_floats;
-// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
-void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+ return x;
+}
-// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
-void func_struct_arg(struct_arg_t arg1) { }
+// Double precision floats
+typedef struct double_struct {
+ double d;
+} double_struct;
-// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
-void func_struct_padding_arg(struct_padding_arg arg1) { }
+// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
+double_struct return_double_struct(double_struct x) {
+ return x;
+}
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
-void func_struct_char_x8(struct_char_x8 arg) { }
+// ============================================================================
+// SECTION 2: Structs with full-sized integers - should NOT be coerced
+// ============================================================================
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
-void func_struct_char_x4(struct_char_x4 arg) { }
+typedef struct two_ints {
+ int a;
+ int b;
+} two_ints;
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
-void func_struct_char_x3(struct_char_x3 arg) { }
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+ return x;
+}
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
-void func_struct_char_x2(struct_char_x2 arg) { }
+typedef struct single_int {
+ int a;
+} single_int;
-// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
-void func_struct_char_x1(struct_char_x1 arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
+single_int return_single_int(single_int x) {
+ return x;
+}
-// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
-void func_transparent_union_arg(transparent_u u) { }
+typedef struct int64_struct {
+ long long a;
+} int64_struct;
-// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
-void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
+int64_struct return_int64_struct(int64_struct x) {
+ return x;
+}
-// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
-void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+// ============================================================================
+// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
+// ============================================================================
-// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
-void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
+// Structs of small integers <= 32 bits should be coerced to i32
+typedef struct small_struct {
+ short a;
+ short b;
+} small_struct;
-// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone byval(%struct.flexible_array) align 4 captures(none) %arg)
-void func_flexible_array_arg(flexible_array arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
+small_struct return_small_struct(small_struct x) {
+ return x;
+}
-// CHECK: define{{.*}} float @func_f32_ret()
-float func_f32_ret()
-{
- return 0.0f;
+// Structs of small integers <= 16 bits should be coerced to i16
+typedef struct tiny_struct {
+ char a;
+ char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
+tiny_struct return_tiny_struct(tiny_struct x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_empty_struct_ret()
-empty_struct func_empty_struct_ret()
-{
- empty_struct s = {};
- return s;
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+ char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+ return x;
}
-// CHECK: define{{.*}} i32 @single_element_struct_ret()
-// CHECK: ret i32 0
-single_element_struct_arg_t single_element_struct_ret()
-{
- single_element_struct_arg_t s = { 0 };
- return s;
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+ char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+ return x;
}
-// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
-// CHECK: ret i32 0
-nested_single_element_struct_arg_t nested_single_element_struct_ret()
-{
- nested_single_element_struct_arg_t s = { 0 };
- return s;
+// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
+typedef struct four_shorts {
+ short a, b, c, d;
+} four_shorts;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
+four_shorts return_four_shorts(four_shorts x) {
+ return x;
}
-// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
-// CHECK: ret %struct.struct_arg zeroinitializer
-struct_arg_t func_struct_ret()
-{
- struct_arg_t s = { 0 };
- return s;
+// ============================================================================
+// SECTION 4: Mixed types - floats prevent coercion even with small integers
+// ============================================================================
+
+typedef struct char_and_float {
+ char c;
+ float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+ return x;
}
-// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
-// CHECK: ret %struct.struct_padding_arg zeroinitializer
-struct_padding_arg func_struct_padding_ret()
-{
- struct_padding_arg s = { 0 };
- return s;
+typedef struct short_and_float {
+ short s;
+ float f;
+} short_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.short_and_float
+short_and_float return_short_and_float(short_and_float x) {
+ return x;
}
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
-struct_char_x8 func_struct_char_x8_ret()
-{
- struct_char_x8 s = { 0 };
- return s;
+// Small int + full-sized int should NOT be coerced
+typedef struct char_and_int {
+ char c;
+ int i;
+} char_and_int;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_and_int
+char_and_int return_char_and_int(char_and_int x) {
+ return x;
}
-// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
-// CHECK: ret i32 0
-struct_char_x4 func_struct_char_x4_ret()
-{
- struct_char_x4 s = { 0 };
- return s;
+// ============================================================================
+// SECTION 5: Exotic/Complex aggregates (per reviewer request)
+// ============================================================================
+
+// --- Nested structs ---
+
+typedef struct inner_chars {
+ char a, b;
+} inner_chars;
+
+typedef struct outer_with_inner_chars {
+ inner_chars inner;
+ char c, d;
+} outer_with_inner_chars;
+
+// All chars, 32 bits total - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
+outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
+ return x;
}
-// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
-// CHECK: ret i32 0
-struct_char_x3 func_struct_char_x3_ret()
-{
- struct_char_x3 s = { 0 };
- return s;
+typedef struct inner_with_float {
+ char c;
+ float f;
+} inner_with_float;
+
+typedef struct outer_with_float_inner {
+ inner_with_float inner;
+} outer_with_float_inner;
+
+// Nested struct contains float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
+// CHECK: ret %struct.outer_with_float_inner
+outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
+ return x;
}
-// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
-struct_char_x2 func_struct_char_x2_ret()
-{
- struct_char_x2 s = { 0 };
- return s;
+// --- Arrays within structs ---
+
+typedef struct char_array_struct {
+ char arr[4];
+} char_array_struct;
+
+// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
+char_array_struct return_char_array(char_array_struct x) {
+ return x;
}
-// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
-// CHECK: ret i8 0
-struct_char_x1 func_struct_char_x1_ret()
-{
- struct_char_x1 s = { 0 };
- return s;
+typedef struct short_array_struct {
+ short arr[2];
+} short_array_struct;
+
+// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
+short_array_struct return_short_array(short_array_struct x) {
+ return x;
}
-// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
-// CHECK: ret %struct.struct_arr16 zeroinitializer
-struct_arr16 func_ret_struct_arr16()
-{
- struct_arr16 s = { 0 };
- return s;
+typedef struct int_array_struct {
+ int arr[2];
+} int_array_struct;
+
+// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
+// CHECK: ret %struct.int_array_struct
+int_array_struct return_int_array(int_array_struct x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 captures(none) initializes((0, 128)) %agg.result)
-struct_arr32 func_ret_struct_arr32()
-{
- struct_arr32 s = { 0 };
- return s;
+typedef struct float_array_struct {
+ float arr[2];
+} float_array_struct;
+
+// Array of 2 floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
+// CHECK: ret %struct.float_array_struct
+float_array_struct return_float_array(float_array_struct x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 captures(none) initializes((0, 132)) %agg.result)
-struct_arr33 func_ret_struct_arr33()
-{
- struct_arr33 s = { 0 };
- return s;
+// --- Complex combinations ---
+
+typedef struct mixed_nested {
+ struct {
+ char a;
+ char b;
+ } inner;
+ short s;
+} mixed_nested;
+
+// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
+mixed_nested return_mixed_nested(mixed_nested x) {
+ return x;
}
-// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
-struct_char_arr32 func_ret_struct_char_arr32()
-{
- struct_char_arr32 s = { 0 };
- return s;
+typedef struct deeply_nested_chars {
+ struct {
+ struct {
+ char a, b;
+ } level2;
+ char c, d;
+ } level1;
+} deeply_nested_chars;
+
+// Deeply nested, but all chars = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
+deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
+ return x;
}
-// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret %union.transparent_u zeroinitializer
-transparent_u func_transparent_union_ret()
-{
- transparent_u u = { 0 };
- return u;
+typedef struct deeply_nested_with_float {
+ struct {
+ struct {
+ char a;
+ float f; // Float buried deep
+ } level2;
+ } level1;
+} deeply_nested_with_float;
+
+// Float buried in nested struct - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
+// CHECK: ret %struct.deeply_nested_with_float
+deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
+ return x;
}
-// CHECK: define{{.*}} %struct.different_size_type_pair @func_different_size_type_pair_ret()
-different_size_type_pair func_different_size_type_pair_ret()
-{
- different_size_type_pair s = { 0 };
- return s;
+// --- Edge cases ---
+
+// Single char
+typedef struct single_char {
+ char c;
+} single_char;
+
+// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
+single_char return_single_char(single_char x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 captures(none) initializes((0, 4)) %agg.result)
-flexible_array func_flexible_array_ret()
-{
- flexible_array s = { 0 };
- return s;
+// Three chars (24 bits, rounds up to 32)
+typedef struct three_chars {
+ char a, b, c;
+} three_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
+three_chars return_three_chars(three_chars x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
-void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
+// Five chars (40 bits, rounds up to 64)
+typedef struct five_chars {
+ char a, b, c, d, e;
+} five_chars;
-// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 captures(none) %{{.*}})
-void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
+five_chars return_five_chars(five_chars x) {
+ return x;
+}
-// XXX - Why don't the inner structs flatten?
-// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 captures(none) %{{.*}})
-void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
+// --- Union tests ---
-// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
-void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { }
+typedef union char_int_union {
+ char c;
+ int i;
+} char_int_union;
-// CHECK: define{{.*}} %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
-double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
- double_nested_struct s = { 0 };
- return s;
+// Union with int - preserves union type
+// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
+char_int_union return_char_int_union(char_int_union x) {
+ return x;
}
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
-void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
+typedef union float_int_union {
+ float f;
+ int i;
+} float_int_union;
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 captures(none) %{{.*}})
-void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
- *out = arg;
+// Union with float - preserves union type
+// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
+float_int_union return_float_int_union(float_int_union x) {
+ return x;
}
-// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
-void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { }
-
-// Function signature from blender, nothing should be passed byval. The v3i32
-// should not count as 4 passed registers.
-// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
-void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
-
-// Each short4 should fit pack into 2 registers.
-// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
- short4 arg4, short4 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
- short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
- short3 arg4, short3 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
- short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
-void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
- short2 arg4, short2 arg5, short2 arg6, short2 arg7,
- short2 arg8, short2 arg9, short2 arg10, short2 arg11,
- struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
- short2 arg4, short2 arg5, short2 arg6, short2 arg7,
- short2 arg8, short2 arg9, short2 arg10, short2 arg11,
- short2 arg12, struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
- char2 arg4, char2 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
- char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
-void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
+// --- Padding scenarios ---
+
+typedef struct char_with_padding {
+ char c;
+ // 3 bytes padding
+ int i;
+} char_with_padding;
+
+// Has int, should NOT be coerced even though small + padding
+// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_with_padding
+char_with_padding return_char_with_padding(char_with_padding x) {
+ return x;
+}
>From 8657523a6b425b05207956fc43db0025cd13fa51 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 22:00:44 +0530
Subject: [PATCH 18/22] Update amdgpu-abi-struct-coerce.cl
---
.../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++++++-------
1 file changed, 466 insertions(+), 319 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 7857d01f431c8..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,382 +1,529 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Test AMDGPU ABI struct coercion behavior:
-// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
-// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
-//
-// This tests the fix for the issue where structs like {float, int} were incorrectly
-// coerced to [2 x i32], losing float type information.
-
-// ============================================================================
-// SECTION 1: Structs with floats - should NOT be coerced to integers
-// ============================================================================
-
-typedef struct fp_int_pair {
- float f;
- int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
- return x;
-}
-
-typedef struct int_fp_pair {
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(2) )) char char2;
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+typedef __attribute__(( ext_vector_type(2) )) short short2;
+typedef __attribute__(( ext_vector_type(3) )) short short3;
+typedef __attribute__(( ext_vector_type(4) )) short short4;
+
+typedef __attribute__(( ext_vector_type(2) )) int int2;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+typedef __attribute__(( ext_vector_type(4) )) int int4;
+typedef __attribute__(( ext_vector_type(16) )) int int16;
+typedef __attribute__(( ext_vector_type(32) )) int int32;
+
+// CHECK: %struct.empty_struct = type {}
+typedef struct empty_struct
+{
+} empty_struct;
+
+// CHECK-NOT: %struct.single_element_struct_arg
+typedef struct single_element_struct_arg
+{
int i;
+} single_element_struct_arg_t;
+
+// CHECK-NOT: %struct.nested_single_element_struct_arg
+typedef struct nested_single_element_struct_arg
+{
+ single_element_struct_arg_t i;
+} nested_single_element_struct_arg_t;
+
+// CHECK: %struct.struct_arg = type { i32, float, i32 }
+typedef struct struct_arg
+{
+ int i1;
float f;
-} int_fp_pair;
+ int i2;
+} struct_arg_t;
+
+// CHECK: %struct.struct_padding_arg = type { i8, i64 }
+typedef struct struct_padding_arg
+{
+ char i1;
+ long f;
+} struct_padding_arg;
+
+// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 }
+typedef struct struct_of_arrays_arg
+{
+ int i1[2];
+ float f1;
+ int i2[4];
+ float f2[3];
+ int i3;
+} struct_of_arrays_arg_t;
+
+// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 }
+typedef struct struct_of_structs_arg
+{
+ int i1;
+ float f1;
+ struct_arg_t s1;
+ int i2;
+} struct_of_structs_arg_t;
+
+typedef union
+{
+ int b1;
+ float b2;
+} transparent_u __attribute__((__transparent_union__));
+
+// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
+typedef struct single_array_element_struct_arg
+{
+ int i[4];
+} single_array_element_struct_arg_t;
+
+// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
+// CHECK: %struct.inner = type { i32, i64 }
+typedef struct single_struct_element_struct_arg
+{
+ struct inner {
+ int a;
+ long b;
+ } s;
+} single_struct_element_struct_arg_t;
+
+// CHECK: %struct.different_size_type_pair
+typedef struct different_size_type_pair {
+ long l;
+ int i;
+} different_size_type_pair;
+
+// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
+typedef struct flexible_array
+{
+ int i;
+ int flexible[];
+} flexible_array;
+
+// CHECK: %struct.struct_arr16 = type { [16 x i32] }
+typedef struct struct_arr16
+{
+ int arr[16];
+} struct_arr16;
+
+// CHECK: %struct.struct_arr32 = type { [32 x i32] }
+typedef struct struct_arr32
+{
+ int arr[32];
+} struct_arr32;
+
+// CHECK: %struct.struct_arr33 = type { [33 x i32] }
+typedef struct struct_arr33
+{
+ int arr[33];
+} struct_arr33;
+
+// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
+typedef struct struct_char_arr32
+{
+ char arr[32];
+} struct_char_arr32;
+
+// CHECK-NOT: %struct.struct_char_x8
+typedef struct struct_char_x8 {
+ char x, y, z, w;
+ char a, b, c, d;
+} struct_char_x8;
+
+// CHECK-NOT: %struct.struct_char_x4
+typedef struct struct_char_x4 {
+ char x, y, z, w;
+} struct_char_x4;
+
+// CHECK-NOT: %struct.struct_char_x3
+typedef struct struct_char_x3 {
+ char x, y, z;
+} struct_char_x3;
+
+// CHECK-NOT: %struct.struct_char_x2
+typedef struct struct_char_x2 {
+ char x, y;
+} struct_char_x2;
+
+// CHECK-NOT: %struct.struct_char_x1
+typedef struct struct_char_x1 {
+ char x;
+} struct_char_x1;
+
+// 4 registers from fields, 5 if padding included.
+// CHECK: %struct.nested = type { i8, i64 }
+// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
+typedef struct num_regs_nested_struct {
+ int x;
+ struct nested {
+ char z;
+ long y;
+ } inner;
+} num_regs_nested_struct;
+
+// CHECK: %struct.double_nested = type { %struct.inner_inner }
+// CHECK: %struct.inner_inner = type { i8, i32, i8 }
+// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 }
+typedef struct double_nested_struct {
+ int x;
+ struct double_nested {
+ struct inner_inner {
+ char y;
+ int q;
+ char z;
+ } inner_inner;
+ } inner;
+
+ short w;
+} double_nested_struct;
+
+// This is a large struct, but uses fewer registers than the limit.
+// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
+typedef struct large_struct_padding {
+ char e0;
+ int e1;
+ char e2;
+ int e3;
+ char e4;
+ char e5;
+ short e6;
+ short e7;
+ char e8[3];
+ long e9;
+ int e10;
+ char e11;
+ int e12;
+ short e13;
+ char e14;
+} large_struct_padding;
+
+// The number of registers computed should be 6, not 8.
+typedef struct int3_pair {
+ int3 dx;
+ int3 dy;
+} int3_pair;
+
+// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
+typedef struct struct_4regs
+{
+ int x;
+ int y;
+ int z;
+ int w;
+} struct_4regs;
+
+// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
+__kernel void kernel_empty_struct_arg(empty_struct s) { }
+
+// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
+__kernel void kernel_struct_arg(struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
+
+// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4)
+__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3)
+__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
+
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
+__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
+
+// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
+__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
+__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
+
+// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
+void func_f32_arg(float arg) { }
+
+// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
+void func_v2i16_arg(short2 arg) { }
+
+// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
+void func_v3i32_arg(int3 arg) { }
+
+// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
+void func_v4i32_arg(int4 arg) { }
+
+// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
+void func_v16i32_arg(int16 arg) { }
+
+// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
+void func_v32i32_arg(int32 arg) { }
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
- return x;
-}
+// CHECK: define{{.*}} void @func_empty_struct_arg()
+void func_empty_struct_arg(empty_struct empty) { }
-typedef struct two_floats {
- float a;
- float b;
-} two_floats;
+// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
+void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
- return x;
-}
+// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
+void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
-// Double precision floats
-typedef struct double_struct {
- double d;
-} double_struct;
+// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
+void func_struct_arg(struct_arg_t arg1) { }
-// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
-double_struct return_double_struct(double_struct x) {
- return x;
-}
+// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+void func_struct_padding_arg(struct_padding_arg arg1) { }
-// ============================================================================
-// SECTION 2: Structs with full-sized integers - should NOT be coerced
-// ============================================================================
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+void func_struct_char_x8(struct_char_x8 arg) { }
-typedef struct two_ints {
- int a;
- int b;
-} two_ints;
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+void func_struct_char_x4(struct_char_x4 arg) { }
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
- return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+void func_struct_char_x3(struct_char_x3 arg) { }
-typedef struct single_int {
- int a;
-} single_int;
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+void func_struct_char_x2(struct_char_x2 arg) { }
-// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
-single_int return_single_int(single_int x) {
- return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
+void func_struct_char_x1(struct_char_x1 arg) { }
-typedef struct int64_struct {
- long long a;
-} int64_struct;
+// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
+void func_transparent_union_arg(transparent_u u) { }
-// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
-int64_struct return_int64_struct(int64_struct x) {
- return x;
-}
+// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
-// ============================================================================
-// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
-// ============================================================================
+// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
+void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
-// Structs of small integers <= 32 bits should be coerced to i32
-typedef struct small_struct {
- short a;
- short b;
-} small_struct;
+// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
+void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
-small_struct return_small_struct(small_struct x) {
- return x;
-}
+// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone byval(%struct.flexible_array) align 4 captures(none) %arg)
+void func_flexible_array_arg(flexible_array arg) { }
-// Structs of small integers <= 16 bits should be coerced to i16
-typedef struct tiny_struct {
- char a;
- char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
-tiny_struct return_tiny_struct(tiny_struct x) {
- return x;
+// CHECK: define{{.*}} float @func_f32_ret()
+float func_f32_ret()
+{
+ return 0.0f;
}
-// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
-typedef struct eight_chars {
- char a, b, c, d, e, f, g, h;
-} eight_chars;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
-eight_chars return_eight_chars(eight_chars x) {
- return x;
+// CHECK: define{{.*}} void @func_empty_struct_ret()
+empty_struct func_empty_struct_ret()
+{
+ empty_struct s = {};
+ return s;
}
-// Struct of 4 chars (32 bits) should be coerced to i32
-typedef struct four_chars {
- char a, b, c, d;
-} four_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
-four_chars return_four_chars(four_chars x) {
- return x;
+// CHECK: define{{.*}} i32 @single_element_struct_ret()
+// CHECK: ret i32 0
+single_element_struct_arg_t single_element_struct_ret()
+{
+ single_element_struct_arg_t s = { 0 };
+ return s;
}
-// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
-typedef struct four_shorts {
- short a, b, c, d;
-} four_shorts;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
-four_shorts return_four_shorts(four_shorts x) {
- return x;
+// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
+// CHECK: ret i32 0
+nested_single_element_struct_arg_t nested_single_element_struct_ret()
+{
+ nested_single_element_struct_arg_t s = { 0 };
+ return s;
}
-// ============================================================================
-// SECTION 4: Mixed types - floats prevent coercion even with small integers
-// ============================================================================
-
-typedef struct char_and_float {
- char c;
- float f;
-} char_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.char_and_float
-char_and_float return_char_and_float(char_and_float x) {
- return x;
+// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
+// CHECK: ret %struct.struct_arg zeroinitializer
+struct_arg_t func_struct_ret()
+{
+ struct_arg_t s = { 0 };
+ return s;
}
-typedef struct short_and_float {
- short s;
- float f;
-} short_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.short_and_float
-short_and_float return_short_and_float(short_and_float x) {
- return x;
+// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
+// CHECK: ret %struct.struct_padding_arg zeroinitializer
+struct_padding_arg func_struct_padding_ret()
+{
+ struct_padding_arg s = { 0 };
+ return s;
}
-// Small int + full-sized int should NOT be coerced
-typedef struct char_and_int {
- char c;
- int i;
-} char_and_int;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_and_int
-char_and_int return_char_and_int(char_and_int x) {
- return x;
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
+struct_char_x8 func_struct_char_x8_ret()
+{
+ struct_char_x8 s = { 0 };
+ return s;
}
-// ============================================================================
-// SECTION 5: Exotic/Complex aggregates (per reviewer request)
-// ============================================================================
-
-// --- Nested structs ---
-
-typedef struct inner_chars {
- char a, b;
-} inner_chars;
-
-typedef struct outer_with_inner_chars {
- inner_chars inner;
- char c, d;
-} outer_with_inner_chars;
-
-// All chars, 32 bits total - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
-outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
- return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
+// CHECK: ret i32 0
+struct_char_x4 func_struct_char_x4_ret()
+{
+ struct_char_x4 s = { 0 };
+ return s;
}
-typedef struct inner_with_float {
- char c;
- float f;
-} inner_with_float;
-
-typedef struct outer_with_float_inner {
- inner_with_float inner;
-} outer_with_float_inner;
-
-// Nested struct contains float - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
-// CHECK: ret %struct.outer_with_float_inner
-outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
- return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
+// CHECK: ret i32 0
+struct_char_x3 func_struct_char_x3_ret()
+{
+ struct_char_x3 s = { 0 };
+ return s;
}
-// --- Arrays within structs ---
-
-typedef struct char_array_struct {
- char arr[4];
-} char_array_struct;
-
-// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
-char_array_struct return_char_array(char_array_struct x) {
- return x;
+// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
+struct_char_x2 func_struct_char_x2_ret()
+{
+ struct_char_x2 s = { 0 };
+ return s;
}
-typedef struct short_array_struct {
- short arr[2];
-} short_array_struct;
-
-// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
-short_array_struct return_short_array(short_array_struct x) {
- return x;
+// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
+// CHECK: ret i8 0
+struct_char_x1 func_struct_char_x1_ret()
+{
+ struct_char_x1 s = { 0 };
+ return s;
}
-typedef struct int_array_struct {
- int arr[2];
-} int_array_struct;
-
-// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
-// CHECK: ret %struct.int_array_struct
-int_array_struct return_int_array(int_array_struct x) {
- return x;
+// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
+// CHECK: ret %struct.struct_arr16 zeroinitializer
+struct_arr16 func_ret_struct_arr16()
+{
+ struct_arr16 s = { 0 };
+ return s;
}
-typedef struct float_array_struct {
- float arr[2];
-} float_array_struct;
-
-// Array of 2 floats - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
-// CHECK: ret %struct.float_array_struct
-float_array_struct return_float_array(float_array_struct x) {
- return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 captures(none) initializes((0, 128)) %agg.result)
+struct_arr32 func_ret_struct_arr32()
+{
+ struct_arr32 s = { 0 };
+ return s;
}
-// --- Complex combinations ---
-
-typedef struct mixed_nested {
- struct {
- char a;
- char b;
- } inner;
- short s;
-} mixed_nested;
-
-// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
-// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
-mixed_nested return_mixed_nested(mixed_nested x) {
- return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 captures(none) initializes((0, 132)) %agg.result)
+struct_arr33 func_ret_struct_arr33()
+{
+ struct_arr33 s = { 0 };
+ return s;
}
-typedef struct deeply_nested_chars {
- struct {
- struct {
- char a, b;
- } level2;
- char c, d;
- } level1;
-} deeply_nested_chars;
-
-// Deeply nested, but all chars = 32 bits - should be coerced
-// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
-deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
- return x;
+// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
+struct_char_arr32 func_ret_struct_char_arr32()
+{
+ struct_char_arr32 s = { 0 };
+ return s;
}
-typedef struct deeply_nested_with_float {
- struct {
- struct {
- char a;
- float f; // Float buried deep
- } level2;
- } level1;
-} deeply_nested_with_float;
-
-// Float buried in nested struct - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
-// CHECK: ret %struct.deeply_nested_with_float
-deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
- return x;
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
+transparent_u func_transparent_union_ret()
+{
+ transparent_u u = { 0 };
+ return u;
}
-// --- Edge cases ---
-
-// Single char
-typedef struct single_char {
- char c;
-} single_char;
-
-// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
-single_char return_single_char(single_char x) {
- return x;
+// CHECK: define{{.*}} %struct.different_size_type_pair @func_different_size_type_pair_ret()
+different_size_type_pair func_different_size_type_pair_ret()
+{
+ different_size_type_pair s = { 0 };
+ return s;
}
-// Three chars (24 bits, rounds up to 32)
-typedef struct three_chars {
- char a, b, c;
-} three_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
-three_chars return_three_chars(three_chars x) {
- return x;
+// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 captures(none) initializes((0, 4)) %agg.result)
+flexible_array func_flexible_array_ret()
+{
+ flexible_array s = { 0 };
+ return s;
}
-// Five chars (40 bits, rounds up to 64)
-typedef struct five_chars {
- char a, b, c, d, e;
-} five_chars;
+// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
+void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
-five_chars return_five_chars(five_chars x) {
- return x;
-}
+// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 captures(none) %{{.*}})
+void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
-// --- Union tests ---
+// XXX - Why don't the inner structs flatten?
+// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 captures(none) %{{.*}})
+void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
-typedef union char_int_union {
- char c;
- int i;
-} char_int_union;
+// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
+void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { }
-// Union with int - preserves union type
-// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
-char_int_union return_char_int_union(char_int_union x) {
- return x;
+// CHECK: define{{.*}} %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
+double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
+ double_nested_struct s = { 0 };
+ return s;
}
-typedef union float_int_union {
- float f;
- int i;
-} float_int_union;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
+void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
-// Union with float - preserves union type
-// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
-float_int_union return_float_int_union(float_int_union x) {
- return x;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 captures(none) %{{.*}})
+void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
+ *out = arg;
}
-// --- Padding scenarios ---
-
-typedef struct char_with_padding {
- char c;
- // 3 bytes padding
- int i;
-} char_with_padding;
-
-// Has int, should NOT be coerced even though small + padding
-// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_with_padding
-char_with_padding return_char_with_padding(char_with_padding x) {
- return x;
-}
+// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
+void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { }
+
+// Function signature from blender, nothing should be passed byval. The v3i32
+// should not count as 4 passed registers.
+// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
+void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
+
+// Each short4 should fit pack into 2 registers.
+// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+ short4 arg4, short4 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+ short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+ short3 arg4, short3 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+ short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
+void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+ short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+ short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+ struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+ short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+ short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+ short2 arg12, struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+ char2 arg4, char2 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+ char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From 0b1021e3f49fcf9d4f776439c2415a090813e90d Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 22:01:12 +0530
Subject: [PATCH 19/22] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 286 +++++++++++++++++-
1 file changed, 278 insertions(+), 8 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index f827978a8cd18..7857d01f431c8 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,12 +1,16 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-// Check that structs containing floats or full-sized integers (i32, i64) are
-// NOT coerced to integer arrays. They should preserve their original types.
-// However, structs containing only sub-32-bit integer types (char, short)
-// should still be packed into i32 registers.
+// Test AMDGPU ABI struct coercion behavior:
+// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
+// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
+//
+// This tests the fix for the issue where structs like {float, int} were incorrectly
+// coerced to [2 x i32], losing float type information.
-// === Structs with floats - should NOT be coerced to integers ===
+// ============================================================================
+// SECTION 1: Structs with floats - should NOT be coerced to integers
+// ============================================================================
typedef struct fp_int_pair {
float f;
@@ -41,7 +45,19 @@ two_floats return_two_floats(two_floats x) {
return x;
}
-// === Structs with full-sized integers - should NOT be coerced ===
+// Double precision floats
+typedef struct double_struct {
+ double d;
+} double_struct;
+
+// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
+double_struct return_double_struct(double_struct x) {
+ return x;
+}
+
+// ============================================================================
+// SECTION 2: Structs with full-sized integers - should NOT be coerced
+// ============================================================================
typedef struct two_ints {
int a;
@@ -54,7 +70,27 @@ two_ints return_two_ints(two_ints x) {
return x;
}
-// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+typedef struct single_int {
+ int a;
+} single_int;
+
+// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
+single_int return_single_int(single_int x) {
+ return x;
+}
+
+typedef struct int64_struct {
+ long long a;
+} int64_struct;
+
+// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
+int64_struct return_int64_struct(int64_struct x) {
+ return x;
+}
+
+// ============================================================================
+// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
+// ============================================================================
// Structs of small integers <= 32 bits should be coerced to i32
typedef struct small_struct {
@@ -98,7 +134,19 @@ four_chars return_four_chars(four_chars x) {
return x;
}
-// === Mixed tests - floats prevent coercion even with small integers ===
+// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
+typedef struct four_shorts {
+ short a, b, c, d;
+} four_shorts;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
+four_shorts return_four_shorts(four_shorts x) {
+ return x;
+}
+
+// ============================================================================
+// SECTION 4: Mixed types - floats prevent coercion even with small integers
+// ============================================================================
typedef struct char_and_float {
char c;
@@ -110,3 +158,225 @@ typedef struct char_and_float {
char_and_float return_char_and_float(char_and_float x) {
return x;
}
+
+typedef struct short_and_float {
+ short s;
+ float f;
+} short_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.short_and_float
+short_and_float return_short_and_float(short_and_float x) {
+ return x;
+}
+
+// Small int + full-sized int should NOT be coerced
+typedef struct char_and_int {
+ char c;
+ int i;
+} char_and_int;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_and_int
+char_and_int return_char_and_int(char_and_int x) {
+ return x;
+}
+
+// ============================================================================
+// SECTION 5: Exotic/Complex aggregates (per reviewer request)
+// ============================================================================
+
+// --- Nested structs ---
+
+typedef struct inner_chars {
+ char a, b;
+} inner_chars;
+
+typedef struct outer_with_inner_chars {
+ inner_chars inner;
+ char c, d;
+} outer_with_inner_chars;
+
+// All chars, 32 bits total - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
+outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
+ return x;
+}
+
+typedef struct inner_with_float {
+ char c;
+ float f;
+} inner_with_float;
+
+typedef struct outer_with_float_inner {
+ inner_with_float inner;
+} outer_with_float_inner;
+
+// Nested struct contains float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
+// CHECK: ret %struct.outer_with_float_inner
+outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
+ return x;
+}
+
+// --- Arrays within structs ---
+
+typedef struct char_array_struct {
+ char arr[4];
+} char_array_struct;
+
+// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
+char_array_struct return_char_array(char_array_struct x) {
+ return x;
+}
+
+typedef struct short_array_struct {
+ short arr[2];
+} short_array_struct;
+
+// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
+short_array_struct return_short_array(short_array_struct x) {
+ return x;
+}
+
+typedef struct int_array_struct {
+ int arr[2];
+} int_array_struct;
+
+// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
+// CHECK: ret %struct.int_array_struct
+int_array_struct return_int_array(int_array_struct x) {
+ return x;
+}
+
+typedef struct float_array_struct {
+ float arr[2];
+} float_array_struct;
+
+// Array of 2 floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
+// CHECK: ret %struct.float_array_struct
+float_array_struct return_float_array(float_array_struct x) {
+ return x;
+}
+
+// --- Complex combinations ---
+
+typedef struct mixed_nested {
+ struct {
+ char a;
+ char b;
+ } inner;
+ short s;
+} mixed_nested;
+
+// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
+mixed_nested return_mixed_nested(mixed_nested x) {
+ return x;
+}
+
+typedef struct deeply_nested_chars {
+ struct {
+ struct {
+ char a, b;
+ } level2;
+ char c, d;
+ } level1;
+} deeply_nested_chars;
+
+// Deeply nested, but all chars = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
+deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
+ return x;
+}
+
+typedef struct deeply_nested_with_float {
+ struct {
+ struct {
+ char a;
+ float f; // Float buried deep
+ } level2;
+ } level1;
+} deeply_nested_with_float;
+
+// Float buried in nested struct - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
+// CHECK: ret %struct.deeply_nested_with_float
+deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
+ return x;
+}
+
+// --- Edge cases ---
+
+// Single char
+typedef struct single_char {
+ char c;
+} single_char;
+
+// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
+single_char return_single_char(single_char x) {
+ return x;
+}
+
+// Three chars (24 bits, rounds up to 32)
+typedef struct three_chars {
+ char a, b, c;
+} three_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
+three_chars return_three_chars(three_chars x) {
+ return x;
+}
+
+// Five chars (40 bits, rounds up to 64)
+typedef struct five_chars {
+ char a, b, c, d, e;
+} five_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
+five_chars return_five_chars(five_chars x) {
+ return x;
+}
+
+// --- Union tests ---
+
+typedef union char_int_union {
+ char c;
+ int i;
+} char_int_union;
+
+// Union with int - preserves union type
+// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
+char_int_union return_char_int_union(char_int_union x) {
+ return x;
+}
+
+typedef union float_int_union {
+ float f;
+ int i;
+} float_int_union;
+
+// Union with float - preserves union type
+// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
+float_int_union return_float_int_union(float_int_union x) {
+ return x;
+}
+
+// --- Padding scenarios ---
+
+typedef struct char_with_padding {
+ char c;
+ // 3 bytes padding
+ int i;
+} char_with_padding;
+
+// Has int, should NOT be coerced even though small + padding
+// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_with_padding
+char_with_padding return_char_with_padding(char_with_padding x) {
+ return x;
+}
>From cd01f6b98ff395d73775296d0b1b116232c317b7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Thu, 19 Mar 2026 14:34:20 +0530
Subject: [PATCH 20/22] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 224 ++++++++++++++++++
1 file changed, 224 insertions(+)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 7857d01f431c8..4254621409619 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -380,3 +380,227 @@ typedef struct char_with_padding {
char_with_padding return_char_with_padding(char_with_padding x) {
return x;
}
+
+// ============================================================================
+// SECTION 6: Additional exotic aggregates
+// ============================================================================
+
+// --- Bitfields ---
+
+typedef struct bitfield_small {
+ unsigned a : 4;
+ unsigned b : 4;
+ unsigned c : 8;
+} bitfield_small;
+
+// Bitfields with unsigned backing type (32-bit) - should NOT be coerced
+// The field type is 'unsigned' which is >= 32 bits
+// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i32 %x.coerce)
+// CHECK: ret %struct.bitfield_small
+bitfield_small return_bitfield_small(bitfield_small x) {
+ return x;
+}
+
+typedef struct bitfield_chars {
+ char a : 4;
+ char b : 4;
+} bitfield_chars;
+
+// Bitfields with char backing type (8-bit) - should be coerced to i16
+// CHECK-LABEL: define{{.*}} i16 @return_bitfield_chars(i16 %x.coerce)
+bitfield_chars return_bitfield_chars(bitfield_chars x) {
+ return x;
+}
+
+typedef struct bitfield_with_int {
+ unsigned a : 4;
+ unsigned b : 4;
+ int i;
+} bitfield_with_int;
+
+// Bitfields + full int - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.bitfield_with_int
+bitfield_with_int return_bitfield_with_int(bitfield_with_int x) {
+ return x;
+}
+
+typedef struct bitfield_with_float {
+ unsigned a : 16;
+ float f;
+} bitfield_with_float;
+
+// Bitfield + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_float @return_bitfield_with_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bitfield_with_float
+bitfield_with_float return_bitfield_with_float(bitfield_with_float x) {
+ return x;
+}
+
+// --- _Bool fields ---
+
+typedef struct bool_struct {
+ _Bool a;
+ _Bool b;
+ _Bool c;
+ _Bool d;
+} bool_struct;
+
+// 4 bools = 32 bits, all sub-32-bit - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_bool_struct(i32 %x.coerce)
+bool_struct return_bool_struct(bool_struct x) {
+ return x;
+}
+
+typedef struct bool_and_float {
+ _Bool b;
+ float f;
+} bool_and_float;
+
+// Bool + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bool_and_float @return_bool_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bool_and_float
+bool_and_float return_bool_and_float(bool_and_float x) {
+ return x;
+}
+
+typedef struct bool_and_int {
+ _Bool b;
+ int i;
+} bool_and_int;
+
+// Bool + int - should NOT be coerced (int is full-sized)
+// CHECK-LABEL: define{{.*}} %struct.bool_and_int @return_bool_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.bool_and_int
+bool_and_int return_bool_and_int(bool_and_int x) {
+ return x;
+}
+
+// --- Half-precision floats ---
+
+typedef struct half_struct {
+ __fp16 a;
+ __fp16 b;
+} half_struct;
+
+// Two halfs = 32 bits, but floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.half_struct @return_half_struct(half %x.coerce0, half %x.coerce1)
+// CHECK: ret %struct.half_struct
+half_struct return_half_struct(half_struct x) {
+ return x;
+}
+
+typedef struct half_and_char {
+ __fp16 h;
+ char c;
+} half_and_char;
+
+// Half + char - should NOT be coerced (half is float type)
+// CHECK-LABEL: define{{.*}} %struct.half_and_char @return_half_and_char(half %x.coerce0, i8 %x.coerce1)
+// CHECK: ret %struct.half_and_char
+half_and_char return_half_and_char(half_and_char x) {
+ return x;
+}
+
+typedef struct four_halfs {
+ __fp16 a, b, c, d;
+} four_halfs;
+
+// Four halfs = 64 bits - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.four_halfs @return_four_halfs(half %x.coerce0, half %x.coerce1, half %x.coerce2, half %x.coerce3)
+// CHECK: ret %struct.four_halfs
+four_halfs return_four_halfs(four_halfs x) {
+ return x;
+}
+
+// --- Vectors inside structs ---
+
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+
+typedef struct vec_int2_struct {
+ int2 v;
+} vec_int2_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <2 x i32> @return_vec_int2(<2 x i32> %x.coerce)
+vec_int2_struct return_vec_int2(vec_int2_struct x) {
+ return x;
+}
+
+typedef struct vec_float2_struct {
+ float2 v;
+} vec_float2_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <2 x float> @return_vec_float2(<2 x float> %x.coerce)
+vec_float2_struct return_vec_float2(vec_float2_struct x) {
+ return x;
+}
+
+typedef struct vec_char4_struct {
+ char4 v;
+} vec_char4_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <4 x i8> @return_vec_char4(<4 x i8> %x.coerce)
+vec_char4_struct return_vec_char4(vec_char4_struct x) {
+ return x;
+}
+
+typedef struct vec_and_scalar {
+ char4 v;
+ int i;
+} vec_and_scalar;
+
+// Vector + scalar - should NOT be coerced (vector is not a packable integer type)
+// CHECK-LABEL: define{{.*}} %struct.vec_and_scalar @return_vec_and_scalar(<4 x i8> %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.vec_and_scalar
+vec_and_scalar return_vec_and_scalar(vec_and_scalar x) {
+ return x;
+}
+
+// --- Arrays of nested structs ---
+
+typedef struct inner_two_chars {
+ char a, b;
+} inner_two_chars;
+
+typedef struct array_of_nested_chars {
+ inner_two_chars arr[2];
+} array_of_nested_chars;
+
+// Array of 2 nested structs, each with 2 chars = 32 bits total - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_array_of_nested_chars(i32 %x.coerce)
+array_of_nested_chars return_array_of_nested_chars(array_of_nested_chars x) {
+ return x;
+}
+
+typedef struct inner_char_float {
+ char c;
+ float f;
+} inner_char_float;
+
+typedef struct array_of_nested_floats {
+ inner_char_float arr[1];
+} array_of_nested_floats;
+
+// Array of nested struct containing float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats(%struct.inner_char_float %x.coerce)
+// CHECK: ret %struct.array_of_nested_floats
+array_of_nested_floats return_array_of_nested_floats(array_of_nested_floats x) {
+ return x;
+}
+
+typedef struct nested_array_of_shorts {
+ struct {
+ short arr[2];
+ } inner;
+} nested_array_of_shorts;
+
+// Nested struct with array of shorts = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_nested_array_of_shorts(i32 %x.coerce)
+nested_array_of_shorts return_nested_array_of_shorts(nested_array_of_shorts x) {
+ return x;
+}
>From db475cfc05d83c6fc513f452e7d40e66c71fe0d9 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Thu, 19 Mar 2026 15:01:05 +0530
Subject: [PATCH 21/22] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 4254621409619..62f5c3ec51421 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -394,8 +394,8 @@ typedef struct bitfield_small {
} bitfield_small;
// Bitfields with unsigned backing type (32-bit) - should NOT be coerced
-// The field type is 'unsigned' which is >= 32 bits
-// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i32 %x.coerce)
+// The field type is 'unsigned' which is >= 32 bits, struct layout is {i16, [2 x i8]}
+// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i16 %x.coerce0, [2 x i8] %x.coerce1)
// CHECK: ret %struct.bitfield_small
bitfield_small return_bitfield_small(bitfield_small x) {
return x;
@@ -419,7 +419,8 @@ typedef struct bitfield_with_int {
} bitfield_with_int;
// Bitfields + full int - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i32 %x.coerce0, i32 %x.coerce1)
+// Bitfield packs into i8, then padding, then i32
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i8 %x.coerce0, i32 %x.coerce1)
// CHECK: ret %struct.bitfield_with_int
bitfield_with_int return_bitfield_with_int(bitfield_with_int x) {
return x;
@@ -587,7 +588,7 @@ typedef struct array_of_nested_floats {
} array_of_nested_floats;
// Array of nested struct containing float - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats(%struct.inner_char_float %x.coerce)
+// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats([1 x %struct.inner_char_float] %x.coerce)
// CHECK: ret %struct.array_of_nested_floats
array_of_nested_floats return_array_of_nested_floats(array_of_nested_floats x) {
return x;
>From f91d53f32979b3d740cc446fc3859bcf0acb147b Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 22 Mar 2026 21:34:53 +0530
Subject: [PATCH 22/22] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 64 ++++++++++++++++++-
1 file changed, 63 insertions(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 62f5c3ec51421..38a5918417da6 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
// Test AMDGPU ABI struct coercion behavior:
@@ -514,6 +513,69 @@ four_halfs return_four_halfs(four_halfs x) {
return x;
}
+// --- Bfloat16 tests ---
+
+typedef struct bfloat_struct {
+ __bf16 a;
+ __bf16 b;
+} bfloat_struct;
+
+// Two bfloats = 32 bits, but floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bfloat_struct @return_bfloat_struct(bfloat %x.coerce0, bfloat %x.coerce1)
+// CHECK: ret %struct.bfloat_struct
+bfloat_struct return_bfloat_struct(bfloat_struct x) {
+ return x;
+}
+
+typedef struct bfloat_and_char {
+ __bf16 b;
+ char c;
+} bfloat_and_char;
+
+// Bfloat + char - should NOT be coerced (bfloat is float type)
+// CHECK-LABEL: define{{.*}} %struct.bfloat_and_char @return_bfloat_and_char(bfloat %x.coerce0, i8 %x.coerce1)
+// CHECK: ret %struct.bfloat_and_char
+bfloat_and_char return_bfloat_and_char(bfloat_and_char x) {
+ return x;
+}
+
+typedef struct four_bfloats {
+ __bf16 a, b, c, d;
+} four_bfloats;
+
+// Four bfloats = 64 bits - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.four_bfloats @return_four_bfloats(bfloat %x.coerce0, bfloat %x.coerce1, bfloat %x.coerce2, bfloat %x.coerce3)
+// CHECK: ret %struct.four_bfloats
+four_bfloats return_four_bfloats(four_bfloats x) {
+ return x;
+}
+
+// --- Mixed half and bfloat ---
+
+typedef struct mixed_half_bfloat {
+ __fp16 h;
+ __bf16 b;
+} mixed_half_bfloat;
+
+// Mixed half + bfloat - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.mixed_half_bfloat @return_mixed_half_bfloat(half %x.coerce0, bfloat %x.coerce1)
+// CHECK: ret %struct.mixed_half_bfloat
+mixed_half_bfloat return_mixed_half_bfloat(mixed_half_bfloat x) {
+ return x;
+}
+
+typedef struct bfloat_and_float {
+ __bf16 b;
+ float f;
+} bfloat_and_float;
+
+// Bfloat + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bfloat_and_float @return_bfloat_and_float(bfloat %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bfloat_and_float
+bfloat_and_float return_bfloat_and_float(bfloat_and_float x) {
+ return x;
+}
+
// --- Vectors inside structs ---
typedef int int2 __attribute__((ext_vector_type(2)));
More information about the cfe-commits
mailing list