[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Mar 15 09:23:31 PDT 2026
https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083
>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 01/16] Create amdgpu-abi-struct-coerce.c
---
.../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+ float f;
+ int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+ return x;
+}
+
+typedef struct int_fp_pair {
+ int i;
+ float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+ return x;
+}
+
+typedef struct two_floats {
+ float a;
+ float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+ return x;
+}
+
+typedef struct two_ints {
+ int a;
+ int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+ return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+ short a;
+ short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+ return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+ char a;
+ char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+ return x;
+}
>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 02/16] Delete
clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
---
.../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 -------------------
1 file changed, 71 deletions(-)
delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
- float f;
- int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
- return x;
-}
-
-typedef struct int_fp_pair {
- int i;
- float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
- return x;
-}
-
-typedef struct two_floats {
- float a;
- float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
- return x;
-}
-
-typedef struct two_ints {
- int a;
- int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
- return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
- short a;
- short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
- return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
- char a;
- char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
- return x;
-}
>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 03/16] fix for clang abi lowering
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+ float f;
+ int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+ return x;
+}
+
+typedef struct int_fp_pair {
+ int i;
+ float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+ return x;
+}
+
+typedef struct two_floats {
+ float a;
+ float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+ return x;
+}
+
+typedef struct two_ints {
+ int a;
+ int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+ return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+ short a;
+ short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+ return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+ char a;
+ char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+ return x;
+}
>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 04/16] Update amdgcn-openmp-device-math-complex.c
---
clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df2..34c05e2974a64 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
// CHECK-LABEL: define {{.*}}test_complex_f32
#pragma omp target
{
- // CHECK: call [2 x i32] @__divsc3
- // CHECK: call [2 x i32] @__mulsc3
+ // CHECK: call { float, float } @__divsc3
+ // CHECK: call { float, float } @__mulsc3
(void)(a * (a / a));
}
}
>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 05/16] Update amdgpu-abi-struct-coerce.cl
---
.../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..a13f8e8bbe119 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
void func_struct_padding_arg(struct_padding_arg arg1) { }
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
void func_struct_char_x8(struct_char_x8 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
void func_struct_char_x4(struct_char_x4 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
void func_struct_char_x3(struct_char_x3 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
void func_struct_char_x2(struct_char_x2 arg) { }
// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
return s;
}
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 06/16] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..f3c4b5ad0837b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
- if (Size <= 64) {
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
- }
-
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
}
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
- // Pack aggregates <= 8 bytes into single VGPR or pair.
- uint64_t Size = getContext().getTypeSize(Ty);
- if (Size <= 64) {
- unsigned NumRegs = (Size + 31) / 32;
- NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
- if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
- // XXX: Should this be i64 instead, and should the limit increase?
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
- }
-
if (NumRegsLeft > 0) {
uint64_t NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 07/16] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe119..fb5ba69c86c6d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
// CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 08/16] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d..3e4506b88aac6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 09/16] Update amdgpu-abi-struct-coerce.cl
>From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:43:50 +0530
Subject: [PATCH 10/16] Update amdgpu-variadic-call.c
---
clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c
index 17eda215211a2..22402118d862f 100644
--- a/clang/test/CodeGen/amdgpu-variadic-call.c
+++ b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -217,10 +217,9 @@ typedef union
} union_f32_i32;
// CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0
// CHECK-NEXT: tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
@@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1)
}
// CHECK-LABEL: define {{[^@]+}}@multiple_two
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[V1]] to double
// CHECK-NEXT: [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0
// CHECK-NEXT: tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
// CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
>From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:00:33 +0530
Subject: [PATCH 11/16] Update amdgpu-abi-struct-coerce.cl
---
.../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 3e4506b88aac6..06d3cdb01deb2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
void func_struct_padding_arg(struct_padding_arg arg1) { }
-// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
void func_struct_char_x8(struct_char_x8 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
void func_struct_char_x4(struct_char_x4 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
void func_struct_char_x3(struct_char_x3 arg) { }
-// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
void func_struct_char_x2(struct_char_x2 arg) { }
// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret()
return s;
}
-// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
-// CHECK: ret %struct.struct_char_x8 zeroinitializer
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
struct_char_x8 func_struct_char_x8_ret()
{
struct_char_x8 s = { 0 };
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
>From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:14 +0530
Subject: [PATCH 12/16] Update amdgpu-abi-struct-coerce.c
---
clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++---
1 file changed, 48 insertions(+), 7 deletions(-)
diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2399630ff797b..f827978a8cd18 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,8 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
+// Check that structs containing floats or full-sized integers (i32, i64) are
+// NOT coerced to integer arrays. They should preserve their original types.
+// However, structs containing only sub-32-bit integer types (char, short)
+// should still be packed into i32 registers.
+
+// === Structs with floats - should NOT be coerced to integers ===
typedef struct fp_int_pair {
float f;
@@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) {
return x;
}
+// === Structs with full-sized integers - should NOT be coerced ===
+
typedef struct two_ints {
int a;
int b;
@@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) {
return x;
}
-// Structs <= 32 bits should still be coerced to i32 for return value
+// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+
+// Structs of small integers <= 32 bits should be coerced to i32
typedef struct small_struct {
short a;
short b;
} small_struct;
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
small_struct return_small_struct(small_struct x) {
return x;
}
-// Structs <= 16 bits should still be coerced to i16 for return value
+// Structs of small integers <= 16 bits should be coerced to i16
typedef struct tiny_struct {
char a;
char b;
} tiny_struct;
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
tiny_struct return_tiny_struct(tiny_struct x) {
return x;
}
+
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+ char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+ return x;
+}
+
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+ char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+ return x;
+}
+
+// === Mixed tests - floats prevent coercion even with small integers ===
+
+typedef struct char_and_float {
+ char c;
+ float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+ return x;
+}
>From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:59 +0530
Subject: [PATCH 13/16] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++--
1 file changed, 90 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f3c4b5ad0837b..9e0ca7b77ecdd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
return Members * NumRegs <= MaxNumRegsForArgsRet;
}
+/// Check if all fields in an aggregate type contain only sub-32-bit integer
+/// types. Such aggregates should be packed into i32 registers rather than
+/// passed as individual elements. Aggregates containing floats or full-sized
+/// integer types (i32, i64) should preserve their original types.
+static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
+ const ASTContext &Context) {
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+
+ // Recursively check nested structs
+ if (const auto *NestedRD = FieldTy->getAsRecordDecl()) {
+ if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+ return false;
+ continue;
+ }
+
+ // Arrays - check the element type
+ if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
+ QualType EltTy = AT->getElementType();
+ if (const auto *NestedRD = EltTy->getAsRecordDecl()) {
+ if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+ return false;
+ continue;
+ }
+ // For non-struct array elements, check if they're packable integers
+ if (!EltTy->isIntegerType())
+ return false;
+ uint64_t EltSize = Context.getTypeSize(EltTy);
+ if (EltSize >= 32)
+ return false;
+ continue;
+ }
+
+ // Floating point types should not be packed into integers
+ if (FieldTy->isFloatingType())
+ return false;
+
+ // Only integer types that are smaller than 32 bits should be packed
+ if (!FieldTy->isIntegerType())
+ return false;
+
+ uint64_t FieldSize = Context.getTypeSize(FieldTy);
+ if (FieldSize >= 32)
+ return false;
+ }
+ return true;
+}
+
/// Estimate number of registers the type will use when passed in registers.
uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
uint64_t NumRegs = 0;
@@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyReturnType(RetTy);
- // Pack aggregates <= 4 bytes into single VGPR or pair.
+ // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+ // contain sub-32-bit integer types. Aggregates with floats or full-sized
+ // integers should preserve their original types.
uint64_t Size = getContext().getTypeSize(RetTy);
- if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ if (Size <= 64) {
+ const auto *RD = RetTy->getAsRecordDecl();
+ bool ShouldPackToInt =
+ RD && containsOnlyPackableIntegerTypes(RD, getContext());
- if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ if (ShouldPackToInt) {
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+ }
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
@@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
+ // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+ // contain sub-32-bit integer types. Aggregates with floats or full-sized
+ // integers (i32, i64) should preserve their original types.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ const auto *RD = Ty->getAsRecordDecl();
+ bool ShouldPackToInt =
+ RD && containsOnlyPackableIntegerTypes(RD, getContext());
+
+ if (ShouldPackToInt) {
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+ }
+
if (NumRegsLeft > 0) {
uint64_t NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
>From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:15:11 +0530
Subject: [PATCH 14/16] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 829500383a34a..4918bdcd8111b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (ShouldPackToInt) {
if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
@@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
- return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(
+ llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
>From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:28:52 +0530
Subject: [PATCH 15/16] Update AMDGPU.cpp
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4918bdcd8111b..06b066de59055 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
- return ABIArgInfo::getDirect(
- llvm::Type::getInt16Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
- return ABIArgInfo::getDirect(
- llvm::Type::getInt32Ty(getVMContext()));
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
>From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:53:13 +0530
Subject: [PATCH 16/16] Update amdgpu-abi-struct-coerce.cl
---
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32()
return s;
}
-// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret i32 0
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
transparent_u func_transparent_union_ret()
{
transparent_u u = { 0 };
More information about the cfe-commits
mailing list