[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)

Sun Mar 22 09:05:15 PDT 2026

https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083

>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 01/22] Create amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 02/22] Delete
 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 -------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
-    int i;
-    float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
-
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
-
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
-}

>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 03/22] fix for clang abi lowering

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 04/22] Update amdgcn-openmp-device-math-complex.c

---
 clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df2..34c05e2974a64 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
 // CHECK-LABEL: define {{.*}}test_complex_f32
 #pragma omp target
   {
-    // CHECK: call [2 x i32] @__divsc3
-    // CHECK: call [2 x i32] @__mulsc3
+    // CHECK: call { float, float } @__divsc3
+    // CHECK: call { float, float } @__mulsc3
     (void)(a * (a / a));
   }
 }

>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 05/22] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..a13f8e8bbe119 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 06/22] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..f3c4b5ad0837b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
       if (Size <= 32)
         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
-      if (Size <= 64) {
-        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-      }
-
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
     }
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
-    // Pack aggregates <= 8 bytes into single VGPR or pair.
-    uint64_t Size = getContext().getTypeSize(Ty);
-    if (Size <= 64) {
-      unsigned NumRegs = (Size + 31) / 32;
-      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
-      // XXX: Should this be i64 instead, and should the limit increase?
-      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-    }
-
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 07/22] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe119..fb5ba69c86c6d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
 
 // CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
 // CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };

>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 08/22] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d..3e4506b88aac6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 09/22] Update amdgpu-abi-struct-coerce.cl


>From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:43:50 +0530
Subject: [PATCH 10/22] Update amdgpu-variadic-call.c

---
 clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c
index 17eda215211a2..22402118d862f 100644
--- a/clang/test/CodeGen/amdgpu-variadic-call.c
+++ b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -217,10 +217,9 @@ typedef union
 } union_f32_i32;
 
 // CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
@@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1)
 }
 
 // CHECK-LABEL: define {{[^@]+}}@multiple_two
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
 // CHECK-NEXT:    [[CONV:%.*]] = fpext float [[V1]] to double
 // CHECK-NEXT:    [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
 // CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]

>From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:00:33 +0530
Subject: [PATCH 11/22] Update amdgpu-abi-struct-coerce.cl

---
 .../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 3e4506b88aac6..06d3cdb01deb2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
-// CHECK: ret %struct.struct_char_x8 zeroinitializer
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:14 +0530
Subject: [PATCH 12/22] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++---
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2399630ff797b..f827978a8cd18 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,8 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
 
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
+// Check that structs containing floats or full-sized integers (i32, i64) are
+// NOT coerced to integer arrays. They should preserve their original types.
+// However, structs containing only sub-32-bit integer types (char, short)
+// should still be packed into i32 registers.
+
+// === Structs with floats - should NOT be coerced to integers ===
 
 typedef struct fp_int_pair {
     float f;
@@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) {
     return x;
 }
 
+// === Structs with full-sized integers - should NOT be coerced ===
+
 typedef struct two_ints {
     int a;
     int b;
@@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) {
     return x;
 }
 
-// Structs <= 32 bits should still be coerced to i32 for return value
+// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+
+// Structs of small integers <= 32 bits should be coerced to i32
 typedef struct small_struct {
     short a;
     short b;
 } small_struct;
 
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
 small_struct return_small_struct(small_struct x) {
     return x;
 }
 
-// Structs <= 16 bits should still be coerced to i16 for return value
+// Structs of small integers <= 16 bits should be coerced to i16
 typedef struct tiny_struct {
     char a;
     char b;
 } tiny_struct;
 
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
 tiny_struct return_tiny_struct(tiny_struct x) {
     return x;
 }
+
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+    char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+    return x;
+}
+
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+    char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+    return x;
+}
+
+// === Mixed tests - floats prevent coercion even with small integers ===
+
+typedef struct char_and_float {
+    char c;
+    float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+    return x;
+}

>From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:59 +0530
Subject: [PATCH 13/22] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f3c4b5ad0837b..9e0ca7b77ecdd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
   return Members * NumRegs <= MaxNumRegsForArgsRet;
 }
 
+/// Check if all fields in an aggregate type contain only sub-32-bit integer
+/// types. Such aggregates should be packed into i32 registers rather than
+/// passed as individual elements. Aggregates containing floats or full-sized
+/// integer types (i32, i64) should preserve their original types.
+static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
+                                             const ASTContext &Context) {
+  for (const FieldDecl *Field : RD->fields()) {
+    QualType FieldTy = Field->getType();
+
+    // Recursively check nested structs
+    if (const auto *NestedRD = FieldTy->getAsRecordDecl()) {
+      if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+        return false;
+      continue;
+    }
+
+    // Arrays - check the element type
+    if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
+      QualType EltTy = AT->getElementType();
+      if (const auto *NestedRD = EltTy->getAsRecordDecl()) {
+        if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+          return false;
+        continue;
+      }
+      // For non-struct array elements, check if they're packable integers
+      if (!EltTy->isIntegerType())
+        return false;
+      uint64_t EltSize = Context.getTypeSize(EltTy);
+      if (EltSize >= 32)
+        return false;
+      continue;
+    }
+
+    // Floating point types should not be packed into integers
+    if (FieldTy->isFloatingType())
+      return false;
+
+    // Only integer types that are smaller than 32 bits should be packed
+    if (!FieldTy->isIntegerType())
+      return false;
+
+    uint64_t FieldSize = Context.getTypeSize(FieldTy);
+    if (FieldSize >= 32)
+      return false;
+  }
+  return true;
+}
+
 /// Estimate number of registers the type will use when passed in registers.
 uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
   uint64_t NumRegs = 0;
@@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
           RD && RD->hasFlexibleArrayMember())
         return DefaultABIInfo::classifyReturnType(RetTy);
 
-      // Pack aggregates <= 4 bytes into single VGPR or pair.
+      // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+      // contain sub-32-bit integer types. Aggregates with floats or full-sized
+      // integers should preserve their original types.
       uint64_t Size = getContext().getTypeSize(RetTy);
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+      if (Size <= 64) {
+        const auto *RD = RetTy->getAsRecordDecl();
+        bool ShouldPackToInt =
+            RD && containsOnlyPackableIntegerTypes(RD, getContext());
 
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+        if (ShouldPackToInt) {
+          if (Size <= 16)
+            return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+          if (Size <= 32)
+            return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+          llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+          return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+        }
+      }
 
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
@@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
+    // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+    // contain sub-32-bit integer types. Aggregates with floats or full-sized
+    // integers (i32, i64) should preserve their original types.
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 64) {
+      const auto *RD = Ty->getAsRecordDecl();
+      bool ShouldPackToInt =
+          RD && containsOnlyPackableIntegerTypes(RD, getContext());
+
+      if (ShouldPackToInt) {
+        unsigned NumRegs = (Size + 31) / 32;
+        NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+        if (Size <= 16)
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+        if (Size <= 32)
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+      }
+    }
+
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:15:11 +0530
Subject: [PATCH 14/22] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 829500383a34a..4918bdcd8111b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
 
         if (ShouldPackToInt) {
           if (Size <= 16)
-            return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt16Ty(getVMContext()));
 
           if (Size <= 32)
-            return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt32Ty(getVMContext()));
 
           llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
           return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
@@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:28:52 +0530
Subject: [PATCH 15/22] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4918bdcd8111b..06b066de59055 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:53:13 +0530
Subject: [PATCH 16/22] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32()
   return s;
 }
 
-// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret i32 0
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
 transparent_u func_transparent_union_ret()
 {
   transparent_u u = { 0 };

>From 928aa4ed1558e7e2d52461df83d0f80004d317e0 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:58:43 +0530
Subject: [PATCH 17/22] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++-----------
 1 file changed, 319 insertions(+), 466 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index e9cdb7f5da32a..7857d01f431c8 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,529 +1,382 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck %s
-
-typedef __attribute__(( ext_vector_type(2) )) char char2;
-typedef __attribute__(( ext_vector_type(3) )) char char3;
-typedef __attribute__(( ext_vector_type(4) )) char char4;
-
-typedef __attribute__(( ext_vector_type(2) )) short short2;
-typedef __attribute__(( ext_vector_type(3) )) short short3;
-typedef __attribute__(( ext_vector_type(4) )) short short4;
-
-typedef __attribute__(( ext_vector_type(2) )) int int2;
-typedef __attribute__(( ext_vector_type(3) )) int int3;
-typedef __attribute__(( ext_vector_type(4) )) int int4;
-typedef __attribute__(( ext_vector_type(16) )) int int16;
-typedef __attribute__(( ext_vector_type(32) )) int int32;
-
-// CHECK: %struct.empty_struct = type {}
-typedef struct empty_struct
-{
-} empty_struct;
-
-// CHECK-NOT: %struct.single_element_struct_arg
-typedef struct single_element_struct_arg
-{
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Test AMDGPU ABI struct coercion behavior:
+// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
+// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
+//
+// This tests the fix for the issue where structs like {float, int} were incorrectly
+// coerced to [2 x i32], losing float type information.
+
+// ============================================================================
+// SECTION 1: Structs with floats - should NOT be coerced to integers
+// ============================================================================
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
     int i;
-} single_element_struct_arg_t;
-
-// CHECK-NOT: %struct.nested_single_element_struct_arg
-typedef struct nested_single_element_struct_arg
-{
-  single_element_struct_arg_t i;
-} nested_single_element_struct_arg_t;
-
-// CHECK: %struct.struct_arg = type { i32, float, i32 }
-typedef struct struct_arg
-{
-    int i1;
     float f;
-    int i2;
-} struct_arg_t;
-
-// CHECK: %struct.struct_padding_arg = type { i8, i64 }
-typedef struct struct_padding_arg
-{
-  char i1;
-  long f;
-} struct_padding_arg;
-
-// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 }
-typedef struct struct_of_arrays_arg
-{
-    int i1[2];
-    float f1;
-    int i2[4];
-    float f2[3];
-    int i3;
-} struct_of_arrays_arg_t;
-
-// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 }
-typedef struct struct_of_structs_arg
-{
-    int i1;
-    float f1;
-    struct_arg_t s1;
-    int i2;
-} struct_of_structs_arg_t;
-
-typedef union
-{
-  int b1;
-  float b2;
-} transparent_u __attribute__((__transparent_union__));
-
-// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
-typedef struct single_array_element_struct_arg
-{
-    int i[4];
-} single_array_element_struct_arg_t;
-
-// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
-// CHECK: %struct.inner = type { i32, i64 }
-typedef struct single_struct_element_struct_arg
-{
-  struct inner {
-    int a;
-    long b;
-  } s;
-} single_struct_element_struct_arg_t;
-
-// CHECK: %struct.different_size_type_pair
-typedef struct different_size_type_pair {
-  long l;
-  int i;
-} different_size_type_pair;
-
-// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
-typedef struct flexible_array
-{
-  int i;
-  int flexible[];
-} flexible_array;
-
-// CHECK: %struct.struct_arr16 = type { [16 x i32] }
-typedef struct struct_arr16
-{
-    int arr[16];
-} struct_arr16;
-
-// CHECK: %struct.struct_arr32 = type { [32 x i32] }
-typedef struct struct_arr32
-{
-    int arr[32];
-} struct_arr32;
-
-// CHECK: %struct.struct_arr33 = type { [33 x i32] }
-typedef struct struct_arr33
-{
-    int arr[33];
-} struct_arr33;
-
-// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
-typedef struct struct_char_arr32
-{
-  char arr[32];
-} struct_char_arr32;
-
-// CHECK-NOT: %struct.struct_char_x8
-typedef struct struct_char_x8 {
-  char x, y, z, w;
-  char a, b, c, d;
-} struct_char_x8;
-
-// CHECK-NOT: %struct.struct_char_x4
-typedef struct struct_char_x4 {
-  char x, y, z, w;
-} struct_char_x4;
-
-// CHECK-NOT: %struct.struct_char_x3
-typedef struct struct_char_x3 {
-  char x, y, z;
-} struct_char_x3;
-
-// CHECK-NOT: %struct.struct_char_x2
-typedef struct struct_char_x2 {
-  char x, y;
-} struct_char_x2;
-
-// CHECK-NOT: %struct.struct_char_x1
-typedef struct struct_char_x1 {
-  char x;
-} struct_char_x1;
-
-// 4 registers from fields, 5 if padding included.
-// CHECK: %struct.nested = type { i8, i64 }
-// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
-typedef struct num_regs_nested_struct {
-  int x;
-  struct nested {
-    char z;
-    long y;
-  } inner;
-} num_regs_nested_struct;
-
-// CHECK: %struct.double_nested = type { %struct.inner_inner }
-// CHECK: %struct.inner_inner = type { i8, i32, i8 }
-// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 }
-typedef struct double_nested_struct {
-  int x;
-  struct double_nested {
-    struct inner_inner {
-      char y;
-      int q;
-      char z;
-    } inner_inner;
-  } inner;
-
-  short w;
-} double_nested_struct;
-
-// This is a large struct, but uses fewer registers than the limit.
-// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
-typedef struct large_struct_padding {
-  char e0;
-  int e1;
-  char e2;
-  int e3;
-  char e4;
-  char e5;
-  short e6;
-  short e7;
-  char e8[3];
-  long e9;
-  int e10;
-  char e11;
-  int e12;
-  short e13;
-  char e14;
-} large_struct_padding;
-
-// The number of registers computed should be 6, not 8.
-typedef struct int3_pair {
-	int3 dx;
-	int3 dy;
-} int3_pair;
-
-// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
-typedef struct struct_4regs
-{
-  int x;
-  int y;
-  int z;
-  int w;
-} struct_4regs;
-
-// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
-__kernel void kernel_empty_struct_arg(empty_struct s) { }
-
-// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
-__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
-__kernel void kernel_struct_arg(struct_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
-__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
-
-// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4)
-__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
-
-// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3)
-__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
-
-// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
-__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
-
-// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce)
-__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
-__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
-
-// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
-// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
-__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
-
-// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
-void func_f32_arg(float arg) { }
-
-// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
-void func_v2i16_arg(short2 arg) { }
-
-// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
-void func_v3i32_arg(int3 arg) { }
-
-// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
-void func_v4i32_arg(int4 arg) { }
-
-// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
-void func_v16i32_arg(int16 arg) { }
-
-// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
-void func_v32i32_arg(int32 arg) { }
+} int_fp_pair;
 
-// CHECK: define{{.*}} void @func_empty_struct_arg()
-void func_empty_struct_arg(empty_struct empty) { }
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
 
-// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
-void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
 
-// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
-void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
 
-// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
-void func_struct_arg(struct_arg_t arg1) { }
+// Double precision floats
+typedef struct double_struct {
+    double d;
+} double_struct;
 
-// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
-void func_struct_padding_arg(struct_padding_arg arg1) { }
+// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
+double_struct return_double_struct(double_struct x) {
+    return x;
+}
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
-void func_struct_char_x8(struct_char_x8 arg) { }
+// ============================================================================
+// SECTION 2: Structs with full-sized integers - should NOT be coerced
+// ============================================================================
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
-void func_struct_char_x4(struct_char_x4 arg) { }
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
-void func_struct_char_x3(struct_char_x3 arg) { }
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
-void func_struct_char_x2(struct_char_x2 arg) { }
+typedef struct single_int {
+    int a;
+} single_int;
 
-// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
-void func_struct_char_x1(struct_char_x1 arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
+single_int return_single_int(single_int x) {
+    return x;
+}
 
-// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
-void func_transparent_union_arg(transparent_u u) { }
+typedef struct int64_struct {
+    long long a;
+} int64_struct;
 
-// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
-void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
+int64_struct return_int64_struct(int64_struct x) {
+    return x;
+}
 
-// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
-void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+// ============================================================================
+// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
+// ============================================================================
 
-// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
-void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
+// Structs of small integers <= 32 bits should be coerced to i32
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
 
-// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone byval(%struct.flexible_array) align 4 captures(none) %arg)
-void func_flexible_array_arg(flexible_array arg) { }
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
 
-// CHECK: define{{.*}} float @func_f32_ret()
-float func_f32_ret()
-{
-  return 0.0f;
+// Structs of small integers <= 16 bits should be coerced to i16
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_empty_struct_ret()
-empty_struct func_empty_struct_ret()
-{
-  empty_struct s = {};
-  return s;
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+    char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @single_element_struct_ret()
-// CHECK: ret i32 0
-single_element_struct_arg_t single_element_struct_ret()
-{
-  single_element_struct_arg_t s = { 0 };
-  return s;
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+    char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
-// CHECK: ret i32 0
-nested_single_element_struct_arg_t nested_single_element_struct_ret()
-{
-  nested_single_element_struct_arg_t s = { 0 };
-  return s;
+// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
+typedef struct four_shorts {
+    short a, b, c, d;
+} four_shorts;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
+four_shorts return_four_shorts(four_shorts x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
-// CHECK: ret %struct.struct_arg zeroinitializer
-struct_arg_t func_struct_ret()
-{
-  struct_arg_t s = { 0 };
-  return s;
+// ============================================================================
+// SECTION 4: Mixed types - floats prevent coercion even with small integers
+// ============================================================================
+
+typedef struct char_and_float {
+    char c;
+    float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
-// CHECK: ret %struct.struct_padding_arg zeroinitializer
-struct_padding_arg func_struct_padding_ret()
-{
-  struct_padding_arg s = { 0 };
-  return s;
+typedef struct short_and_float {
+    short s;
+    float f;
+} short_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.short_and_float
+short_and_float return_short_and_float(short_and_float x) {
+    return x;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
-struct_char_x8 func_struct_char_x8_ret()
-{
-  struct_char_x8 s = { 0 };
-  return s;
+// Small int + full-sized int should NOT be coerced
+typedef struct char_and_int {
+    char c;
+    int i;
+} char_and_int;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_and_int
+char_and_int return_char_and_int(char_and_int x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
-// CHECK: ret i32 0
-struct_char_x4 func_struct_char_x4_ret()
-{
-  struct_char_x4 s = { 0 };
-  return s;
+// ============================================================================
+// SECTION 5: Exotic/Complex aggregates (per reviewer request)
+// ============================================================================
+
+// --- Nested structs ---
+
+typedef struct inner_chars {
+    char a, b;
+} inner_chars;
+
+typedef struct outer_with_inner_chars {
+    inner_chars inner;
+    char c, d;
+} outer_with_inner_chars;
+
+// All chars, 32 bits total - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
+outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
-// CHECK: ret i32 0
-struct_char_x3 func_struct_char_x3_ret()
-{
-  struct_char_x3 s = { 0 };
-  return s;
+typedef struct inner_with_float {
+    char c;
+    float f;
+} inner_with_float;
+
+typedef struct outer_with_float_inner {
+    inner_with_float inner;
+} outer_with_float_inner;
+
+// Nested struct contains float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
+// CHECK: ret %struct.outer_with_float_inner
+outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
-struct_char_x2 func_struct_char_x2_ret()
-{
-  struct_char_x2 s = { 0 };
-  return s;
+// --- Arrays within structs ---
+
+typedef struct char_array_struct {
+    char arr[4];
+} char_array_struct;
+
+// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
+char_array_struct return_char_array(char_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
-// CHECK: ret i8 0
-struct_char_x1 func_struct_char_x1_ret()
-{
-  struct_char_x1 s = { 0 };
-  return s;
+typedef struct short_array_struct {
+    short arr[2];
+} short_array_struct;
+
+// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
+short_array_struct return_short_array(short_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
-// CHECK: ret %struct.struct_arr16 zeroinitializer
-struct_arr16 func_ret_struct_arr16()
-{
-  struct_arr16 s = { 0 };
-  return s;
+typedef struct int_array_struct {
+    int arr[2];
+} int_array_struct;
+
+// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
+// CHECK: ret %struct.int_array_struct
+int_array_struct return_int_array(int_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 captures(none) initializes((0, 128)) %agg.result)
-struct_arr32 func_ret_struct_arr32()
-{
-  struct_arr32 s = { 0 };
-  return s;
+typedef struct float_array_struct {
+    float arr[2];
+} float_array_struct;
+
+// Array of 2 floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
+// CHECK: ret %struct.float_array_struct
+float_array_struct return_float_array(float_array_struct x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 captures(none) initializes((0, 132)) %agg.result)
-struct_arr33 func_ret_struct_arr33()
-{
-  struct_arr33 s = { 0 };
-  return s;
+// --- Complex combinations ---
+
+typedef struct mixed_nested {
+    struct {
+        char a;
+        char b;
+    } inner;
+    short s;
+} mixed_nested;
+
+// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
+mixed_nested return_mixed_nested(mixed_nested x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
-struct_char_arr32 func_ret_struct_char_arr32()
-{
-  struct_char_arr32 s = { 0 };
-  return s;
+typedef struct deeply_nested_chars {
+    struct {
+        struct {
+            char a, b;
+        } level2;
+        char c, d;
+    } level1;
+} deeply_nested_chars;
+
+// Deeply nested, but all chars = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
+deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret %union.transparent_u zeroinitializer
-transparent_u func_transparent_union_ret()
-{
-  transparent_u u = { 0 };
-  return u;
+typedef struct deeply_nested_with_float {
+    struct {
+        struct {
+            char a;
+            float f;  // Float buried deep
+        } level2;
+    } level1;
+} deeply_nested_with_float;
+
+// Float buried in nested struct - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
+// CHECK: ret %struct.deeply_nested_with_float
+deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
+    return x;
 }
 
-// CHECK: define{{.*}} %struct.different_size_type_pair @func_different_size_type_pair_ret()
-different_size_type_pair func_different_size_type_pair_ret()
-{
-  different_size_type_pair s = { 0 };
-  return s;
+// --- Edge cases ---
+
+// Single char
+typedef struct single_char {
+    char c;
+} single_char;
+
+// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
+single_char return_single_char(single_char x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 captures(none) initializes((0, 4)) %agg.result)
-flexible_array func_flexible_array_ret()
-{
-  flexible_array s = { 0 };
-  return s;
+// Three chars (24 bits, rounds up to 32)
+typedef struct three_chars {
+    char a, b, c;
+} three_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
+three_chars return_three_chars(three_chars x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
-void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
+// Five chars (40 bits, rounds up to 64)
+typedef struct five_chars {
+    char a, b, c, d, e;
+} five_chars;
 
-// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 captures(none) %{{.*}})
-void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
+five_chars return_five_chars(five_chars x) {
+    return x;
+}
 
-// XXX - Why don't the inner structs flatten?
-// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 captures(none) %{{.*}})
-void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
+// --- Union tests ---
 
-// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
-void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { }
+typedef union char_int_union {
+    char c;
+    int i;
+} char_int_union;
 
-// CHECK: define{{.*}} %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
-double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
-  double_nested_struct s = { 0 };
-  return s;
+// Union with int - preserves union type
+// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
+char_int_union return_char_int_union(char_int_union x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
-void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
+typedef union float_int_union {
+    float f;
+    int i;
+} float_int_union;
 
-// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 captures(none) %{{.*}})
-void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
-  *out = arg;
+// Union with float - preserves union type
+// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
+float_int_union return_float_int_union(float_int_union x) {
+    return x;
 }
 
-// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
-void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { }
-
-// Function signature from blender, nothing should be passed byval. The v3i32
-// should not count as 4 passed registers.
-// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
-void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
-
-// Each short4 should fit pack into 2 registers.
-// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
-                     short4 arg4, short4 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
-                               short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
-                     short3 arg4, short3 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
-                          short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
-void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
-                     short2 arg4, short2 arg5, short2 arg6, short2 arg7,
-                     short2 arg8, short2 arg9, short2 arg10, short2 arg11,
-                     struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
-                          short2 arg4, short2 arg5, short2 arg6, short2 arg7,
-                          short2 arg8, short2 arg9, short2 arg10, short2 arg11,
-                          short2 arg12, struct_4regs arg13) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
-void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
-                    char2 arg4, char2 arg5, struct_4regs arg6) { }
-
-// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
-void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
-                         char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
-
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
-void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }
+// --- Padding scenarios ---
+
+typedef struct char_with_padding {
+    char c;
+    // 3 bytes padding
+    int i;
+} char_with_padding;
+
+// Has int, should NOT be coerced even though small + padding
+// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_with_padding
+char_with_padding return_char_with_padding(char_with_padding x) {
+    return x;
+}

>From 8657523a6b425b05207956fc43db0025cd13fa51 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 22:00:44 +0530
Subject: [PATCH 18/22] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++++++-------
 1 file changed, 466 insertions(+), 319 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 7857d01f431c8..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,382 +1,529 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Test AMDGPU ABI struct coercion behavior:
-// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
-// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
-//
-// This tests the fix for the issue where structs like {float, int} were incorrectly
-// coerced to [2 x i32], losing float type information.
-
-// ============================================================================
-// SECTION 1: Structs with floats - should NOT be coerced to integers
-// ============================================================================
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+typedef __attribute__(( ext_vector_type(2) )) char char2;
+typedef __attribute__(( ext_vector_type(3) )) char char3;
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+typedef __attribute__(( ext_vector_type(2) )) short short2;
+typedef __attribute__(( ext_vector_type(3) )) short short3;
+typedef __attribute__(( ext_vector_type(4) )) short short4;
+
+typedef __attribute__(( ext_vector_type(2) )) int int2;
+typedef __attribute__(( ext_vector_type(3) )) int int3;
+typedef __attribute__(( ext_vector_type(4) )) int int4;
+typedef __attribute__(( ext_vector_type(16) )) int int16;
+typedef __attribute__(( ext_vector_type(32) )) int int32;
+
+// CHECK: %struct.empty_struct = type {}
+typedef struct empty_struct
+{
+} empty_struct;
+
+// CHECK-NOT: %struct.single_element_struct_arg
+typedef struct single_element_struct_arg
+{
     int i;
+} single_element_struct_arg_t;
+
+// CHECK-NOT: %struct.nested_single_element_struct_arg
+typedef struct nested_single_element_struct_arg
+{
+  single_element_struct_arg_t i;
+} nested_single_element_struct_arg_t;
+
+// CHECK: %struct.struct_arg = type { i32, float, i32 }
+typedef struct struct_arg
+{
+    int i1;
     float f;
-} int_fp_pair;
+    int i2;
+} struct_arg_t;
+
+// CHECK: %struct.struct_padding_arg = type { i8, i64 }
+typedef struct struct_padding_arg
+{
+  char i1;
+  long f;
+} struct_padding_arg;
+
+// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 }
+typedef struct struct_of_arrays_arg
+{
+    int i1[2];
+    float f1;
+    int i2[4];
+    float f2[3];
+    int i3;
+} struct_of_arrays_arg_t;
+
+// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 }
+typedef struct struct_of_structs_arg
+{
+    int i1;
+    float f1;
+    struct_arg_t s1;
+    int i2;
+} struct_of_structs_arg_t;
+
+typedef union
+{
+  int b1;
+  float b2;
+} transparent_u __attribute__((__transparent_union__));
+
+// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] }
+typedef struct single_array_element_struct_arg
+{
+    int i[4];
+} single_array_element_struct_arg_t;
+
+// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner }
+// CHECK: %struct.inner = type { i32, i64 }
+typedef struct single_struct_element_struct_arg
+{
+  struct inner {
+    int a;
+    long b;
+  } s;
+} single_struct_element_struct_arg_t;
+
+// CHECK: %struct.different_size_type_pair
+typedef struct different_size_type_pair {
+  long l;
+  int i;
+} different_size_type_pair;
+
+// CHECK: %struct.flexible_array = type { i32, [0 x i32] }
+typedef struct flexible_array
+{
+  int i;
+  int flexible[];
+} flexible_array;
+
+// CHECK: %struct.struct_arr16 = type { [16 x i32] }
+typedef struct struct_arr16
+{
+    int arr[16];
+} struct_arr16;
+
+// CHECK: %struct.struct_arr32 = type { [32 x i32] }
+typedef struct struct_arr32
+{
+    int arr[32];
+} struct_arr32;
+
+// CHECK: %struct.struct_arr33 = type { [33 x i32] }
+typedef struct struct_arr33
+{
+    int arr[33];
+} struct_arr33;
+
+// CHECK: %struct.struct_char_arr32 = type { [32 x i8] }
+typedef struct struct_char_arr32
+{
+  char arr[32];
+} struct_char_arr32;
+
+// CHECK-NOT: %struct.struct_char_x8
+typedef struct struct_char_x8 {
+  char x, y, z, w;
+  char a, b, c, d;
+} struct_char_x8;
+
+// CHECK-NOT: %struct.struct_char_x4
+typedef struct struct_char_x4 {
+  char x, y, z, w;
+} struct_char_x4;
+
+// CHECK-NOT: %struct.struct_char_x3
+typedef struct struct_char_x3 {
+  char x, y, z;
+} struct_char_x3;
+
+// CHECK-NOT: %struct.struct_char_x2
+typedef struct struct_char_x2 {
+  char x, y;
+} struct_char_x2;
+
+// CHECK-NOT: %struct.struct_char_x1
+typedef struct struct_char_x1 {
+  char x;
+} struct_char_x1;
+
+// 4 registers from fields, 5 if padding included.
+// CHECK: %struct.nested = type { i8, i64 }
+// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested }
+typedef struct num_regs_nested_struct {
+  int x;
+  struct nested {
+    char z;
+    long y;
+  } inner;
+} num_regs_nested_struct;
+
+// CHECK: %struct.double_nested = type { %struct.inner_inner }
+// CHECK: %struct.inner_inner = type { i8, i32, i8 }
+// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 }
+typedef struct double_nested_struct {
+  int x;
+  struct double_nested {
+    struct inner_inner {
+      char y;
+      int q;
+      char z;
+    } inner_inner;
+  } inner;
+
+  short w;
+} double_nested_struct;
+
+// This is a large struct, but uses fewer registers than the limit.
+// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 }
+typedef struct large_struct_padding {
+  char e0;
+  int e1;
+  char e2;
+  int e3;
+  char e4;
+  char e5;
+  short e6;
+  short e7;
+  char e8[3];
+  long e9;
+  int e10;
+  char e11;
+  int e12;
+  short e13;
+  char e14;
+} large_struct_padding;
+
+// The number of registers computed should be 6, not 8.
+typedef struct int3_pair {
+	int3 dx;
+	int3 dy;
+} int3_pair;
+
+// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 }
+typedef struct struct_4regs
+{
+  int x;
+  int y;
+  int z;
+  int w;
+} struct_4regs;
+
+// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
+__kernel void kernel_empty_struct_arg(empty_struct s) { }
+
+// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
+__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
+__kernel void kernel_struct_arg(struct_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
+
+// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4)
+__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
+
+// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3)
+__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
+
+// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
+__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
+
+// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
+__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
+
+// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
+__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
+
+// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
+void func_f32_arg(float arg) { }
+
+// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg)
+void func_v2i16_arg(short2 arg) { }
+
+// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg)
+void func_v3i32_arg(int3 arg) { }
+
+// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg)
+void func_v4i32_arg(int4 arg) { }
+
+// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg)
+void func_v16i32_arg(int16 arg) { }
+
+// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg)
+void func_v32i32_arg(int32 arg) { }
 
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_empty_struct_arg()
+void func_empty_struct_arg(empty_struct empty) { }
 
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
+// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce)
+void func_single_element_struct_arg(single_element_struct_arg_t arg1) { }
 
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
+// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce)
+void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
 
-// Double precision floats
-typedef struct double_struct {
-    double d;
-} double_struct;
+// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
+void func_struct_arg(struct_arg_t arg1) { }
 
-// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
-double_struct return_double_struct(double_struct x) {
-    return x;
-}
+// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
+void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// ============================================================================
-// SECTION 2: Structs with full-sized integers - should NOT be coerced
-// ============================================================================
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+void func_struct_char_x8(struct_char_x8 arg) { }
 
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+void func_struct_char_x3(struct_char_x3 arg) { }
 
-typedef struct single_int {
-    int a;
-} single_int;
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+void func_struct_char_x2(struct_char_x2 arg) { }
 
-// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
-single_int return_single_int(single_int x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
+void func_struct_char_x1(struct_char_x1 arg) { }
 
-typedef struct int64_struct {
-    long long a;
-} int64_struct;
+// CHECK: void @func_transparent_union_arg(i32 %u.coerce)
+void func_transparent_union_arg(transparent_u u) { }
 
-// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
-int64_struct return_int64_struct(int64_struct x) {
-    return x;
-}
+// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce)
+void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
 
-// ============================================================================
-// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
-// ============================================================================
+// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
+void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
 
-// Structs of small integers <= 32 bits should be coerced to i32
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
+// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
+void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
+// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone byval(%struct.flexible_array) align 4 captures(none) %arg)
+void func_flexible_array_arg(flexible_array arg) { }
 
-// Structs of small integers <= 16 bits should be coerced to i16
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
+// CHECK: define{{.*}} float @func_f32_ret()
+float func_f32_ret()
+{
+  return 0.0f;
 }
 
-// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
-typedef struct eight_chars {
-    char a, b, c, d, e, f, g, h;
-} eight_chars;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
-eight_chars return_eight_chars(eight_chars x) {
-    return x;
+// CHECK: define{{.*}} void @func_empty_struct_ret()
+empty_struct func_empty_struct_ret()
+{
+  empty_struct s = {};
+  return s;
 }
 
-// Struct of 4 chars (32 bits) should be coerced to i32
-typedef struct four_chars {
-    char a, b, c, d;
-} four_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
-four_chars return_four_chars(four_chars x) {
-    return x;
+// CHECK: define{{.*}} i32 @single_element_struct_ret()
+// CHECK: ret i32 0
+single_element_struct_arg_t single_element_struct_ret()
+{
+  single_element_struct_arg_t s = { 0 };
+  return s;
 }
 
-// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
-typedef struct four_shorts {
-    short a, b, c, d;
-} four_shorts;
-
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
-four_shorts return_four_shorts(four_shorts x) {
-    return x;
+// CHECK: define{{.*}} i32 @nested_single_element_struct_ret()
+// CHECK: ret i32 0
+nested_single_element_struct_arg_t nested_single_element_struct_ret()
+{
+  nested_single_element_struct_arg_t s = { 0 };
+  return s;
 }
 
-// ============================================================================
-// SECTION 4: Mixed types - floats prevent coercion even with small integers
-// ============================================================================
-
-typedef struct char_and_float {
-    char c;
-    float f;
-} char_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.char_and_float
-char_and_float return_char_and_float(char_and_float x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret()
+// CHECK: ret %struct.struct_arg zeroinitializer
+struct_arg_t func_struct_ret()
+{
+  struct_arg_t s = { 0 };
+  return s;
 }
 
-typedef struct short_and_float {
-    short s;
-    float f;
-} short_and_float;
-
-// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.short_and_float
-short_and_float return_short_and_float(short_and_float x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret()
+// CHECK: ret %struct.struct_padding_arg zeroinitializer
+struct_padding_arg func_struct_padding_ret()
+{
+  struct_padding_arg s = { 0 };
+  return s;
 }
 
-// Small int + full-sized int should NOT be coerced
-typedef struct char_and_int {
-    char c;
-    int i;
-} char_and_int;
-
-// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_and_int
-char_and_int return_char_and_int(char_and_int x) {
-    return x;
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
+struct_char_x8 func_struct_char_x8_ret()
+{
+  struct_char_x8 s = { 0 };
+  return s;
 }
 
-// ============================================================================
-// SECTION 5: Exotic/Complex aggregates (per reviewer request)
-// ============================================================================
-
-// --- Nested structs ---
-
-typedef struct inner_chars {
-    char a, b;
-} inner_chars;
-
-typedef struct outer_with_inner_chars {
-    inner_chars inner;
-    char c, d;
-} outer_with_inner_chars;
-
-// All chars, 32 bits total - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
-outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
-    return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x4_ret()
+// CHECK: ret i32 0
+struct_char_x4 func_struct_char_x4_ret()
+{
+  struct_char_x4 s = { 0 };
+  return s;
 }
 
-typedef struct inner_with_float {
-    char c;
-    float f;
-} inner_with_float;
-
-typedef struct outer_with_float_inner {
-    inner_with_float inner;
-} outer_with_float_inner;
-
-// Nested struct contains float - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
-// CHECK: ret %struct.outer_with_float_inner
-outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
-    return x;
+// CHECK: define{{.*}} i32 @func_struct_char_x3_ret()
+// CHECK: ret i32 0
+struct_char_x3 func_struct_char_x3_ret()
+{
+  struct_char_x3 s = { 0 };
+  return s;
 }
 
-// --- Arrays within structs ---
-
-typedef struct char_array_struct {
-    char arr[4];
-} char_array_struct;
-
-// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
-char_array_struct return_char_array(char_array_struct x) {
-    return x;
+// CHECK: define{{.*}} i16 @func_struct_char_x2_ret()
+struct_char_x2 func_struct_char_x2_ret()
+{
+  struct_char_x2 s = { 0 };
+  return s;
 }
 
-typedef struct short_array_struct {
-    short arr[2];
-} short_array_struct;
-
-// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
-// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
-short_array_struct return_short_array(short_array_struct x) {
-    return x;
+// CHECK: define{{.*}} i8 @func_struct_char_x1_ret()
+// CHECK: ret i8 0
+struct_char_x1 func_struct_char_x1_ret()
+{
+  struct_char_x1 s = { 0 };
+  return s;
 }
 
-typedef struct int_array_struct {
-    int arr[2];
-} int_array_struct;
-
-// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
-// CHECK: ret %struct.int_array_struct
-int_array_struct return_int_array(int_array_struct x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16()
+// CHECK: ret %struct.struct_arr16 zeroinitializer
+struct_arr16 func_ret_struct_arr16()
+{
+  struct_arr16 s = { 0 };
+  return s;
 }
 
-typedef struct float_array_struct {
-    float arr[2];
-} float_array_struct;
-
-// Array of 2 floats - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
-// CHECK: ret %struct.float_array_struct
-float_array_struct return_float_array(float_array_struct x) {
-    return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 captures(none) initializes((0, 128)) %agg.result)
+struct_arr32 func_ret_struct_arr32()
+{
+  struct_arr32 s = { 0 };
+  return s;
 }
 
-// --- Complex combinations ---
-
-typedef struct mixed_nested {
-    struct {
-        char a;
-        char b;
-    } inner;
-    short s;
-} mixed_nested;
-
-// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
-// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
-mixed_nested return_mixed_nested(mixed_nested x) {
-    return x;
+// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 captures(none) initializes((0, 132)) %agg.result)
+struct_arr33 func_ret_struct_arr33()
+{
+  struct_arr33 s = { 0 };
+  return s;
 }
 
-typedef struct deeply_nested_chars {
-    struct {
-        struct {
-            char a, b;
-        } level2;
-        char c, d;
-    } level1;
-} deeply_nested_chars;
-
-// Deeply nested, but all chars = 32 bits - should be coerced
-// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
-deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
-    return x;
+// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32()
+struct_char_arr32 func_ret_struct_char_arr32()
+{
+  struct_char_arr32 s = { 0 };
+  return s;
 }
 
-typedef struct deeply_nested_with_float {
-    struct {
-        struct {
-            char a;
-            float f;  // Float buried deep
-        } level2;
-    } level1;
-} deeply_nested_with_float;
-
-// Float buried in nested struct - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
-// CHECK: ret %struct.deeply_nested_with_float
-deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
-    return x;
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
+transparent_u func_transparent_union_ret()
+{
+  transparent_u u = { 0 };
+  return u;
 }
 
-// --- Edge cases ---
-
-// Single char
-typedef struct single_char {
-    char c;
-} single_char;
-
-// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
-single_char return_single_char(single_char x) {
-    return x;
+// CHECK: define{{.*}} %struct.different_size_type_pair @func_different_size_type_pair_ret()
+different_size_type_pair func_different_size_type_pair_ret()
+{
+  different_size_type_pair s = { 0 };
+  return s;
 }
 
-// Three chars (24 bits, rounds up to 32)
-typedef struct three_chars {
-    char a, b, c;
-} three_chars;
-
-// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
-three_chars return_three_chars(three_chars x) {
-    return x;
+// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 captures(none) initializes((0, 4)) %agg.result)
+flexible_array func_flexible_array_ret()
+{
+  flexible_array s = { 0 };
+  return s;
 }
 
-// Five chars (40 bits, rounds up to 64)
-typedef struct five_chars {
-    char a, b, c, d, e;
-} five_chars;
+// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
+void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
-five_chars return_five_chars(five_chars x) {
-    return x;
-}
+// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 captures(none) %{{.*}})
+void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
-// --- Union tests ---
+// XXX - Why don't the inner structs flatten?
+// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 captures(none) %{{.*}})
+void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
-typedef union char_int_union {
-    char c;
-    int i;
-} char_int_union;
+// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
+void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { }
 
-// Union with int - preserves union type
-// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
-char_int_union return_char_int_union(char_int_union x) {
-    return x;
+// CHECK: define{{.*}} %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1)
+double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
+  double_nested_struct s = { 0 };
+  return s;
 }
 
-typedef union float_int_union {
-    float f;
-    int i;
-} float_int_union;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
+void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// Union with float - preserves union type
-// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
-float_int_union return_float_int_union(float_int_union x) {
-    return x;
+// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 captures(none) %{{.*}})
+void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
+  *out = arg;
 }
 
-// --- Padding scenarios ---
-
-typedef struct char_with_padding {
-    char c;
-    // 3 bytes padding
-    int i;
-} char_with_padding;
-
-// Has int, should NOT be coerced even though small + padding
-// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.char_with_padding
-char_with_padding return_char_with_padding(char_with_padding x) {
-    return x;
-}
+// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2)
+void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { }
+
+// Function signature from blender, nothing should be passed byval. The v3i32
+// should not count as 4 passed registers.
+// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5)
+void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
+
+// Each short4 should fit pack into 2 registers.
+// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+                     short4 arg4, short4 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
+                               short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                     short3 arg4, short3 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
+                          short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3)
+void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                     short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                     short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                     struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
+                          short2 arg4, short2 arg5, short2 arg6, short2 arg7,
+                          short2 arg8, short2 arg9, short2 arg10, short2 arg11,
+                          short2 arg12, struct_4regs arg13) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3)
+void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                    char2 arg4, char2 arg5, struct_4regs arg6) { }
+
+// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}})
+void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
+                         char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
+
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From 0b1021e3f49fcf9d4f776439c2415a090813e90d Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 22:01:12 +0530
Subject: [PATCH 19/22] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 286 +++++++++++++++++-
 1 file changed, 278 insertions(+), 8 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index f827978a8cd18..7857d01f431c8 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,12 +1,16 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
 
-// Check that structs containing floats or full-sized integers (i32, i64) are
-// NOT coerced to integer arrays. They should preserve their original types.
-// However, structs containing only sub-32-bit integer types (char, short)
-// should still be packed into i32 registers.
+// Test AMDGPU ABI struct coercion behavior:
+// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers
+// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types
+//
+// This tests the fix for the issue where structs like {float, int} were incorrectly
+// coerced to [2 x i32], losing float type information.
 
-// === Structs with floats - should NOT be coerced to integers ===
+// ============================================================================
+// SECTION 1: Structs with floats - should NOT be coerced to integers
+// ============================================================================
 
 typedef struct fp_int_pair {
     float f;
@@ -41,7 +45,19 @@ two_floats return_two_floats(two_floats x) {
     return x;
 }
 
-// === Structs with full-sized integers - should NOT be coerced ===
+// Double precision floats
+typedef struct double_struct {
+    double d;
+} double_struct;
+
+// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce)
+double_struct return_double_struct(double_struct x) {
+    return x;
+}
+
+// ============================================================================
+// SECTION 2: Structs with full-sized integers - should NOT be coerced
+// ============================================================================
 
 typedef struct two_ints {
     int a;
@@ -54,7 +70,27 @@ two_ints return_two_ints(two_ints x) {
     return x;
 }
 
-// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+typedef struct single_int {
+    int a;
+} single_int;
+
+// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce)
+single_int return_single_int(single_int x) {
+    return x;
+}
+
+typedef struct int64_struct {
+    long long a;
+} int64_struct;
+
+// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce)
+int64_struct return_int64_struct(int64_struct x) {
+    return x;
+}
+
+// ============================================================================
+// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced
+// ============================================================================
 
 // Structs of small integers <= 32 bits should be coerced to i32
 typedef struct small_struct {
@@ -98,7 +134,19 @@ four_chars return_four_chars(four_chars x) {
     return x;
 }
 
-// === Mixed tests - floats prevent coercion even with small integers ===
+// Struct of 4 shorts (64 bits) should be coerced to [2 x i32]
+typedef struct four_shorts {
+    short a, b, c, d;
+} four_shorts;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce)
+four_shorts return_four_shorts(four_shorts x) {
+    return x;
+}
+
+// ============================================================================
+// SECTION 4: Mixed types - floats prevent coercion even with small integers
+// ============================================================================
 
 typedef struct char_and_float {
     char c;
@@ -110,3 +158,225 @@ typedef struct char_and_float {
 char_and_float return_char_and_float(char_and_float x) {
     return x;
 }
+
+typedef struct short_and_float {
+    short s;
+    float f;
+} short_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.short_and_float
+short_and_float return_short_and_float(short_and_float x) {
+    return x;
+}
+
+// Small int + full-sized int should NOT be coerced
+typedef struct char_and_int {
+    char c;
+    int i;
+} char_and_int;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_and_int
+char_and_int return_char_and_int(char_and_int x) {
+    return x;
+}
+
+// ============================================================================
+// SECTION 5: Exotic/Complex aggregates (per reviewer request)
+// ============================================================================
+
+// --- Nested structs ---
+
+typedef struct inner_chars {
+    char a, b;
+} inner_chars;
+
+typedef struct outer_with_inner_chars {
+    inner_chars inner;
+    char c, d;
+} outer_with_inner_chars;
+
+// All chars, 32 bits total - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce)
+outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) {
+    return x;
+}
+
+typedef struct inner_with_float {
+    char c;
+    float f;
+} inner_with_float;
+
+typedef struct outer_with_float_inner {
+    inner_with_float inner;
+} outer_with_float_inner;
+
+// Nested struct contains float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce)
+// CHECK: ret %struct.outer_with_float_inner
+outer_with_float_inner return_nested_with_float(outer_with_float_inner x) {
+    return x;
+}
+
+// --- Arrays within structs ---
+
+typedef struct char_array_struct {
+    char arr[4];
+} char_array_struct;
+
+// Array of 4 chars = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce)
+char_array_struct return_char_array(char_array_struct x) {
+    return x;
+}
+
+typedef struct short_array_struct {
+    short arr[2];
+} short_array_struct;
+
+// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce)
+short_array_struct return_short_array(short_array_struct x) {
+    return x;
+}
+
+typedef struct int_array_struct {
+    int arr[2];
+} int_array_struct;
+
+// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce)
+// CHECK: ret %struct.int_array_struct
+int_array_struct return_int_array(int_array_struct x) {
+    return x;
+}
+
+typedef struct float_array_struct {
+    float arr[2];
+} float_array_struct;
+
+// Array of 2 floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce)
+// CHECK: ret %struct.float_array_struct
+float_array_struct return_float_array(float_array_struct x) {
+    return x;
+}
+
+// --- Complex combinations ---
+
+typedef struct mixed_nested {
+    struct {
+        char a;
+        char b;
+    } inner;
+    short s;
+} mixed_nested;
+
+// All small integers (nested anonymous struct + short) = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce)
+mixed_nested return_mixed_nested(mixed_nested x) {
+    return x;
+}
+
+typedef struct deeply_nested_chars {
+    struct {
+        struct {
+            char a, b;
+        } level2;
+        char c, d;
+    } level1;
+} deeply_nested_chars;
+
+// Deeply nested, but all chars = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce)
+deeply_nested_chars return_deeply_nested(deeply_nested_chars x) {
+    return x;
+}
+
+typedef struct deeply_nested_with_float {
+    struct {
+        struct {
+            char a;
+            float f;  // Float buried deep
+        } level2;
+    } level1;
+} deeply_nested_with_float;
+
+// Float buried in nested struct - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float
+// CHECK: ret %struct.deeply_nested_with_float
+deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) {
+    return x;
+}
+
+// --- Edge cases ---
+
+// Single char
+typedef struct single_char {
+    char c;
+} single_char;
+
+// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce)
+single_char return_single_char(single_char x) {
+    return x;
+}
+
+// Three chars (24 bits, rounds up to 32)
+typedef struct three_chars {
+    char a, b, c;
+} three_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce)
+three_chars return_three_chars(three_chars x) {
+    return x;
+}
+
+// Five chars (40 bits, rounds up to 64)
+typedef struct five_chars {
+    char a, b, c, d, e;
+} five_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce)
+five_chars return_five_chars(five_chars x) {
+    return x;
+}
+
+// --- Union tests ---
+
+typedef union char_int_union {
+    char c;
+    int i;
+} char_int_union;
+
+// Union with int - preserves union type
+// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce)
+char_int_union return_char_int_union(char_int_union x) {
+    return x;
+}
+
+typedef union float_int_union {
+    float f;
+    int i;
+} float_int_union;
+
+// Union with float - preserves union type
+// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce)
+float_int_union return_float_int_union(float_int_union x) {
+    return x;
+}
+
+// --- Padding scenarios ---
+
+typedef struct char_with_padding {
+    char c;
+    // 3 bytes padding
+    int i;
+} char_with_padding;
+
+// Has int, should NOT be coerced even though small + padding
+// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.char_with_padding
+char_with_padding return_char_with_padding(char_with_padding x) {
+    return x;
+}

>From cd01f6b98ff395d73775296d0b1b116232c317b7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Thu, 19 Mar 2026 14:34:20 +0530
Subject: [PATCH 20/22] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 224 ++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 7857d01f431c8..4254621409619 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -380,3 +380,227 @@ typedef struct char_with_padding {
 char_with_padding return_char_with_padding(char_with_padding x) {
     return x;
 }
+
+// ============================================================================
+// SECTION 6: Additional exotic aggregates
+// ============================================================================
+
+// --- Bitfields ---
+
+typedef struct bitfield_small {
+    unsigned a : 4;
+    unsigned b : 4;
+    unsigned c : 8;
+} bitfield_small;
+
+// Bitfields with unsigned backing type (32-bit) - should NOT be coerced
+// The field type is 'unsigned' which is >= 32 bits
+// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i32 %x.coerce)
+// CHECK: ret %struct.bitfield_small
+bitfield_small return_bitfield_small(bitfield_small x) {
+    return x;
+}
+
+typedef struct bitfield_chars {
+    char a : 4;
+    char b : 4;
+} bitfield_chars;
+
+// Bitfields with char backing type (8-bit) - should be coerced to i16
+// CHECK-LABEL: define{{.*}} i16 @return_bitfield_chars(i16 %x.coerce)
+bitfield_chars return_bitfield_chars(bitfield_chars x) {
+    return x;
+}
+
+typedef struct bitfield_with_int {
+    unsigned a : 4;
+    unsigned b : 4;
+    int i;
+} bitfield_with_int;
+
+// Bitfields + full int - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.bitfield_with_int
+bitfield_with_int return_bitfield_with_int(bitfield_with_int x) {
+    return x;
+}
+
+typedef struct bitfield_with_float {
+    unsigned a : 16;
+    float f;
+} bitfield_with_float;
+
+// Bitfield + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_float @return_bitfield_with_float(i16 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bitfield_with_float
+bitfield_with_float return_bitfield_with_float(bitfield_with_float x) {
+    return x;
+}
+
+// --- _Bool fields ---
+
+typedef struct bool_struct {
+    _Bool a;
+    _Bool b;
+    _Bool c;
+    _Bool d;
+} bool_struct;
+
+// 4 bools = 32 bits, all sub-32-bit - should be coerced to i32
+// CHECK-LABEL: define{{.*}} i32 @return_bool_struct(i32 %x.coerce)
+bool_struct return_bool_struct(bool_struct x) {
+    return x;
+}
+
+typedef struct bool_and_float {
+    _Bool b;
+    float f;
+} bool_and_float;
+
+// Bool + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bool_and_float @return_bool_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bool_and_float
+bool_and_float return_bool_and_float(bool_and_float x) {
+    return x;
+}
+
+typedef struct bool_and_int {
+    _Bool b;
+    int i;
+} bool_and_int;
+
+// Bool + int - should NOT be coerced (int is full-sized)
+// CHECK-LABEL: define{{.*}} %struct.bool_and_int @return_bool_and_int(i8 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.bool_and_int
+bool_and_int return_bool_and_int(bool_and_int x) {
+    return x;
+}
+
+// --- Half-precision floats ---
+
+typedef struct half_struct {
+    __fp16 a;
+    __fp16 b;
+} half_struct;
+
+// Two halfs = 32 bits, but floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.half_struct @return_half_struct(half %x.coerce0, half %x.coerce1)
+// CHECK: ret %struct.half_struct
+half_struct return_half_struct(half_struct x) {
+    return x;
+}
+
+typedef struct half_and_char {
+    __fp16 h;
+    char c;
+} half_and_char;
+
+// Half + char - should NOT be coerced (half is float type)
+// CHECK-LABEL: define{{.*}} %struct.half_and_char @return_half_and_char(half %x.coerce0, i8 %x.coerce1)
+// CHECK: ret %struct.half_and_char
+half_and_char return_half_and_char(half_and_char x) {
+    return x;
+}
+
+typedef struct four_halfs {
+    __fp16 a, b, c, d;
+} four_halfs;
+
+// Four halfs = 64 bits - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.four_halfs @return_four_halfs(half %x.coerce0, half %x.coerce1, half %x.coerce2, half %x.coerce3)
+// CHECK: ret %struct.four_halfs
+four_halfs return_four_halfs(four_halfs x) {
+    return x;
+}
+
+// --- Vectors inside structs ---
+
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+
+typedef struct vec_int2_struct {
+    int2 v;
+} vec_int2_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <2 x i32> @return_vec_int2(<2 x i32> %x.coerce)
+vec_int2_struct return_vec_int2(vec_int2_struct x) {
+    return x;
+}
+
+typedef struct vec_float2_struct {
+    float2 v;
+} vec_float2_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <2 x float> @return_vec_float2(<2 x float> %x.coerce)
+vec_float2_struct return_vec_float2(vec_float2_struct x) {
+    return x;
+}
+
+typedef struct vec_char4_struct {
+    char4 v;
+} vec_char4_struct;
+
+// Single-element vector struct - unwrapped to vector type
+// CHECK-LABEL: define{{.*}} <4 x i8> @return_vec_char4(<4 x i8> %x.coerce)
+vec_char4_struct return_vec_char4(vec_char4_struct x) {
+    return x;
+}
+
+typedef struct vec_and_scalar {
+    char4 v;
+    int i;
+} vec_and_scalar;
+
+// Vector + scalar - should NOT be coerced (vector is not a packable integer type)
+// CHECK-LABEL: define{{.*}} %struct.vec_and_scalar @return_vec_and_scalar(<4 x i8> %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.vec_and_scalar
+vec_and_scalar return_vec_and_scalar(vec_and_scalar x) {
+    return x;
+}
+
+// --- Arrays of nested structs ---
+
+typedef struct inner_two_chars {
+    char a, b;
+} inner_two_chars;
+
+typedef struct array_of_nested_chars {
+    inner_two_chars arr[2];
+} array_of_nested_chars;
+
+// Array of 2 nested structs, each with 2 chars = 32 bits total - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_array_of_nested_chars(i32 %x.coerce)
+array_of_nested_chars return_array_of_nested_chars(array_of_nested_chars x) {
+    return x;
+}
+
+typedef struct inner_char_float {
+    char c;
+    float f;
+} inner_char_float;
+
+typedef struct array_of_nested_floats {
+    inner_char_float arr[1];
+} array_of_nested_floats;
+
+// Array of nested struct containing float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats(%struct.inner_char_float %x.coerce)
+// CHECK: ret %struct.array_of_nested_floats
+array_of_nested_floats return_array_of_nested_floats(array_of_nested_floats x) {
+    return x;
+}
+
+typedef struct nested_array_of_shorts {
+    struct {
+        short arr[2];
+    } inner;
+} nested_array_of_shorts;
+
+// Nested struct with array of shorts = 32 bits - should be coerced
+// CHECK-LABEL: define{{.*}} i32 @return_nested_array_of_shorts(i32 %x.coerce)
+nested_array_of_shorts return_nested_array_of_shorts(nested_array_of_shorts x) {
+    return x;
+}

>From db475cfc05d83c6fc513f452e7d40e66c71fe0d9 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Thu, 19 Mar 2026 15:01:05 +0530
Subject: [PATCH 21/22] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 4254621409619..62f5c3ec51421 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -394,8 +394,8 @@ typedef struct bitfield_small {
 } bitfield_small;
 
 // Bitfields with unsigned backing type (32-bit) - should NOT be coerced
-// The field type is 'unsigned' which is >= 32 bits
-// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i32 %x.coerce)
+// The field type is 'unsigned' which is >= 32 bits, struct layout is {i16, [2 x i8]}
+// CHECK-LABEL: define{{.*}} %struct.bitfield_small @return_bitfield_small(i16 %x.coerce0, [2 x i8] %x.coerce1)
 // CHECK: ret %struct.bitfield_small
 bitfield_small return_bitfield_small(bitfield_small x) {
     return x;
@@ -419,7 +419,8 @@ typedef struct bitfield_with_int {
 } bitfield_with_int;
 
 // Bitfields + full int - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i32 %x.coerce0, i32 %x.coerce1)
+// Bitfield packs into i8, then padding, then i32
+// CHECK-LABEL: define{{.*}} %struct.bitfield_with_int @return_bitfield_with_int(i8 %x.coerce0, i32 %x.coerce1)
 // CHECK: ret %struct.bitfield_with_int
 bitfield_with_int return_bitfield_with_int(bitfield_with_int x) {
     return x;
@@ -587,7 +588,7 @@ typedef struct array_of_nested_floats {
 } array_of_nested_floats;
 
 // Array of nested struct containing float - should NOT be coerced
-// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats(%struct.inner_char_float %x.coerce)
+// CHECK-LABEL: define{{.*}} %struct.array_of_nested_floats @return_array_of_nested_floats([1 x %struct.inner_char_float] %x.coerce)
 // CHECK: ret %struct.array_of_nested_floats
 array_of_nested_floats return_array_of_nested_floats(array_of_nested_floats x) {
     return x;

>From f91d53f32979b3d740cc446fc3859bcf0acb147b Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 22 Mar 2026 21:34:53 +0530
Subject: [PATCH 22/22] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 64 ++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 62f5c3ec51421..38a5918417da6 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
 
 // Test AMDGPU ABI struct coercion behavior:
@@ -514,6 +513,69 @@ four_halfs return_four_halfs(four_halfs x) {
     return x;
 }
 
+// --- Bfloat16 tests ---
+
+typedef struct bfloat_struct {
+    __bf16 a;
+    __bf16 b;
+} bfloat_struct;
+
+// Two bfloats = 32 bits, but floats - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bfloat_struct @return_bfloat_struct(bfloat %x.coerce0, bfloat %x.coerce1)
+// CHECK: ret %struct.bfloat_struct
+bfloat_struct return_bfloat_struct(bfloat_struct x) {
+    return x;
+}
+
+typedef struct bfloat_and_char {
+    __bf16 b;
+    char c;
+} bfloat_and_char;
+
+// Bfloat + char - should NOT be coerced (bfloat is float type)
+// CHECK-LABEL: define{{.*}} %struct.bfloat_and_char @return_bfloat_and_char(bfloat %x.coerce0, i8 %x.coerce1)
+// CHECK: ret %struct.bfloat_and_char
+bfloat_and_char return_bfloat_and_char(bfloat_and_char x) {
+    return x;
+}
+
+typedef struct four_bfloats {
+    __bf16 a, b, c, d;
+} four_bfloats;
+
+// Four bfloats = 64 bits - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.four_bfloats @return_four_bfloats(bfloat %x.coerce0, bfloat %x.coerce1, bfloat %x.coerce2, bfloat %x.coerce3)
+// CHECK: ret %struct.four_bfloats
+four_bfloats return_four_bfloats(four_bfloats x) {
+    return x;
+}
+
+// --- Mixed half and bfloat ---
+
+typedef struct mixed_half_bfloat {
+    __fp16 h;
+    __bf16 b;
+} mixed_half_bfloat;
+
+// Mixed half + bfloat - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.mixed_half_bfloat @return_mixed_half_bfloat(half %x.coerce0, bfloat %x.coerce1)
+// CHECK: ret %struct.mixed_half_bfloat
+mixed_half_bfloat return_mixed_half_bfloat(mixed_half_bfloat x) {
+    return x;
+}
+
+typedef struct bfloat_and_float {
+    __bf16 b;
+    float f;
+} bfloat_and_float;
+
+// Bfloat + float - should NOT be coerced
+// CHECK-LABEL: define{{.*}} %struct.bfloat_and_float @return_bfloat_and_float(bfloat %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.bfloat_and_float
+bfloat_and_float return_bfloat_and_float(bfloat_and_float x) {
+    return x;
+}
+
 // --- Vectors inside structs ---
 
 typedef int int2 __attribute__((ext_vector_type(2)));