[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)

Sun Mar 15 09:23:31 PDT 2026

https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083

>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 01/16] Create amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 02/16] Delete
 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 -------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
-    int i;
-    float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
-
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
-
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
-}

>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 03/16] fix for clang abi lowering

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 04/16] Update amdgcn-openmp-device-math-complex.c

---
 clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df2..34c05e2974a64 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
 // CHECK-LABEL: define {{.*}}test_complex_f32
 #pragma omp target
   {
-    // CHECK: call [2 x i32] @__divsc3
-    // CHECK: call [2 x i32] @__mulsc3
+    // CHECK: call { float, float } @__divsc3
+    // CHECK: call { float, float } @__mulsc3
     (void)(a * (a / a));
   }
 }

>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 05/16] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..a13f8e8bbe119 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 06/16] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..f3c4b5ad0837b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
       if (Size <= 32)
         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
-      if (Size <= 64) {
-        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-      }
-
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
     }
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
-    // Pack aggregates <= 8 bytes into single VGPR or pair.
-    uint64_t Size = getContext().getTypeSize(Ty);
-    if (Size <= 64) {
-      unsigned NumRegs = (Size + 31) / 32;
-      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
-      // XXX: Should this be i64 instead, and should the limit increase?
-      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-    }
-
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 07/16] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe119..fb5ba69c86c6d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
 
 // CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
 // CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };

>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 08/16] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d..3e4506b88aac6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 09/16] Update amdgpu-abi-struct-coerce.cl


>From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:43:50 +0530
Subject: [PATCH 10/16] Update amdgpu-variadic-call.c

---
 clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c
index 17eda215211a2..22402118d862f 100644
--- a/clang/test/CodeGen/amdgpu-variadic-call.c
+++ b/clang/test/CodeGen/amdgpu-variadic-call.c
@@ -217,10 +217,9 @@ typedef union
 } union_f32_i32;
 
 // CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]]
@@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1)
 }
 
 // CHECK-LABEL: define {{[^@]+}}@multiple_two
-// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float
 // CHECK-NEXT:    [[CONV:%.*]] = fpext float [[V1]] to double
 // CHECK-NEXT:    [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0
 // CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1
-// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0
 // CHECK-NEXT:    tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]
 // CHECK-NEXT:    tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]]

>From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:00:33 +0530
Subject: [PATCH 11/16] Update amdgpu-abi-struct-coerce.cl

---
 .../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 3e4506b88aac6..06d3cdb01deb2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
+// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
+// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
+// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
+// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
-// CHECK: ret %struct.struct_char_x8 zeroinitializer
+// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
+// CHECK: ret [2 x i32] zeroinitializer
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:14 +0530
Subject: [PATCH 12/16] Update amdgpu-abi-struct-coerce.c

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++---
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
index 2399630ff797b..f827978a8cd18 100644
--- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -1,8 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
 
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
+// Check that structs containing floats or full-sized integers (i32, i64) are
+// NOT coerced to integer arrays. They should preserve their original types.
+// However, structs containing only sub-32-bit integer types (char, short)
+// should still be packed into i32 registers.
+
+// === Structs with floats - should NOT be coerced to integers ===
 
 typedef struct fp_int_pair {
     float f;
@@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) {
     return x;
 }
 
+// === Structs with full-sized integers - should NOT be coerced ===
+
 typedef struct two_ints {
     int a;
     int b;
@@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) {
     return x;
 }
 
-// Structs <= 32 bits should still be coerced to i32 for return value
+// === Structs with only sub-32-bit integers - SHOULD be coerced ===
+
+// Structs of small integers <= 32 bits should be coerced to i32
 typedef struct small_struct {
     short a;
     short b;
 } small_struct;
 
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce)
 small_struct return_small_struct(small_struct x) {
     return x;
 }
 
-// Structs <= 16 bits should still be coerced to i16 for return value
+// Structs of small integers <= 16 bits should be coerced to i16
 typedef struct tiny_struct {
     char a;
     char b;
 } tiny_struct;
 
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce)
 tiny_struct return_tiny_struct(tiny_struct x) {
     return x;
 }
+
+// Struct of 8 chars (64 bits) should be coerced to [2 x i32]
+typedef struct eight_chars {
+    char a, b, c, d, e, f, g, h;
+} eight_chars;
+
+// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce)
+eight_chars return_eight_chars(eight_chars x) {
+    return x;
+}
+
+// Struct of 4 chars (32 bits) should be coerced to i32
+typedef struct four_chars {
+    char a, b, c, d;
+} four_chars;
+
+// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce)
+four_chars return_four_chars(four_chars x) {
+    return x;
+}
+
+// === Mixed tests - floats prevent coercion even with small integers ===
+
+typedef struct char_and_float {
+    char c;
+    float f;
+} char_and_float;
+
+// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.char_and_float
+char_and_float return_char_and_float(char_and_float x) {
+    return x;
+}

>From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:01:59 +0530
Subject: [PATCH 13/16] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f3c4b5ad0837b..9e0ca7b77ecdd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
   return Members * NumRegs <= MaxNumRegsForArgsRet;
 }
 
+/// Check if all fields in an aggregate type contain only sub-32-bit integer
+/// types. Such aggregates should be packed into i32 registers rather than
+/// passed as individual elements. Aggregates containing floats or full-sized
+/// integer types (i32, i64) should preserve their original types.
+static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD,
+                                             const ASTContext &Context) {
+  for (const FieldDecl *Field : RD->fields()) {
+    QualType FieldTy = Field->getType();
+
+    // Recursively check nested structs
+    if (const auto *NestedRD = FieldTy->getAsRecordDecl()) {
+      if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+        return false;
+      continue;
+    }
+
+    // Arrays - check the element type
+    if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) {
+      QualType EltTy = AT->getElementType();
+      if (const auto *NestedRD = EltTy->getAsRecordDecl()) {
+        if (!containsOnlyPackableIntegerTypes(NestedRD, Context))
+          return false;
+        continue;
+      }
+      // For non-struct array elements, check if they're packable integers
+      if (!EltTy->isIntegerType())
+        return false;
+      uint64_t EltSize = Context.getTypeSize(EltTy);
+      if (EltSize >= 32)
+        return false;
+      continue;
+    }
+
+    // Floating point types should not be packed into integers
+    if (FieldTy->isFloatingType())
+      return false;
+
+    // Only integer types that are smaller than 32 bits should be packed
+    if (!FieldTy->isIntegerType())
+      return false;
+
+    uint64_t FieldSize = Context.getTypeSize(FieldTy);
+    if (FieldSize >= 32)
+      return false;
+  }
+  return true;
+}
+
 /// Estimate number of registers the type will use when passed in registers.
 uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const {
   uint64_t NumRegs = 0;
@@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
           RD && RD->hasFlexibleArrayMember())
         return DefaultABIInfo::classifyReturnType(RetTy);
 
-      // Pack aggregates <= 4 bytes into single VGPR or pair.
+      // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+      // contain sub-32-bit integer types. Aggregates with floats or full-sized
+      // integers should preserve their original types.
       uint64_t Size = getContext().getTypeSize(RetTy);
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+      if (Size <= 64) {
+        const auto *RD = RetTy->getAsRecordDecl();
+        bool ShouldPackToInt =
+            RD && containsOnlyPackableIntegerTypes(RD, getContext());
 
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+        if (ShouldPackToInt) {
+          if (Size <= 16)
+            return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+          if (Size <= 32)
+            return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+          llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+          return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+        }
+      }
 
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
@@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
+    // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they
+    // contain sub-32-bit integer types. Aggregates with floats or full-sized
+    // integers (i32, i64) should preserve their original types.
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 64) {
+      const auto *RD = Ty->getAsRecordDecl();
+      bool ShouldPackToInt =
+          RD && containsOnlyPackableIntegerTypes(RD, getContext());
+
+      if (ShouldPackToInt) {
+        unsigned NumRegs = (Size + 31) / 32;
+        NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+        if (Size <= 16)
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+        if (Size <= 32)
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+      }
+    }
+
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:15:11 +0530
Subject: [PATCH 14/16] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 829500383a34a..4918bdcd8111b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
 
         if (ShouldPackToInt) {
           if (Size <= 16)
-            return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt16Ty(getVMContext()));
 
           if (Size <= 32)
-            return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+            return ABIArgInfo::getDirect(
+                llvm::Type::getInt32Ty(getVMContext()));
 
           llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
           return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
@@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(
+              llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:28:52 +0530
Subject: [PATCH 15/16] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4918bdcd8111b..06b066de59055 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
 
         if (Size <= 16)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt16Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
 
         if (Size <= 32)
-          return ABIArgInfo::getDirect(
-              llvm::Type::getInt32Ty(getVMContext()));
+          return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
         llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
         return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));

>From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001
From: Addmisol <addmisol9 at gmail.com>
Date: Sun, 15 Mar 2026 21:53:13 +0530
Subject: [PATCH 16/16] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..e9cdb7f5da32a 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32()
   return s;
 }
 
-// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
-// CHECK: ret i32 0
+// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK: ret %union.transparent_u zeroinitializer
 transparent_u func_transparent_union_ret()
 {
   transparent_u u = { 0 };