[clang] [AMDGPU] Stop coercing structs with FP and int fields to integer arrays (PR #185083)

Fri Mar 6 12:07:57 PST 2026

https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083

>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:56:34 +0530
Subject: [PATCH 1/9] Create amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:57:11 +0530
Subject: [PATCH 2/9] Delete
 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

---
 .../test/CodeGen/amdgpu-abi-struct-coerce.c   | 71 -------------------
 1 file changed, 71 deletions(-)
 delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
deleted file mode 100644
index 2399630ff797b..0000000000000
--- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
-
-// Check that structs containing mixed float and int types are not coerced
-// to integer arrays. They should preserve the original struct type and
-// individual field types.
-
-typedef struct fp_int_pair {
-    float f;
-    int i;
-} fp_int_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.fp_int_pair
-fp_int_pair return_fp_int_pair(fp_int_pair x) {
-    return x;
-}
-
-typedef struct int_fp_pair {
-    int i;
-    float f;
-} int_fp_pair;
-
-// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.int_fp_pair
-int_fp_pair return_int_fp_pair(int_fp_pair x) {
-    return x;
-}
-
-typedef struct two_floats {
-    float a;
-    float b;
-} two_floats;
-
-// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
-// CHECK: ret %struct.two_floats
-two_floats return_two_floats(two_floats x) {
-    return x;
-}
-
-typedef struct two_ints {
-    int a;
-    int b;
-} two_ints;
-
-// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
-// CHECK: ret %struct.two_ints
-two_ints return_two_ints(two_ints x) {
-    return x;
-}
-
-// Structs <= 32 bits should still be coerced to i32 for return value
-typedef struct small_struct {
-    short a;
-    short b;
-} small_struct;
-
-// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
-small_struct return_small_struct(small_struct x) {
-    return x;
-}
-
-// Structs <= 16 bits should still be coerced to i16 for return value
-typedef struct tiny_struct {
-    char a;
-    char b;
-} tiny_struct;
-
-// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
-tiny_struct return_tiny_struct(tiny_struct x) {
-    return x;
-}

>From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Fri, 6 Mar 2026 23:58:43 +0530
Subject: [PATCH 3/9] fix for clang abi lowering

---
 clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c

diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
new file mode 100644
index 0000000000000..2399630ff797b
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+
+// Check that structs containing mixed float and int types are not coerced
+// to integer arrays. They should preserve the original struct type and
+// individual field types.
+
+typedef struct fp_int_pair {
+    float f;
+    int i;
+} fp_int_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.fp_int_pair
+fp_int_pair return_fp_int_pair(fp_int_pair x) {
+    return x;
+}
+
+typedef struct int_fp_pair {
+    int i;
+    float f;
+} int_fp_pair;
+
+// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.int_fp_pair
+int_fp_pair return_int_fp_pair(int_fp_pair x) {
+    return x;
+}
+
+typedef struct two_floats {
+    float a;
+    float b;
+} two_floats;
+
+// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1)
+// CHECK: ret %struct.two_floats
+two_floats return_two_floats(two_floats x) {
+    return x;
+}
+
+typedef struct two_ints {
+    int a;
+    int b;
+} two_ints;
+
+// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1)
+// CHECK: ret %struct.two_ints
+two_ints return_two_ints(two_ints x) {
+    return x;
+}
+
+// Structs <= 32 bits should still be coerced to i32 for return value
+typedef struct small_struct {
+    short a;
+    short b;
+} small_struct;
+
+// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1)
+small_struct return_small_struct(small_struct x) {
+    return x;
+}
+
+// Structs <= 16 bits should still be coerced to i16 for return value
+typedef struct tiny_struct {
+    char a;
+    char b;
+} tiny_struct;
+
+// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1)
+tiny_struct return_tiny_struct(tiny_struct x) {
+    return x;
+}

>From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:00:29 +0530
Subject: [PATCH 4/9] Update amdgcn-openmp-device-math-complex.c

---
 clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
index b347cf4716df2..34c05e2974a64 100644
--- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c
+++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c
@@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) {
 // CHECK-LABEL: define {{.*}}test_complex_f32
 #pragma omp target
   {
-    // CHECK: call [2 x i32] @__divsc3
-    // CHECK: call [2 x i32] @__mulsc3
+    // CHECK: call { float, float } @__divsc3
+    // CHECK: call { float, float } @__mulsc3
     (void)(a * (a / a));
   }
 }

>From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:11:07 +0530
Subject: [PATCH 5/9] Update amdgpu-abi-struct-coerce.cl

---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 06d3cdb01deb2..a13f8e8bbe119 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { }
 // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
 void func_struct_padding_arg(struct_padding_arg arg1) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7)
 void func_struct_char_x8(struct_char_x8 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3)
 void func_struct_char_x4(struct_char_x4 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2)
 void func_struct_char_x3(struct_char_x3 arg) { }
 
-// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce)
+// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1)
 void func_struct_char_x2(struct_char_x2 arg) { }
 
 // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce)
@@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret()
   return s;
 }
 
-// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret()
-// CHECK: ret [2 x i32] zeroinitializer
+// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
+// CHECK: ret %struct.struct_char_x8 zeroinitializer
+ struct_char_x8 func_struct_char_x8_ret()
+ {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };
@@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5)
+// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:13:09 +0530
Subject: [PATCH 6/9] Update AMDGPU.cpp

---
 clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4ac7f42289d6d..f3c4b5ad0837b 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
       if (Size <= 32)
         return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
 
-      if (Size <= 64) {
-        llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-        return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-      }
-
       if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
         return ABIArgInfo::getDirect();
     }
@@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
         RD && RD->hasFlexibleArrayMember())
       return DefaultABIInfo::classifyArgumentType(Ty);
 
-    // Pack aggregates <= 8 bytes into single VGPR or pair.
-    uint64_t Size = getContext().getTypeSize(Ty);
-    if (Size <= 64) {
-      unsigned NumRegs = (Size + 31) / 32;
-      NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
-
-      if (Size <= 16)
-        return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-
-      if (Size <= 32)
-        return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
-
-      // XXX: Should this be i64 instead, and should the limit increase?
-      llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
-    }
-
     if (NumRegsLeft > 0) {
       uint64_t NumRegs = numRegsForType(Ty);
       if (NumRegsLeft >= NumRegs) {

>From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 00:31:52 +0530
Subject: [PATCH 7/9] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index a13f8e8bbe119..fb5ba69c86c6d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret()
 
 // CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret()
 // CHECK: ret %struct.struct_char_x8 zeroinitializer
- struct_char_x8 func_struct_char_x8_ret()
- {
 struct_char_x8 func_struct_char_x8_ret()
 {
   struct_char_x8 s = { 0 };

>From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:35:56 +0530
Subject: [PATCH 8/9] Update amdgpu-abi-struct-coerce.cl

---
 clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index fb5ba69c86c6d..3e4506b88aac6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
-// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5)
+// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5)
 void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { }

>From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol at users.noreply.github.com>
Date: Sat, 7 Mar 2026 01:37:44 +0530
Subject: [PATCH 9/9] Update amdgpu-abi-struct-coerce.cl