[clang] [clang][LoongArch] Add support for the _Float16 type (PR #141703)

Tue May 27 19:14:50 PDT 2025

https://github.com/Ami-zhang created https://github.com/llvm/llvm-project/pull/141703

Enable _Float16 for LoongArch target. Additionally, this change fixes incorrect ABI lowering of _Float16 in the case of structs containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR. Finally, it also fixes int16 -> __fp16 conversion code gen, which uses generic LLVM IR rather than llvm.convert.to.fp16 intrinsics.

>From 537b973a7bab435eb6a569bc1466c7768fae17e6 Mon Sep 17 00:00:00 2001
From: Ami-zhang <zhanglimin at loongson.cn>
Date: Thu, 17 Apr 2025 15:59:05 +0800
Subject: [PATCH] [clang][LoongArch] Add support for the _Float16 type

Enable _Float16 for LoongArch target. Additionally, this change
fixes incorrect ABI lowering of _Float16 in the case of structs
containing fp16 that are eligible for passing via GPR+FPR or FPR+FPR.
Finally, it also fixes int16 -> __fp16 conversion code gen, which
uses generic LLVM IR rather than llvm.convert.to.fp16 intrinsics.
---
 clang/docs/LanguageExtensions.rst             |  1 +
 clang/lib/Basic/Targets/LoongArch.h           |  3 +
 clang/lib/CodeGen/Targets/LoongArch.cpp       |  7 +-
 clang/test/CodeGen/LoongArch/__fp16-convert.c | 30 ++++++++
 clang/test/CodeGen/LoongArch/abi-lp64d.c      | 71 +++++++++++++++++++
 5 files changed, 108 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/LoongArch/__fp16-convert.c

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a40dd4d1a1673..088f01a0199e4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1001,6 +1001,7 @@ to ``float``; see below for more information on this emulation.
   * X86 (if SSE2 is available; natively if AVX512-FP16 is also available)
   * RISC-V (natively if Zfh or Zhinx is available)
   * SystemZ (emulated)
+  * LoongArch
 
 * ``__bf16`` is supported on the following targets (currently never natively):
 
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 4c7b53abfef9b..8a8c978ab89db 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -53,6 +53,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
     LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
     MCountName = "_mcount";
+    HasFloat16 = true;
     SuitableAlign = 128;
     WCharType = SignedInt;
     WIntType = UnsignedInt;
@@ -98,6 +99,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo {
 
   bool hasBitIntType() const override { return true; }
 
+  bool useFP16ConversionIntrinsics() const override { return false; }
+
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
 
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 0f689371a60db..7640f3779816a 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -110,10 +110,9 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
     uint64_t Size = getContext().getTypeSize(Ty);
     if (IsInt && Size > GRLen)
       return false;
-    // Can't be eligible if larger than the FP registers. Half precision isn't
-    // currently supported on LoongArch and the ABI hasn't been confirmed, so
-    // default to the integer ABI in that case.
-    if (IsFloat && (Size > FRLen || Size < 32))
+    // Can't be eligible if larger than the FP registers. Handling of half
+    // precision values has been specified in the ABI, so don't block those.
+    if (IsFloat && Size > FRLen)
       return false;
     // Can't be eligible if an integer type was already found (int+int pairs
     // are not eligible).
diff --git a/clang/test/CodeGen/LoongArch/__fp16-convert.c b/clang/test/CodeGen/LoongArch/__fp16-convert.c
new file mode 100644
index 0000000000000..84ef5de960b47
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/__fp16-convert.c
@@ -0,0 +1,30 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+
+__fp16 y;
+short z;
+// CHECK-LABEL: define dso_local void @bar1(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr @y, align 2
+// CHECK-NEXT:    [[CONV:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[CONV]] to i16
+// CHECK-NEXT:    store i16 [[CONV1]], ptr @z, align 2
+// CHECK-NEXT:    ret void
+//
+void bar1(){
+    z = y;
+}
+// CHECK-LABEL: define dso_local void @bar2(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @z, align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to float
+// CHECK-NEXT:    [[CONV1:%.*]] = fptrunc float [[CONV]] to half
+// CHECK-NEXT:    store half [[CONV1]], ptr @y, align 2
+// CHECK-NEXT:    ret void
+//
+void bar2(){
+    y = z;
+}
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c
index fc7f1eada586b..9f64cfd662e5f 100644
--- a/clang/test/CodeGen/LoongArch/abi-lp64d.c
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c
@@ -48,6 +48,9 @@ unsigned long check_ulong() { return 0; }
 // CHECK-LABEL: define{{.*}} i64 @check_ulonglong()
 unsigned long long check_ulonglong() { return 0; }
 
+// CHECK-LABEL: define{{.*}}  half @check_float16()
+_Float16 check_float16() { return 0; }
+
 // CHECK-LABEL: define{{.*}} float @check_float()
 float check_float() { return 0; }
 
@@ -127,6 +130,14 @@ struct i16x4_s f_i16x4_s(struct i16x4_s x) {
 /// available, the value is passed in a GAR; if no GAR is available, the value
 /// is passed on the stack.
 
+struct f16x1_s {
+  __fp16 a;
+};
+
+struct float16x1_s {
+  _Float16 a;
+};
+
 struct f32x1_s {
   float a;
 };
@@ -135,6 +146,16 @@ struct f64x1_s {
   double a;
 };
 
+// CHECK-LABEL: define{{.*}} half @f_f16x1_s(half %0)
+struct f16x1_s f_f16x1_s(struct f16x1_s x) {
+  return x;
+}
+
+// CHECK-LABEL: define{{.*}} half @f_float16x1_s(half %0)
+struct float16x1_s f_float16x1_s(struct float16x1_s x) {
+  return x;
+}
+
 // CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0)
 struct f32x1_s f_f32x1_s(struct f32x1_s x) {
   return x;
@@ -151,10 +172,20 @@ struct f64x1_s f_f64x1_s(struct f64x1_s x) {
 /// number of available FAR is less than 2, it’s passed in a GAR, and passed on
 /// the stack if no GAR is available.
 
+struct f16x2_s {
+  __fp16 a;
+  _Float16 b;
+};
+
 struct f32x2_s {
   float a, b;
 };
 
+// CHECK-LABEL: define{{.*}} { half, half } @f_f16x2_s(half %0, half %1)
+struct f16x2_s f_f16x2_s(struct f16x2_s x) {
+  return x;
+}
+
 // CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1)
 struct f32x2_s f_f32x2_s(struct f32x2_s x) {
   return x;
@@ -165,11 +196,21 @@ struct f32x2_s f_f32x2_s(struct f32x2_s x) {
 /// i. Multiple fixed-point members. If there are available GAR, the structure
 /// is passed in a GAR, and passed on the stack if no GAR is available.
 
+struct f16x1_i16x2_s {
+  _Float16 a;
+  int16_t b, c;
+};
+
 struct f32x1_i16x2_s {
   float a;
   int16_t b, c;
 };
 
+// CHECK-LABEL: define{{.*}} i64 @f_f16x1_i16x2_s(i64 %x.coerce)
+struct f16x1_i16x2_s f_f16x1_i16x2_s(struct f16x1_i16x2_s x) {
+  return x;
+}
+
 // CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce)
 struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
   return x;
@@ -181,11 +222,21 @@ struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) {
 /// but one GAR is available, it’s passed in GAR; If no GAR is available, it’s
 /// passed on the stack.
 
+struct f16x1_i32x1_s {
+  _Float16 a;
+  int32_t b;
+};
+
 struct f32x1_i32x1_s {
   float a;
   int32_t b;
 };
 
+// CHECK-LABEL: define{{.*}} { half, i32 } @f_f16x1_i32x1_s(half %0, i32 %1)
+struct f16x1_i32x1_s f_f16x1_i32x1_s(struct f16x1_i32x1_s x) {
+  return x;
+}
+
 // CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
 struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
   return x;
@@ -253,6 +304,16 @@ struct f32x4_s f_f32x4_s(struct f32x4_s x) {
   return x;
 }
 
+struct f16x5_s {
+  _Float16 a, b, c, d;
+  __fp16 e;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x5_s([2 x i64] %x.coerce)
+struct f16x5_s f_f16x5_s(struct f16x5_s x) {
+  return x;
+}
+
 /// ii. The structure with two double members is passed in a pair of available
 /// FARs. If no a pair of available FARs, it’s passed in GARs. A structure with
 /// one double member and one float member is same.
@@ -312,6 +373,16 @@ struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
   return x;
 }
 
+struct f16x4_i32x2_s {
+  _Float16 a, b, c, d;
+  int32_t e, f;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f16x4_i32x2_s([2 x i64] %x.coerce)
+struct f16x4_i32x2_s f_f16x4_i32x2_s(struct f16x4_i32x2_s x) {
+  return x;
+}
+
 /// 3. WOA > 2 × GRLEN
 /// a. It’s passed by reference and are replaced in the argument list with the
 /// address. If there is an available GAR, the reference is passed in the GAR,