[clang] [clang][CIR] Add lowering for fp16 intrinsics (PR #194865)

Wed Apr 29 07:00:00 PDT 2026

https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/194865

This PR adds lowering for the following intrinsic groups:
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-1
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#reciprocal-estimate-1
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#reciprocal-square-root-estimate-1

It also moves the corresponding tests from:
  * clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c

to:
  * clang/test/CodeGen/AArch64/neon/fullfp16.c

The lowering follows the existing implementation in
CodeGen/TargetBuiltins/ARM.cpp.


>From f77f2f930d91074a1fdce877c1de4254eea323bf Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 29 Apr 2026 13:52:46 +0000
Subject: [PATCH] [clang][CIR] Add lowering for fp16 intrinsics

This PR adds lowering for the following intrinsic groups:
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-1
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#reciprocal-estimate-1
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#reciprocal-square-root-estimate-1

It also moves the corresponding tests from:
  * clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c

to:
  * clang/test/CodeGen/AArch64/neon/fullfp16.c

The lowering follows the existing implementation in
CodeGen/TargetBuiltins/ARM.cpp.
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  5 ++
 clang/test/CodeGen/AArch64/neon/fullfp16.c    | 68 +++++++++++++++++++
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c   | 35 ----------
 3 files changed, 73 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index cac5f8eced8a7..602a310384a34 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -317,6 +317,11 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
   case NEON::BI__builtin_neon_vcvtd_n_f64_u64:
   case NEON::BI__builtin_neon_vcvtd_n_s64_f64:
   case NEON::BI__builtin_neon_vcvtd_n_u64_f64:
+  case NEON::BI__builtin_neon_vabdh_f16:
+  case NEON::BI__builtin_neon_vrecpeh_f16:
+  case NEON::BI__builtin_neon_vrecpxh_f16:
+  case NEON::BI__builtin_neon_vrsqrteh_f16:
+  case NEON::BI__builtin_neon_vrsqrtsh_f16:
     return emitNeonCall(cgf.cgm, cgf.getBuilder(),
                         {cgf.convertType(expr->getArg(0)->getType())}, ops,
                         llvmIntrName, cgf.convertType(expr->getType()), loc);
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index db0b20fe62799..056b0df253e48 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -102,6 +102,9 @@ uint16_t test_vceqzh_f16(float16_t a) {
   return vceqzh_f16(a);
 }
 
+//===------------------------------------------------------===//
+// 2.5.1.1.1. Absolute value
+//===------------------------------------------------------===//
 // ALL-LABEL: @test_vabsh_f16
 float16_t test_vabsh_f16(float16_t a) {
 // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.f16
@@ -112,6 +115,68 @@ float16_t test_vabsh_f16(float16_t a) {
   return vabsh_f16(a);
 }
 
+//===------------------------------------------------------===//
+// 2.5.1.1.2. Absolute difference
+//===------------------------------------------------------===//
+// ALL-LABEL: test_vabdh_f16
+float16_t test_vabdh_f16(float16_t a, float16_t b) {
+// CIR:  cir.call_llvm_intrinsic "aarch64.sisd.fabd" {{.*}} -> !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]], half {{.*}} [[B:%.*]])
+// LLVM:  [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half [[A]], half [[B]])
+// LLVM:  ret half [[ABD]]
+  return vabdh_f16(a, b);
+}
+
+//===------------------------------------------------------===//
+// 2.5.1.2.1.  Reciprocal estimate
+//===------------------------------------------------------===//
+// ALL-LABEL: test_vrecpeh_f16
+float16_t test_vrecpeh_f16(float16_t a) {
+// CIR:  cir.call_llvm_intrinsic "aarch64.neon.frecpe" {{.*}} -> !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]])
+// LLVM: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half [[A]])
+// LLVM: ret half [[VREC]]
+  return vrecpeh_f16(a);
+}
+
+// ALL-LABEL: test_vrecpxh_f16
+float16_t test_vrecpxh_f16(float16_t a) {
+// CIR:  cir.call_llvm_intrinsic "aarch64.neon.frecpx" {{.*}} -> !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]])
+// LLVM: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half [[A]])
+// LLVM: ret half [[VREC]]
+  return vrecpxh_f16(a);
+}
+
+//===------------------------------------------------------===//
+// 2.5.1.2.2.  Reciprocal square-root estimate
+//===------------------------------------------------------===//
+// ALL-LABEL: test_vrsqrteh_f16
+float16_t test_vrsqrteh_f16(float16_t a) {
+// CIR:  cir.call_llvm_intrinsic "aarch64.neon.frsqrte" {{.*}} -> !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]])
+// LLVM:  [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half [[A]])
+// LLVM:  ret half [[RND]]
+  return vrsqrteh_f16(a);
+}
+
+// ALL-LABEL: test_vrsqrtsh_f16
+float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
+// CIR:  cir.call_llvm_intrinsic "aarch64.neon.frsqrts" {{.*}} -> !cir.f16
+
+// LLVM-SAME: half {{.*}} [[A:%.*]], half {{.*}} [[B:%.*]])
+// LLVM:  [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half [[A]], half [[B]])
+// LLVM:  ret half [[RSQRTS]]
+  return vrsqrtsh_f16(a, b);
+}
+
+//===------------------------------------------------------===//
+// 2.5.4.1. Negate
+//===------------------------------------------------------===//
 // ALL-LABEL: @test_vnegh_f16
 float16_t test_vnegh_f16(float16_t a) {
 // CIR: cir.minus {{.*}} : !cir.f16
@@ -122,6 +187,9 @@ float16_t test_vnegh_f16(float16_t a) {
   return vnegh_f16(a);
 }
 
+//===------------------------------------------------------===//
+// 2.5.1.9.3 Fused multiply-accumulate
+//===------------------------------------------------------===//
 // ALL-LABEL: test_vfmah_f16
 float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
 // CIR: cir.call_llvm_intrinsic "fma" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index a6f830fd48ee3..0fad0e1c79fec 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -291,20 +291,6 @@ uint64_t test_vcvtph_u64_f16 (float16_t a) {
   return vcvtph_u64_f16(a);
 }
 
-// CHECK-LABEL: test_vrecpeh_f16
-// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a)
-// CHECK: ret half [[VREC]]
-float16_t test_vrecpeh_f16(float16_t a) {
-  return vrecpeh_f16(a);
-}
-
-// CHECK-LABEL: test_vrecpxh_f16
-// CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a)
-// CHECK: ret half [[VREC]]
-float16_t test_vrecpxh_f16(float16_t a) {
-  return vrecpxh_f16(a);
-}
-
 // CHECK-LABEL: test_vrndh_f16
 // CHECK:  [[RND:%.*]] =  call half @llvm.trunc.f16(half %a)
 // CHECK:  ret half [[RND]]
@@ -354,13 +340,6 @@ float16_t test_vrndxh_f16(float16_t a) {
   return vrndxh_f16(a);
 }
 
-// CHECK-LABEL: test_vrsqrteh_f16
-// CHECK:  [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a)
-// CHECK:  ret half [[RND]]
-float16_t test_vrsqrteh_f16(float16_t a) {
-  return vrsqrteh_f16(a);
-}
-
 // CHECK-LABEL: test_vsqrth_f16
 // CHECK:  [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a)
 // CHECK:  ret half [[SQR]]
@@ -368,13 +347,6 @@ float16_t test_vsqrth_f16(float16_t a) {
   return vsqrth_f16(a);
 }
 
-// CHECK-LABEL: test_vabdh_f16
-// CHECK:  [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
-// CHECK:  ret half [[ABD]]
-float16_t test_vabdh_f16(float16_t a, float16_t b) {
-  return vabdh_f16(a, b);
-}
-
 // CHECK-LABEL: test_vcageh_f16
 // CHECK:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
 // CHECK: [[RET:%.*]] = trunc i32 [[FACG]] to i16
@@ -576,10 +548,3 @@ float16_t test_vmulxh_f16(float16_t a, float16_t b) {
 float16_t test_vrecpsh_f16(float16_t a, float16_t b) {
   return vrecpsh_f16(a, b);
 }
-
-// CHECK-LABEL: test_vrsqrtsh_f16
-// CHECK:  [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
-// CHECK:  ret half [[RSQRTS]]
-float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
-  return vrsqrtsh_f16(a, b);
-}