[clang] [clang][CIR][AArch64] Add lowering for conversion intrinsics (PR #199990)

Wed Jun 10 08:50:56 PDT 2026

https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/199990

>From 48d8c6f89bd966e5d58a9dfa6c48cd1d856751a1 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 27 May 2026 13:46:42 +0000
Subject: [PATCH 1/3] [clang][CIR][AArch64] Add lowering for conversion
 intrinsics

This PR adds lowering for intrinsic from the following groups:
  * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions
  * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2

It continues the work started in #190961 and #193273. This PR implements
conversions from integer to FP types where the bit-wdith does not
change:
  * vcvt_s64_f64
  * vcvt_u64_f64
  * vcvt_s32_f32
  * vcvtq_s32_f32
  * vcvtq_s64_f64
  * vcvt_u32_f32
  * vcvtq_u32_f32
  * vcvtq_u64_f64
  * vcvt_s16_f16
  * vcvtq_s16_f16
  * vcvt_u16_f16
  * vcvtq_u16_f16

The corresponding tests are moved from:
  * clang/test/CodeGen/AArch64/

to:
  * clang/test/CodeGen/AArch64/neon/

The lowering follows the existing implementation in
CodeGen/TargetBuiltins/ARM.cpp.
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  15 ++-
 clang/test/CodeGen/AArch64/neon-intrinsics.c  |  28 -----
 clang/test/CodeGen/AArch64/neon-misc.c        |  78 ------------
 clang/test/CodeGen/AArch64/neon/fullfp16.c    |  59 +++++++++
 clang/test/CodeGen/AArch64/neon/intrinsics.c  | 114 ++++++++++++++++++
 .../CodeGen/AArch64/v8.2a-neon-intrinsics.c   |  52 --------
 6 files changed, 187 insertions(+), 159 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 4111df26d241d..c888b7cfce17e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -761,7 +761,20 @@ static mlir::Value emitCommonNeonBuiltinExpr(
   case NEON::BI__builtin_neon_vcvtq_s64_v:
   case NEON::BI__builtin_neon_vcvtq_u64_v:
   case NEON::BI__builtin_neon_vcvtq_s16_f16:
-  case NEON::BI__builtin_neon_vcvtq_u16_f16:
+  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
+    auto ty = getFloatNeonType(cgf, neonType);
+    // Undo the bitcast inserted by intrinsics that expand to this builtin
+    // (e.g. vcvt_u32_f32).
+    // TODO: While the bitcasts eventually cancel each other out, we should
+    // avoid them altogether.
+    ops[0] =
+        cgf.getBuilder().createCast(loc, cir::CastKind::bitcast, ops[0], ty);
+    assert(!cir::MissingFeatures::emitConstrainedFPCall());
+    // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
+    llvm::StringRef llvmIntrName = usgn ? "fptoui.sat" : "fptosi.sat";
+    return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
+                        /*argTypes=*/{ty}, ops, llvmIntrName, vTy, loc);
+  }
   case NEON::BI__builtin_neon_vcvta_s16_f16:
   case NEON::BI__builtin_neon_vcvta_s32_v:
   case NEON::BI__builtin_neon_vcvta_s64_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 5865c4cf61b50..4de430ad57c10 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -18645,34 +18645,6 @@ float64x1_t test_vneg_f64(float64x1_t a) {
   return vneg_f64(a);
 }
 
-// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT:    [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]])
-// CHECK-NEXT:    ret <1 x i64> [[VCVTZ_I]]
-//
-int64x1_t test_vcvt_s64_f64(float64x1_t a) {
-  return vcvt_s64_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT:    [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]])
-// CHECK-NEXT:    ret <1 x i64> [[VCVTZ_I]]
-//
-uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
-  return vcvt_u64_f64(a);
-}
-
 // CHECK-LABEL: define dso_local <1 x i64> @test_vcvtn_s64_f64(
 // CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index d4cfcfe4e9bdc..964b0059662d2 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -2839,84 +2839,6 @@ float64x2_t test_vrndiq_f64(float64x2_t a) {
   return vrndiq_f64(a);
 }
 
-// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_s32_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]])
-// CHECK-NEXT:    ret <2 x i32> [[VCVTZ_I]]
-//
-int32x2_t test_vcvt_s32_f32(float32x2_t a) {
-  return vcvt_s32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_s32_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i32> [[VCVTZ_I]]
-//
-int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
-  return vcvtq_s32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_s64_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]])
-// CHECK-NEXT:    ret <2 x i64> [[VCVTZ_I]]
-//
-int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
-  return vcvtq_s64_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_u32_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]])
-// CHECK-NEXT:    ret <2 x i32> [[VCVTZ_I]]
-//
-uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
-  return vcvt_u32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_u32_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i32> [[VCVTZ_I]]
-//
-uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
-  return vcvtq_u32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_u64_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]])
-// CHECK-NEXT:    ret <2 x i64> [[VCVTZ_I]]
-//
-uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
-  return vcvtq_u64_f64(a);
-}
-
 // CHECK-LABEL: define dso_local <2 x i32> @test_vcvtn_s32_f32(
 // CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index ba65f76e924c4..2fc2c47c1951e 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -32,6 +32,7 @@
 //=============================================================================
 
 #include <arm_fp16.h>
+#include <arm_neon.h>
 
 //===------------------------------------------------------===//
 // 2.5.1.1.  Addition
@@ -211,3 +212,61 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
 // LLVM:  ret half [[ADD]]
   return vfmsh_f16(a, b, c);
 }
+
+//===------------------------------------------------------===//
+// 2.6.3.1 Convearions
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
+//===------------------------------------------------------===//
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
+//
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+  return vcvt_s16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
+//
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+  return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
+//
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+  return vcvt_u16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
+//
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+  return vcvtq_u16_f16(a);
+}
+
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index e72d38cbdb5a8..ca3ae478d6546 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -4476,6 +4476,120 @@ uint64_t test_vcvtd_n_u64_f64(float64_t a) {
   return (uint64_t)vcvtd_n_u64_f64(a, 64);
 }
 
+// LLVM-LABEL: @test_vcvt_s32_f32
+// CIR-LABEL: @vcvt_s32_f32
+int32x2_t test_vcvt_s32_f32(float32x2_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM:    [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]])
+// LLVM:    ret <2 x i32> [[VCVTZ_I]]
+  return vcvt_s32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s32_f32
+// CIR-LABEL: @vcvtq_s32_f32
+int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM:    [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]])
+// LLVM:    ret <4 x i32> [[VCVTZ_I]]
+  return vcvtq_s32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u32_f32
+// CIR-LABEL: @vcvt_u32_f32
+uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM:    [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]])
+// LLVM:    ret <2 x i32> [[VCVTZ_I]]
+  return vcvt_u32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u32_f32
+// CIR-LABEL: @vcvtq_u32_f32
+uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM:    [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]])
+// LLVM:    ret <4 x i32> [[VCVTZ_I]]
+  return vcvtq_u32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvt_s64_f64
+// CIR-LABEL: @vcvt_s64_f64
+int64x1_t test_vcvt_s64_f64(float64x1_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM:    [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM:    [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM:    [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]])
+// LLVM:    ret <1 x i64> [[VCVTZ_I]]
+  return vcvt_s64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s64_f64
+// CIR-LABEL: @vcvtq_s64_f64
+int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM:    [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]])
+// LLVM:    ret <2 x i64> [[VCVTZ_I]]
+  return vcvtq_s64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u64_f64
+// CIR-LABEL: @vcvt_u64_f64
+uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM:    [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM:    [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM:    [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]])
+// LLVM:    ret <1 x i64> [[VCVTZ_I]]
+  return vcvt_u64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u64_f64
+// CIR-LABEL: @vcvtq_u64_f64
+uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
+// CIR:     cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM:    [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]])
+// LLVM:    ret <2 x i64> [[VCVTZ_I]]
+  return vcvtq_u64_f64(a);
+}
+
 //===------------------------------------------------------===//
 // 2.1.3.2.3 Vector shift right and accumulate
 // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#vector-shift-right-and-accumulate
diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
index e8f1eead2a0d5..bb12179ee9828 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
@@ -222,58 +222,6 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
   return vcvtq_f16_u16(a);
 }
 
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
-//
-int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
-  return vcvt_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
-//
-int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
-  return vcvtq_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
-//
-uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
-  return vcvt_u16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
-//
-uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
-  return vcvtq_u16_f16(a);
-}
-
 // CHECK-LABEL: define {{[^@]+}}@test_vcvta_s16_f16
 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:

>From d053509bd0965ae461cb7da7b6449bb6007e85cd Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 29 May 2026 13:59:35 +0000
Subject: [PATCH 2/3] Move FP16 tests to a dedicated file

---
 .../AArch64/neon/conversion-fullfp16.c        | 87 +++++++++++++++++++
 clang/test/CodeGen/AArch64/neon/fullfp16.c    | 58 -------------
 2 files changed, 87 insertions(+), 58 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c

diff --git a/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c
new file mode 100644
index 0000000000000..9e0c65c9a77b8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c
@@ -0,0 +1,87 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN:                   %clang_cc1_cg_arm64_neon -target-feature +fullfp16           -emit-llvm  %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa  | FileCheck %s --check-prefixes=LLVM
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-llvm  %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa  | FileCheck %s --check-prefixes=LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-cir   %s -disable-O0-optnone |                                FileCheck %s --check-prefixes=CIR %}
+
+//=============================================================================
+// NOTES
+//
+// Tests for unconstrained conversion intrinsics that require the fullfp16 extension.
+//
+// This file contains FP16 tests that were originally located in
+//  *  clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
+// The main difference is the use of RUN lines that enable ClangIR lowering;
+// therefore only builtins currently supported by ClangIR are tested here.
+// Once ClangIR support is complete, this file is intended to replace the
+// original test file.
+//
+// ACLE section headings based on v2025Q2 of the ACLE specification:
+//  * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero
+//
+//=============================================================================
+
+#include <arm_fp16.h>
+#include <arm_neon.h>
+
+//===------------------------------------------------------===//
+// 2.6.3.1 Convearions
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vcvt_s16_f16
+// CIR-LABEL: @vcvt_s16_f16
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat"
+
+// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// LLVM-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// LLVM-NEXT:    ret <4 x i16> [[VCVTZ_I]]
+  return vcvt_s16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s16_f16
+// CIR-LABEL: @vcvtq_s16_f16
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat"
+
+// LLVM-SAME: (<8 x half> {{.*}} [[A:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// LLVM-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// LLVM-NEXT:    ret <8 x i16> [[VCVTZ_I]]
+  return vcvtq_s16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u16_f16
+// CIR-LABEL: @vcvt_u16_f16
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat"
+
+// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// LLVM-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// LLVM-NEXT:    ret <4 x i16> [[VCVTZ_I]]
+  return vcvt_u16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u16_f16
+// CIR-LABEL: @vcvtq_u16_f16
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat"
+
+// LLVM: (<8 x half> {{.*}} [[A:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// LLVM-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// LLVM-NEXT:    ret <8 x i16> [[VCVTZ_I]]
+  return vcvtq_u16_f16(a);
+}
+
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index 2fc2c47c1951e..c4dde1e3e11f4 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -212,61 +212,3 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
 // LLVM:  ret half [[ADD]]
   return vfmsh_f16(a, b, c);
 }
-
-//===------------------------------------------------------===//
-// 2.6.3.1 Convearions
-// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
-//===------------------------------------------------------===//
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
-//
-int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
-  return vcvt_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
-//
-int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
-  return vcvtq_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT:    ret <4 x i16> [[VCVTZ_I]]
-//
-uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
-  return vcvt_u16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT:    ret <8 x i16> [[VCVTZ_I]]
-//
-uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
-  return vcvtq_u16_f16(a);
-}
-

>From 3f620e8e36b7c4574550b2f7e5dbd7e44d7e3350 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 10 Jun 2026 15:50:33 +0000
Subject: [PATCH 3/3] Remove include

---
 clang/test/CodeGen/AArch64/neon/fullfp16.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index c4dde1e3e11f4..ba65f76e924c4 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -32,7 +32,6 @@
 //=============================================================================
 
 #include <arm_fp16.h>
-#include <arm_neon.h>
 
 //===------------------------------------------------------===//
 // 2.5.1.1.  Addition