[clang] [clang][CIR][AArch64] Add lowering for conversion intrinsics (PR #199990)
Andrzej Warzyński via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 10 08:50:56 PDT 2026
https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/199990
>From 48d8c6f89bd966e5d58a9dfa6c48cd1d856751a1 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 27 May 2026 13:46:42 +0000
Subject: [PATCH 1/3] [clang][CIR][AArch64] Add lowering for conversion
intrinsics
This PR adds lowering for intrinsics from the following groups:
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions
* https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
It continues the work started in #190961 and #193273. This PR implements
conversions from FP to integer types where the bit-width does not
change:
* vcvt_s64_f64
* vcvt_u64_f64
* vcvt_s32_f32
* vcvtq_s32_f32
* vcvtq_s64_f64
* vcvt_u32_f32
* vcvtq_u32_f32
* vcvtq_u64_f64
* vcvt_s16_f16
* vcvtq_s16_f16
* vcvt_u16_f16
* vcvtq_u16_f16
The corresponding tests are moved from:
* clang/test/CodeGen/AArch64/
to:
* clang/test/CodeGen/AArch64/neon/
The lowering follows the existing implementation in
CodeGen/TargetBuiltins/ARM.cpp.
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 15 ++-
clang/test/CodeGen/AArch64/neon-intrinsics.c | 28 -----
clang/test/CodeGen/AArch64/neon-misc.c | 78 ------------
clang/test/CodeGen/AArch64/neon/fullfp16.c | 59 +++++++++
clang/test/CodeGen/AArch64/neon/intrinsics.c | 114 ++++++++++++++++++
.../CodeGen/AArch64/v8.2a-neon-intrinsics.c | 52 --------
6 files changed, 187 insertions(+), 159 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 4111df26d241d..c888b7cfce17e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -761,7 +761,20 @@ static mlir::Value emitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vcvtq_s64_v:
case NEON::BI__builtin_neon_vcvtq_u64_v:
case NEON::BI__builtin_neon_vcvtq_s16_f16:
- case NEON::BI__builtin_neon_vcvtq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtq_u16_f16: {
+ auto ty = getFloatNeonType(cgf, neonType);
+ // Undo the bitcast inserted by intrinsics that expand to this builtin
+ // (e.g. vcvt_u32_f32).
+ // TODO: While the bitcasts eventually cancel each other out, we should
+ // avoid them altogether.
+ ops[0] =
+ cgf.getBuilder().createCast(loc, cir::CastKind::bitcast, ops[0], ty);
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ // AArch64: use fptosi.sat/fptoui.sat unless under strict FP.
+ llvm::StringRef llvmIntrName = usgn ? "fptoui.sat" : "fptosi.sat";
+ return emitNeonCall(cgf.getCIRGenModule(), cgf.getBuilder(),
+ /*argTypes=*/{ty}, ops, llvmIntrName, vTy, loc);
+ }
case NEON::BI__builtin_neon_vcvta_s16_f16:
case NEON::BI__builtin_neon_vcvta_s32_v:
case NEON::BI__builtin_neon_vcvta_s64_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 5865c4cf61b50..4de430ad57c10 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -18645,34 +18645,6 @@ float64x1_t test_vneg_f64(float64x1_t a) {
return vneg_f64(a);
}
-// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_s64_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]])
-// CHECK-NEXT: ret <1 x i64> [[VCVTZ_I]]
-//
-int64x1_t test_vcvt_s64_f64(float64x1_t a) {
- return vcvt_s64_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vcvt_u64_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]])
-// CHECK-NEXT: ret <1 x i64> [[VCVTZ_I]]
-//
-uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
- return vcvt_u64_f64(a);
-}
-
// CHECK-LABEL: define dso_local <1 x i64> @test_vcvtn_s64_f64(
// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index d4cfcfe4e9bdc..964b0059662d2 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -2839,84 +2839,6 @@ float64x2_t test_vrndiq_f64(float64x2_t a) {
return vrndiq_f64(a);
}
-// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_s32_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]])
-// CHECK-NEXT: ret <2 x i32> [[VCVTZ_I]]
-//
-int32x2_t test_vcvt_s32_f32(float32x2_t a) {
- return vcvt_s32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_s32_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x i32> [[VCVTZ_I]]
-//
-int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
- return vcvtq_s32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_s64_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]])
-// CHECK-NEXT: ret <2 x i64> [[VCVTZ_I]]
-//
-int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
- return vcvtq_s64_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vcvt_u32_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]])
-// CHECK-NEXT: ret <2 x i32> [[VCVTZ_I]]
-//
-uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
- return vcvt_u32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vcvtq_u32_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x i32> [[VCVTZ_I]]
-//
-uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
- return vcvtq_u32_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vcvtq_u64_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]])
-// CHECK-NEXT: ret <2 x i64> [[VCVTZ_I]]
-//
-uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
- return vcvtq_u64_f64(a);
-}
-
// CHECK-LABEL: define dso_local <2 x i32> @test_vcvtn_s32_f32(
// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index ba65f76e924c4..2fc2c47c1951e 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -32,6 +32,7 @@
//=============================================================================
#include <arm_fp16.h>
+#include <arm_neon.h>
//===------------------------------------------------------===//
// 2.5.1.1. Addition
@@ -211,3 +212,61 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
// LLVM: ret half [[ADD]]
return vfmsh_f16(a, b, c);
}
+
+//===------------------------------------------------------===//
+// 2.6.3.1 Convearions
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
+//===------------------------------------------------------===//
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
+//
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+ return vcvt_s16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
+//
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+ return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
+//
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+ return vcvt_u16_f16(a);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
+//
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+ return vcvtq_u16_f16(a);
+}
+
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index e72d38cbdb5a8..ca3ae478d6546 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -4476,6 +4476,120 @@ uint64_t test_vcvtd_n_u64_f64(float64_t a) {
return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
+// LLVM-LABEL: @test_vcvt_s32_f32
+// CIR-LABEL: @vcvt_s32_f32
+int32x2_t test_vcvt_s32_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP2]])
+// LLVM: ret <2 x i32> [[VCVTZ_I]]
+ return vcvt_s32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s32_f32
+// CIR-LABEL: @vcvtq_s32_f32
+int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP2]])
+// LLVM: ret <4 x i32> [[VCVTZ_I]]
+ return vcvtq_s32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u32_f32
+// CIR-LABEL: @vcvt_u32_f32
+uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM: [[VCVTZ_I:%.*]] = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> [[TMP2]])
+// LLVM: ret <2 x i32> [[VCVTZ_I]]
+ return vcvt_u32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u32_f32
+// CIR-LABEL: @vcvtq_u32_f32
+uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM: [[VCVTZ_I:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP2]])
+// LLVM: ret <4 x i32> [[VCVTZ_I]]
+ return vcvtq_u32_f32(a);
+}
+
+// LLVM-LABEL: @test_vcvt_s64_f64
+// CIR-LABEL: @vcvt_s64_f64
+int64x1_t test_vcvt_s64_f64(float64x1_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptosi.sat.v1i64.v1f64(<1 x double> [[TMP2]])
+// LLVM: ret <1 x i64> [[VCVTZ_I]]
+ return vcvt_s64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s64_f64
+// CIR-LABEL: @vcvtq_s64_f64
+int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> [[TMP2]])
+// LLVM: ret <2 x i64> [[VCVTZ_I]]
+ return vcvtq_s64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u64_f64
+// CIR-LABEL: @vcvt_u64_f64
+uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM: [[VCVTZ_I:%.*]] = call <1 x i64> @llvm.fptoui.sat.v1i64.v1f64(<1 x double> [[TMP2]])
+// LLVM: ret <1 x i64> [[VCVTZ_I]]
+ return vcvt_u64_f64(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u64_f64
+// CIR-LABEL: @vcvtq_u64_f64
+uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM: [[VCVTZ_I:%.*]] = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> [[TMP2]])
+// LLVM: ret <2 x i64> [[VCVTZ_I]]
+ return vcvtq_u64_f64(a);
+}
+
//===------------------------------------------------------===//
// 2.1.3.2.3 Vector shift right and accumulate
// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#vector-shift-right-and-accumulate
diff --git a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
index e8f1eead2a0d5..bb12179ee9828 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
@@ -222,58 +222,6 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
return vcvtq_f16_u16(a);
}
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
-//
-int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
- return vcvt_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
-//
-int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
- return vcvtq_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
-//
-uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
- return vcvt_u16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
-//
-uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
- return vcvtq_u16_f16(a);
-}
-
// CHECK-LABEL: define {{[^@]+}}@test_vcvta_s16_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
>From d053509bd0965ae461cb7da7b6449bb6007e85cd Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 29 May 2026 13:59:35 +0000
Subject: [PATCH 2/3] Move FP16 tests to a dedicated file
---
.../AArch64/neon/conversion-fullfp16.c | 87 +++++++++++++++++++
clang/test/CodeGen/AArch64/neon/fullfp16.c | 58 -------------
2 files changed, 87 insertions(+), 58 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c
diff --git a/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c
new file mode 100644
index 0000000000000..9e0c65c9a77b8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon/conversion-fullfp16.c
@@ -0,0 +1,87 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1_cg_arm64_neon -target-feature +fullfp16 -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +fullfp16 -fclangir -emit-cir %s -disable-O0-optnone | FileCheck %s --check-prefixes=CIR %}
+
+//=============================================================================
+// NOTES
+//
+// Tests for unconstrained conversion intrinsics that require the fullfp16 extension.
+//
+// This file contains FP16 tests that were originally located in
+// * clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics.c
+// The main difference is the use of RUN lines that enable ClangIR lowering;
+// therefore only builtins currently supported by ClangIR are tested here.
+// Once ClangIR support is complete, this file is intended to replace the
+// original test file.
+//
+// ACLE section headings based on v2025Q2 of the ACLE specification:
+// * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero
+//
+//=============================================================================
+
+#include <arm_fp16.h>
+#include <arm_neon.h>
+
+//===------------------------------------------------------===//
+// 2.6.3.1 Conversions
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vcvt_s16_f16
+// CIR-LABEL: @vcvt_s16_f16
+int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat"
+
+// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// LLVM-NEXT: ret <4 x i16> [[VCVTZ_I]]
+ return vcvt_s16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_s16_f16
+// CIR-LABEL: @vcvtq_s16_f16
+int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
+// CIR: cir.call_llvm_intrinsic "fptosi.sat"
+
+// LLVM-SAME: (<8 x half> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// LLVM-NEXT: ret <8 x i16> [[VCVTZ_I]]
+ return vcvtq_s16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvt_u16_f16
+// CIR-LABEL: @vcvt_u16_f16
+uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat"
+
+// LLVM-SAME: (<4 x half> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
+// LLVM-NEXT: ret <4 x i16> [[VCVTZ_I]]
+ return vcvt_u16_f16(a);
+}
+
+// LLVM-LABEL: @test_vcvtq_u16_f16
+// CIR-LABEL: @vcvtq_u16_f16
+uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
+// CIR: cir.call_llvm_intrinsic "fptoui.sat"
+
+// LLVM: (<8 x half> {{.*}} [[A:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// LLVM-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
+// LLVM-NEXT: ret <8 x i16> [[VCVTZ_I]]
+ return vcvtq_u16_f16(a);
+}
+
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index 2fc2c47c1951e..c4dde1e3e11f4 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -212,61 +212,3 @@ float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
// LLVM: ret half [[ADD]]
return vfmsh_f16(a, b, c);
}
-
-//===------------------------------------------------------===//
-// 2.6.3.1 Convearions
-// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#conversions-2
-//===------------------------------------------------------===//
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_s16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
-//
-int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
- return vcvt_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_s16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
-//
-int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
- return vcvtq_s16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvt_u16_f16
-// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> [[TMP2]])
-// CHECK-NEXT: ret <4 x i16> [[VCVTZ_I]]
-//
-uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
- return vcvt_u16_f16(a);
-}
-
-// CHECK-LABEL: define {{[^@]+}}@test_vcvtq_u16_f16
-// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VCVTZ_I:%.*]] = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> [[TMP2]])
-// CHECK-NEXT: ret <8 x i16> [[VCVTZ_I]]
-//
-uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
- return vcvtq_u16_f16(a);
-}
-
>From 3f620e8e36b7c4574550b2f7e5dbd7e44d7e3350 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 10 Jun 2026 15:50:33 +0000
Subject: [PATCH 3/3] Remove include
---
clang/test/CodeGen/AArch64/neon/fullfp16.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c
index c4dde1e3e11f4..ba65f76e924c4 100644
--- a/clang/test/CodeGen/AArch64/neon/fullfp16.c
+++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c
@@ -32,7 +32,6 @@
//=============================================================================
#include <arm_fp16.h>
-#include <arm_neon.h>
//===------------------------------------------------------===//
// 2.5.1.1. Addition
More information about the cfe-commits
mailing list