[clang] [CIR] add vsqrt and vsqrtq support (PR #192282)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 17 11:09:28 PDT 2026
https://github.com/Kouunnn updated https://github.com/llvm/llvm-project/pull/192282
>From 5ec81800e2ceec6dd6821735982659ec744ce628 Mon Sep 17 00:00:00 2001
From: Zile Xiong <xiongzile99 at gmail.com>
Date: Tue, 14 Apr 2026 15:37:16 +0800
Subject: [PATCH 1/2] [CIR] add vsqrt and vsqrtq support
---
clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 75dd19d880444..c5fcc06f50092 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2571,8 +2571,17 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
case NEON::BI__builtin_neon_vpminnm_v:
case NEON::BI__builtin_neon_vpminnmq_v:
case NEON::BI__builtin_neon_vsqrth_f16:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case NEON::BI__builtin_neon_vsqrt_v:
- case NEON::BI__builtin_neon_vsqrtq_v:
+ case NEON::BI__builtin_neon_vsqrtq_v: {
+ // TODO: implement vsqrt and vsqrtq
+ // refer:
+ // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#square-root
+ return emitNeonCall(cgm, builder, {ty}, ops, "sqrt", ty, loc);
+ }
case NEON::BI__builtin_neon_vrbit_v:
case NEON::BI__builtin_neon_vrbitq_v:
case NEON::BI__builtin_neon_vmaxv_f16:
>From 54609faea0bcb9c7ba2de355f0a31d7160d42fe2 Mon Sep 17 00:00:00 2001
From: ZCkouun <1765074320 at qq.com>
Date: Fri, 17 Apr 2026 17:14:39 +0800
Subject: [PATCH 2/2] [CIR] Add vsqrt/vsqrtq NEON tests
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 7 +--
clang/test/CodeGen/AArch64/neon-intrinsics.c | 14 +----
clang/test/CodeGen/AArch64/neon-misc.c | 39 ------------
clang/test/CodeGen/AArch64/neon/intrinsics.c | 62 +++++++++++++++++++
4 files changed, 65 insertions(+), 57 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index c5fcc06f50092..6c85f58388480 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2576,12 +2576,9 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
getContext().BuiltinInfo.getName(builtinID));
return mlir::Value{};
case NEON::BI__builtin_neon_vsqrt_v:
- case NEON::BI__builtin_neon_vsqrtq_v: {
- // TODO: implement vsqrt and vsqrtq
- // refer:
- // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#square-root
+ case NEON::BI__builtin_neon_vsqrtq_v:
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
return emitNeonCall(cgm, builder, {ty}, ops, "sqrt", ty, loc);
- }
case NEON::BI__builtin_neon_vrbit_v:
case NEON::BI__builtin_neon_vrbitq_v:
case NEON::BI__builtin_neon_vmaxv_f16:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 82a10b626c223..a22ea7d78b8fa 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -21532,19 +21532,7 @@ float64x1_t test_vrecpe_f64(float64x1_t a) {
return vrecpe_f64(a);
}
-// CHECK-LABEL: define dso_local <1 x double> @test_vsqrt_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP2]])
-// CHECK-NEXT: ret <1 x double> [[VSQRT_I]]
-//
-float64x1_t test_vsqrt_f64(float64x1_t a) {
- return vsqrt_f64(a);
-}
+
// CHECK-LABEL: define dso_local <1 x double> @test_vrecps_f64(
// CHECK-SAME: <1 x double> noundef [[A:%.*]], <1 x double> noundef [[B:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index ac2c83aa03ccf..b3b8f2bae9656 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -3639,45 +3639,6 @@ uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
return vrecpeq_u32(a);
}
-// CHECK-LABEL: define dso_local <2 x float> @test_vsqrt_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP2]])
-// CHECK-NEXT: ret <2 x float> [[VSQRT_I]]
-//
-float32x2_t test_vsqrt_f32(float32x2_t a) {
- return vsqrt_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vsqrtq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
-// CHECK-NEXT: ret <4 x float> [[VSQRT_I]]
-//
-float32x4_t test_vsqrtq_f32(float32x4_t a) {
- return vsqrtq_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x double> @test_vsqrtq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
-// CHECK-NEXT: ret <2 x double> [[VSQRT_I]]
-//
-float64x2_t test_vsqrtq_f64(float64x2_t a) {
- return vsqrtq_f64(a);
-}
-
// CHECK-LABEL: define dso_local <2 x float> @test_vcvt_f32_s32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 241ddce6fe978..4f1784726b6f6 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -1230,6 +1230,68 @@ float64x2_t test_vmaxnmq_f64(float64x2_t v1, float64x2_t v2) {
return vmaxnmq_f64(v1, v2);
}
+//===----------------------------------------------------------------------===//
+// 2.1.1.11. Square root
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#square-root
+//===----------------------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vsqrt_f32(
+// CIR-LABEL: @vsqrt_f32(
+float32x2_t test_vsqrt_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "sqrt" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.vector<2 x !cir.float>
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM-NEXT: [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP2]])
+// LLVM-NEXT: ret <2 x float> [[VSQRT_I]]
+ return vsqrt_f32(a);
+}
+
+// LLVM-LABEL: @test_vsqrtq_f32(
+// CIR-LABEL: @vsqrtq_f32(
+float32x4_t test_vsqrtq_f32(float32x4_t a) {
+// CIR: cir.call_llvm_intrinsic "sqrt" %{{.*}} : (!cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM-NEXT: [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
+// LLVM-NEXT: ret <4 x float> [[VSQRT_I]]
+ return vsqrtq_f32(a);
+}
+
+// LLVM-LABEL: @test_vsqrt_f64(
+// CIR-LABEL: @vsqrt_f64(
+float64x1_t test_vsqrt_f64(float64x1_t a) {
+// CIR: cir.call_llvm_intrinsic "sqrt" %{{.*}} : (!cir.vector<1 x !cir.double>) -> !cir.vector<1 x !cir.double>
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM-NEXT: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP2]])
+// LLVM-NEXT: ret <1 x double> [[VSQRT_I]]
+ return vsqrt_f64(a);
+}
+
+// LLVM-LABEL: @test_vsqrtq_f64(
+// CIR-LABEL: @vsqrtq_f64(
+float64x2_t test_vsqrtq_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "sqrt" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.vector<2 x !cir.double>
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM-NEXT: [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
+// LLVM-NEXT: ret <2 x double> [[VSQRT_I]]
+ return vsqrtq_f64(a);
+}
+
//===------------------------------------------------------===//
// 2.1.1.2.8. Widening Multiplication
//===------------------------------------------------------===//
More information about the cfe-commits mailing list