[clang] 94f50ed - [CIR][AArch64] Add lowering for remaining `vabd_*` builtins (#185478)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 10 07:00:46 PDT 2026
Author: Andrzej WarzyĆski
Date: 2026-03-10T14:00:41Z
New Revision: 94f50edff80d0c0c928b71945c8e0362bc9e8c65
URL: https://github.com/llvm/llvm-project/commit/94f50edff80d0c0c928b71945c8e0362bc9e8c65
DIFF: https://github.com/llvm/llvm-project/commit/94f50edff80d0c0c928b71945c8e0362bc9e8c65.diff
LOG: [CIR][AArch64] Add lowering for remaining `vabd_*` builtins (#185478)
Implement the missing CIR lowerings for the AdvSIMD (Neon) `vabd_*`
(absolute difference) intrinsic group.
Most `vabd` variants were already supported (see
https://github.com/llvm/llvm-project/pull/183595); this patch
completes the remaining cases listed in [1].
Move the corresponding tests from:
* clang/test/CodeGen/AArch64/neon_intrinsics.c
to:
* clang/test/CodeGen/AArch64/neon/intrinsics.c
The implementation mirrors the existing lowering in
CodeGen/TargetBuiltins/ARM.cpp. To support this, add the
`emitCommonNeonSISDBuiltinExpr` helper.
Reference:
[1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference
Added:
Modified:
clang/include/clang/CIR/MissingFeatures.h
clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
clang/test/CodeGen/AArch64/neon-intrinsics.c
clang/test/CodeGen/AArch64/neon/intrinsics.c
Removed:
################################################################################
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 1e3a2c9af35d1..70a3dc3bdbfa5 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -311,7 +311,6 @@ struct MissingFeatures {
static bool metaDataNode() { return false; }
static bool moduleNameHash() { return false; }
static bool msabi() { return false; }
- static bool neonSISDIntrinsics() { return false; }
static bool nrvo() { return false; }
static bool objCBlocks() { return false; }
static bool objCGC() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 0a6d2a7ee3ea8..5534e69b5f8bc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -409,6 +409,301 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
};
+// Single-Instruction-Single-Data (SISD) intrinsics.
+//
+// The name is somewhat misleading: not all intrinsics in this table are
+// strictly SISD. While many builtins operate on scalars,
+// * some take vector operands (e.g. reduction builtins such as
+// `vminvq_u16` and `vaddvq_s32`), and
+// * some take both scalar and vector operands (e.g. crypto builtins
+// such as `vsha1cq_u32`).
+//
+// TODO: Either rename this table to better reflect its contents, or
+// restrict it to true SISD intrinsics only.
+static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
+ NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
+ NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
+ NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP0(vcvth_bf16_f32),
+ NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
+ NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
+ NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+ NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+ NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
+ NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
+ NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
+ NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
+ NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
+ NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
+ NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
+ NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
+ NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
+ NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
+ NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
+ NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
+ NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
+ NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
+ NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
+ NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
+ NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
+ NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
+ NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
+ NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
+ NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
+ NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
+ NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
+ NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
+ NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
+ NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
+ NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
+ NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
+ NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
+ NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
+ NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
+ NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
+ NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
+ NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
+ NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
+ NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
+ NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
+ NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
+ NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
+ NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
+ NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+ NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
+ // FP16 scalar intrinisics go here.
+ NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
+ NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+ NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
+ NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
+ NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
+ NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
+ NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
+};
+// clang-format on
+
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
{#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier}
@@ -422,6 +717,7 @@ static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
};
static bool aarch64SIMDIntrinsicsProvenSorted = false;
+static bool aarch64SISDIntrinsicsProvenSorted = false;
static bool aarch64SVEIntrinsicsProvenSorted = false;
// Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns
@@ -1055,6 +1351,56 @@ static mlir::Value emitNeonCall(CIRGenModule &cgm, CIRGenBuilderTy &builder,
isConstrainedFPIntrinsic, shift, rightshift);
}
+static mlir::Value emitCommonNeonSISDBuiltinExpr(
+ CIRGenFunction &cgf, const ARMVectorIntrinsicInfo &info,
+ llvm::SmallVectorImpl<mlir::Value> &ops, const CallExpr *expr) {
+ assert(info.llvmIntrinsic && "Generic code assumes a valid intrinsic");
+
+ switch (info.builtinID) {
+ case NEON::BI__builtin_neon_vcled_s64:
+ case NEON::BI__builtin_neon_vcled_u64:
+ case NEON::BI__builtin_neon_vcles_f32:
+ case NEON::BI__builtin_neon_vcled_f64:
+ case NEON::BI__builtin_neon_vcltd_s64:
+ case NEON::BI__builtin_neon_vcltd_u64:
+ case NEON::BI__builtin_neon_vclts_f32:
+ case NEON::BI__builtin_neon_vcltd_f64:
+ case NEON::BI__builtin_neon_vcales_f32:
+ case NEON::BI__builtin_neon_vcaled_f64:
+ case NEON::BI__builtin_neon_vcalts_f32:
+ case NEON::BI__builtin_neon_vcaltd_f64:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ cgf.getContext().BuiltinInfo.getName(info.builtinID));
+ break;
+ }
+
+ llvm::StringRef llvmIntrName = getLLVMIntrNameNoPrefix(
+ static_cast<llvm::Intrinsic::ID>(info.llvmIntrinsic));
+ mlir::Location loc = cgf.getLoc(expr->getExprLoc());
+
+ // The switch stmt is intended to help catch NYI cases and will be removed
+ // once the CIR implementation is complete. Avoid adding specialized
+ // code in cases - that should only be required for a handful of examples.
+ switch (info.builtinID) {
+ default:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ cgf.getContext().BuiltinInfo.getName(info.builtinID));
+ break;
+ case NEON::BI__builtin_neon_vabdd_f64:
+ case NEON::BI__builtin_neon_vabds_f32:
+ return emitNeonCall(cgf.cgm, cgf.getBuilder(),
+ {cgf.convertType(expr->getArg(0)->getType())}, ops,
+ llvmIntrName, cgf.convertType(expr->getType()), loc);
+ }
+
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE helpers
+//===----------------------------------------------------------------------===//
std::optional<mlir::Value>
CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
@@ -2218,7 +2564,10 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
emitScalarOrConstFoldImmArg(iceArguments, i, expr->getArg(i)));
}
- assert(!cir::MissingFeatures::neonSISDIntrinsics());
+ const ARMVectorIntrinsicInfo *builtin = findARMVectorIntrinsicInMap(
+ AArch64SISDIntrinsicMap, builtinID, aarch64SISDIntrinsicsProvenSorted);
+ if (builtin)
+ return emitCommonNeonSISDBuiltinExpr(*this, *builtin, ops, expr);
// Not all intrinsics handled by the common case work for AArch64 yet, so only
// defer to common code if it's been added to our special map.
@@ -2240,7 +2589,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
// Not all intrinsics handled by the common case work for AArch64 yet, so only
// defer to common code if it's been added to our special map.
- const ARMVectorIntrinsicInfo *builtin;
builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
aarch64SIMDIntrinsicsProvenSorted);
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index f251551dc2217..bfaea2b8ae909 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -22384,26 +22384,6 @@ poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
return vreinterpretq_p64_p16(a);
}
-// CHECK-LABEL: define dso_local float @test_vabds_f32(
-// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float [[A]], float [[B]])
-// CHECK-NEXT: ret float [[VABDS_F32_I]]
-//
-float32_t test_vabds_f32(float32_t a, float32_t b) {
- return vabds_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local double @test_vabdd_f64(
-// CHECK-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double [[A]], double [[B]])
-// CHECK-NEXT: ret double [[VABDD_F64_I]]
-//
-float64_t test_vabdd_f64(float64_t a, float64_t b) {
- return vabdd_f64(a, b);
-}
-
// CHECK-LABEL: define dso_local <16 x i8> @test_vuqaddq_s8(
// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index cf8b79d373985..b740c3b5b2310 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -720,9 +720,27 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
return vabdq_f64(v1, v2);
}
-// TODO SISD variants:
-// TODO @vabdd_f64(a, b);
-// TODO @test_vabds_f32(
+// LLVM-LABEL: @test_vabds_f32(
+// CIR-LABEL: @vabds_f32(
+float32_t test_vabds_f32(float32_t a, float32_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.sisd.fabd"
+
+// LLVM-SAME: float {{.*}} [[A:%.*]], float noundef [[B:%.*]])
+// LLVM: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float [[A]], float [[B]])
+// LLVM-NEXT: ret float [[VABDS_F32_I]]
+ return vabds_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vabdd_f64(
+// CIR-LABEL: @vabdd_f64(
+float64_t test_vabdd_f64(float64_t a, float64_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.sisd.fabd"
+
+// LLVM-SAME: double {{.*}} [[A:%.*]], double noundef [[B:%.*]])
+// LLVM: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double [[A]], double [[B]])
+// LLVM-NEXT: ret double [[VABDD_F64_I]]
+ return vabdd_f64(a, b);
+}
//===------------------------------------------------------===//
// 2.1.1.6.3. Absolute
diff erence and accumulate
More information about the cfe-commits
mailing list