[llvm-branch-commits] [clang] [CIR][AArch64] Add missing lowerings for vceqz_* Neon builtins (PR #184402)
Andrzej Warzyński via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Mar 5 08:17:01 PST 2026
https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/184402
>From aad35a51a63d8b7836e73d49bbaeb4e96739a30e Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Tue, 3 Mar 2026 16:38:13 +0000
Subject: [PATCH 1/2] [CIR][AArch64] Add missing lowerings for vceqz_* NEON
builtins
Implement the remaining CIR lowerings for the AdvSIMD (NEON)
`vceqz{|q|d|s}_*` intrinsic group (bitwise equal to zero).
The `vceqzd_s64` variant was already supported; this patch completes
the rest of the group.
Tests for these intrinsics are moved from:
test/CodeGen/AArch64/neon-misc.c
to:
test/CodeGen/AArch64/neon/intrinsics.c
The implementation largely mirrors the existing lowering in
CodeGen/TargetBuiltins/ARM.cpp.
`emitCommonNeonBuiltinExpr` is introduced to support these lowerings.
`getNeonType` is moved without functional changes.
Reference:
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#bitwise-equal-to-zero
---
.../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 794 ++++++++++++++++--
clang/lib/CIR/FrontendAction/CIRGenAction.cpp | 2 +
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 6 +
clang/test/CodeGen/AArch64/neon-misc.c | 307 +------
clang/test/CodeGen/AArch64/neon/intrinsics.c | 378 ++++++++-
5 files changed, 1105 insertions(+), 382 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index be825a5f2f234..d495b02eb7fbd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -47,12 +47,34 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
//
// TODO(cir): Share this code with ARM.cpp
//===----------------------------------------------------------------------===//
-static bool aarch64SVEIntrinsicsProvenSorted = false;
+enum {
+ AddRetType = (1 << 0),
+ Add1ArgType = (1 << 1),
+ Add2ArgTypes = (1 << 2),
+
+ VectorizeRetType = (1 << 3),
+ VectorizeArgTypes = (1 << 4),
+
+ InventFloatType = (1 << 5),
+ UnsignedAlts = (1 << 6),
+
+ Use64BitVectors = (1 << 7),
+ Use128BitVectors = (1 << 8),
+
+ Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+ VectorRet = AddRetType | VectorizeRetType,
+ VectorRetGetArgs01 =
+ AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+ FpCmpzModifiers =
+ AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
+};
namespace {
struct ARMVectorIntrinsicInfo {
+ const char *nameHint;
unsigned builtinID;
unsigned llvmIntrinsic;
+ unsigned altLLVMIntrinsic;
uint64_t typeModifier;
bool operator<(unsigned rhsBuiltinID) const {
@@ -64,17 +86,344 @@ struct ARMVectorIntrinsicInfo {
};
} // end anonymous namespace
-#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier) \
- {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier}
+#define NEONMAP0(NameBase) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0}
+
+#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier}
+
+#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
+ {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \
+ Intrinsic::AltLLVMIntrinsic, TypeModifier}
+
+static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
+ NEONMAP1(vabs_v, aarch64_neon_abs, 0),
+ NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
+ NEONMAP0(vadd_v),
+ NEONMAP0(vaddhn_v),
+ NEONMAP0(vaddq_p128),
+ NEONMAP0(vaddq_v),
+ NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
+ NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
+ NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
+ NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
+ NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
+ NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcage_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcale_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
+ NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
+ NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
+ NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_s16),
+ NEONMAP0(vcvt_f16_u16),
+ NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_f16_s16),
+ NEONMAP0(vcvtq_f16_u16),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP0(vcvtq_high_bf16_f32),
+ NEONMAP0(vcvtq_low_bf16_f32),
+ NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp,
+ 0),
+ NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp,
+ 0),
+ NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
+ NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
+ NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
+ NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
+ NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
+ NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
+ NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
+ NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
+ NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
+ NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
+ NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
+ NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
+ NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+ NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
+ NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
+ NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
+ NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
+ NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
+ NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
+ NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
+ NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
+ NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
+ NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
+ NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
+ NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+ NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
+ NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
+ NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
+ NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
+ NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+ NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+ NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
+ NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
+ NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
+ NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+ NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+ NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
+ NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
+ NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
+ NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
+ NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
+ NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
+ NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
+ NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
+ NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
+ NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
+ NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
+ NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
+ NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
+ NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
+ NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
+ NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
+ NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
+ NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
+ NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
+ NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
+ NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+ NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
+ NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
+};
+
+#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier}
#define SVEMAP2(NameBase, TypeModifier) \
- {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
-static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
+ {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
+static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};
+static bool aarch64SIMDIntrinsicsProvenSorted = false;
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
// Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns
// the corresponding info struct.
static const ARMVectorIntrinsicInfo *
@@ -110,16 +459,13 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
assert(!cast<cir::VectorType>(retTy).getIsScalable() &&
"This is only intended for fixed-width vectors");
// Vector types are cast to i8 vectors. Recover original type.
- cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+ src = builder.createBitcast(src, retTy);
}
mlir::Value zero = builder.getNullValue(src.getType(), loc);
- if (cir::isFPOrVectorOfFPType(src.getType())) {
- cgf.cgm.errorNYI(loc, std::string("unimplemented FP compare"));
- }
if (!scalarCmp)
- cgf.cgm.errorNYI(loc, std::string("unimplemented vector compare"));
+ return builder.createVecCompare(loc, kind, src, zero);
// For scalars, cast !cir.bool to !cir.int<s, 1> so that the compare
// result is sign- rather zero-extended when casting to the output
@@ -131,6 +477,364 @@ emitAArch64CompareBuiltinExpr(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
return builder.createCast(loc, cir::CastKind::integral, cmp, retTy);
}
+// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
+static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
+ mlir::Location loc,
+ bool hasLegalHalfType = true,
+ bool v1Ty = false,
+ bool allowBFloatArgsAndRet = true) {
+ int isQuad = typeFlags.isQuad();
+ switch (typeFlags.getEltType()) {
+ case NeonTypeFlags::Int8:
+ case NeonTypeFlags::Poly8:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
+ : cgf->sInt8Ty,
+ v1Ty ? 1 : (8 << isQuad));
+ case NeonTypeFlags::MFloat8:
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int16:
+ case NeonTypeFlags::Poly16:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
+ : cgf->sInt16Ty,
+ v1Ty ? 1 : (4 << isQuad));
+ case NeonTypeFlags::BFloat16:
+ if (allowBFloatArgsAndRet)
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
+ else
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float16:
+ if (hasLegalHalfType)
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
+ else
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
+ [[fallthrough]];
+ case NeonTypeFlags::Int32:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
+ : cgf->sInt32Ty,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Int64:
+ case NeonTypeFlags::Poly64:
+ return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
+ : cgf->sInt64Ty,
+ v1Ty ? 1 : (1 << isQuad));
+ case NeonTypeFlags::Poly128:
+ // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
+ // There is a lot of i128 and f128 API missing.
+ // so we use v16i8 to represent poly128 and get pattern matched.
+ cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
+ [[fallthrough]];
+ case NeonTypeFlags::Float32:
+ return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
+ v1Ty ? 1 : (2 << isQuad));
+ case NeonTypeFlags::Float64:
+ return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
+ v1Ty ? 1 : (1 << isQuad));
+ }
+ llvm_unreachable("Unknown vector element type!");
+}
+
+static mlir::Value emitCommonNeonBuiltinExpr(
+ CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
+ unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
+ const CallExpr *expr, llvm::SmallVectorImpl<mlir::Value> &ops) {
+
+ mlir::Location loc = cgf.getLoc(expr->getExprLoc());
+ clang::ASTContext &ctx = cgf.getContext();
+
+ // Extract the trailing immediate argument that encodes the type discriminator
+ // for this overloaded intrinsic.
+ // TODO: Move to the parent code that takes care of argument processing.
+ const clang::Expr *arg = expr->getArg(expr->getNumArgs() - 1);
+ std::optional<llvm::APSInt> neonTypeConst = arg->getIntegerConstantExpr(ctx);
+ if (!neonTypeConst)
+ return nullptr;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags neonType(neonTypeConst->getZExtValue());
+ const bool hasLegalHalfType = cgf.getTarget().hasFastHalfType();
+
+ // The value of allowBFloatArgsAndRet is true for AArch64, but it should
+ // come from ABI info.
+ const bool allowBFloatArgsAndRet = false;
+ // FIXME
+ // getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
+
+ cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType,
+ false, allowBFloatArgsAndRet);
+ mlir::Type ty = vTy;
+ if (!ty)
+ return nullptr;
+
+ switch (builtinID) {
+ case NEON::BI__builtin_neon_splat_lane_v:
+ case NEON::BI__builtin_neon_splat_laneq_v:
+ case NEON::BI__builtin_neon_splatq_lane_v:
+ case NEON::BI__builtin_neon_splatq_laneq_v:
+ case NEON::BI__builtin_neon_vpadd_v:
+ case NEON::BI__builtin_neon_vpaddq_v:
+ case NEON::BI__builtin_neon_vabs_v:
+ case NEON::BI__builtin_neon_vabsq_v:
+ case NEON::BI__builtin_neon_vadd_v:
+ case NEON::BI__builtin_neon_vaddq_v:
+ case NEON::BI__builtin_neon_vaddhn_v:
+ case NEON::BI__builtin_neon_vcale_v:
+ case NEON::BI__builtin_neon_vcaleq_v:
+ case NEON::BI__builtin_neon_vcalt_v:
+ case NEON::BI__builtin_neon_vcaltq_v:
+ case NEON::BI__builtin_neon_vcage_v:
+ case NEON::BI__builtin_neon_vcageq_v:
+ case NEON::BI__builtin_neon_vcagt_v:
+ case NEON::BI__builtin_neon_vcagtq_v:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ case NEON::BI__builtin_neon_vceqz_v:
+ case NEON::BI__builtin_neon_vceqzq_v:
+ return emitAArch64CompareBuiltinExpr(cgf, cgf.getBuilder(), loc, ops[0],
+ vTy, cir::CmpOpKind::eq);
+ case NEON::BI__builtin_neon_vcgez_v:
+ case NEON::BI__builtin_neon_vcgezq_v:
+ case NEON::BI__builtin_neon_vclez_v:
+ case NEON::BI__builtin_neon_vclezq_v:
+ case NEON::BI__builtin_neon_vcgtz_v:
+ case NEON::BI__builtin_neon_vcgtzq_v:
+ case NEON::BI__builtin_neon_vcltz_v:
+ case NEON::BI__builtin_neon_vcltzq_v:
+ case NEON::BI__builtin_neon_vclz_v:
+ case NEON::BI__builtin_neon_vclzq_v:
+ case NEON::BI__builtin_neon_vcvt_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_f32_v:
+ case NEON::BI__builtin_neon_vcvt_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_f16_u16:
+ case NEON::BI__builtin_neon_vcvt_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvt_n_f16_u16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
+ case NEON::BI__builtin_neon_vcvtq_n_f16_u16:
+ case NEON::BI__builtin_neon_vcvt_n_f32_v:
+ case NEON::BI__builtin_neon_vcvt_n_f64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_f64_v:
+ case NEON::BI__builtin_neon_vcvt_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_n_s32_v:
+ case NEON::BI__builtin_neon_vcvt_n_u16_f16:
+ case NEON::BI__builtin_neon_vcvt_n_u32_v:
+ case NEON::BI__builtin_neon_vcvt_n_s64_v:
+ case NEON::BI__builtin_neon_vcvt_n_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_n_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
+ case NEON::BI__builtin_neon_vcvtq_n_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_n_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_n_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s32_v:
+ case NEON::BI__builtin_neon_vcvt_u32_v:
+ case NEON::BI__builtin_neon_vcvt_s64_v:
+ case NEON::BI__builtin_neon_vcvt_u64_v:
+ case NEON::BI__builtin_neon_vcvt_s16_f16:
+ case NEON::BI__builtin_neon_vcvt_u16_f16:
+ case NEON::BI__builtin_neon_vcvtq_s32_v:
+ case NEON::BI__builtin_neon_vcvtq_u32_v:
+ case NEON::BI__builtin_neon_vcvtq_s64_v:
+ case NEON::BI__builtin_neon_vcvtq_u64_v:
+ case NEON::BI__builtin_neon_vcvtq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtq_u16_f16:
+ case NEON::BI__builtin_neon_vcvta_s16_f16:
+ case NEON::BI__builtin_neon_vcvta_s32_v:
+ case NEON::BI__builtin_neon_vcvta_s64_v:
+ case NEON::BI__builtin_neon_vcvta_u16_f16:
+ case NEON::BI__builtin_neon_vcvta_u32_v:
+ case NEON::BI__builtin_neon_vcvta_u64_v:
+ case NEON::BI__builtin_neon_vcvtaq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtaq_s32_v:
+ case NEON::BI__builtin_neon_vcvtaq_s64_v:
+ case NEON::BI__builtin_neon_vcvtaq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtaq_u32_v:
+ case NEON::BI__builtin_neon_vcvtaq_u64_v:
+ case NEON::BI__builtin_neon_vcvtn_s16_f16:
+ case NEON::BI__builtin_neon_vcvtn_s32_v:
+ case NEON::BI__builtin_neon_vcvtn_s64_v:
+ case NEON::BI__builtin_neon_vcvtn_u16_f16:
+ case NEON::BI__builtin_neon_vcvtn_u32_v:
+ case NEON::BI__builtin_neon_vcvtn_u64_v:
+ case NEON::BI__builtin_neon_vcvtnq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtnq_s32_v:
+ case NEON::BI__builtin_neon_vcvtnq_s64_v:
+ case NEON::BI__builtin_neon_vcvtnq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtnq_u32_v:
+ case NEON::BI__builtin_neon_vcvtnq_u64_v:
+ case NEON::BI__builtin_neon_vcvtp_s16_f16:
+ case NEON::BI__builtin_neon_vcvtp_s32_v:
+ case NEON::BI__builtin_neon_vcvtp_s64_v:
+ case NEON::BI__builtin_neon_vcvtp_u16_f16:
+ case NEON::BI__builtin_neon_vcvtp_u32_v:
+ case NEON::BI__builtin_neon_vcvtp_u64_v:
+ case NEON::BI__builtin_neon_vcvtpq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtpq_s32_v:
+ case NEON::BI__builtin_neon_vcvtpq_s64_v:
+ case NEON::BI__builtin_neon_vcvtpq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtpq_u32_v:
+ case NEON::BI__builtin_neon_vcvtpq_u64_v:
+ case NEON::BI__builtin_neon_vcvtm_s16_f16:
+ case NEON::BI__builtin_neon_vcvtm_s32_v:
+ case NEON::BI__builtin_neon_vcvtm_s64_v:
+ case NEON::BI__builtin_neon_vcvtm_u16_f16:
+ case NEON::BI__builtin_neon_vcvtm_u32_v:
+ case NEON::BI__builtin_neon_vcvtm_u64_v:
+ case NEON::BI__builtin_neon_vcvtmq_s16_f16:
+ case NEON::BI__builtin_neon_vcvtmq_s32_v:
+ case NEON::BI__builtin_neon_vcvtmq_s64_v:
+ case NEON::BI__builtin_neon_vcvtmq_u16_f16:
+ case NEON::BI__builtin_neon_vcvtmq_u32_v:
+ case NEON::BI__builtin_neon_vcvtmq_u64_v:
+ case NEON::BI__builtin_neon_vcvtx_f32_v:
+ case NEON::BI__builtin_neon_vext_v:
+ case NEON::BI__builtin_neon_vextq_v:
+ case NEON::BI__builtin_neon_vfma_v:
+ case NEON::BI__builtin_neon_vfmaq_v:
+ case NEON::BI__builtin_neon_vld1_v:
+ case NEON::BI__builtin_neon_vld1q_v:
+ case NEON::BI__builtin_neon_vld1_x2_v:
+ case NEON::BI__builtin_neon_vld1q_x2_v:
+ case NEON::BI__builtin_neon_vld1_x3_v:
+ case NEON::BI__builtin_neon_vld1q_x3_v:
+ case NEON::BI__builtin_neon_vld1_x4_v:
+ case NEON::BI__builtin_neon_vld1q_x4_v:
+ case NEON::BI__builtin_neon_vld2_v:
+ case NEON::BI__builtin_neon_vld2q_v:
+ case NEON::BI__builtin_neon_vld3_v:
+ case NEON::BI__builtin_neon_vld3q_v:
+ case NEON::BI__builtin_neon_vld4_v:
+ case NEON::BI__builtin_neon_vld4q_v:
+ case NEON::BI__builtin_neon_vld2_dup_v:
+ case NEON::BI__builtin_neon_vld2q_dup_v:
+ case NEON::BI__builtin_neon_vld3_dup_v:
+ case NEON::BI__builtin_neon_vld3q_dup_v:
+ case NEON::BI__builtin_neon_vld4_dup_v:
+ case NEON::BI__builtin_neon_vld4q_dup_v:
+ case NEON::BI__builtin_neon_vld1_dup_v:
+ case NEON::BI__builtin_neon_vld1q_dup_v:
+ case NEON::BI__builtin_neon_vld2_lane_v:
+ case NEON::BI__builtin_neon_vld2q_lane_v:
+ case NEON::BI__builtin_neon_vld3_lane_v:
+ case NEON::BI__builtin_neon_vld3q_lane_v:
+ case NEON::BI__builtin_neon_vld4_lane_v:
+ case NEON::BI__builtin_neon_vld4q_lane_v:
+ case NEON::BI__builtin_neon_vmovl_v:
+ case NEON::BI__builtin_neon_vmovn_v:
+ case NEON::BI__builtin_neon_vmull_v:
+ case NEON::BI__builtin_neon_vpadal_v:
+ case NEON::BI__builtin_neon_vpadalq_v:
+ case NEON::BI__builtin_neon_vpaddl_v:
+ case NEON::BI__builtin_neon_vpaddlq_v:
+ case NEON::BI__builtin_neon_vqdmlal_v:
+ case NEON::BI__builtin_neon_vqdmlsl_v:
+ case NEON::BI__builtin_neon_vqdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqdmulh_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
+ case NEON::BI__builtin_neon_vqrdmulh_lane_v:
+ case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqdmulh_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
+ case NEON::BI__builtin_neon_vqrdmulh_laneq_v:
+ case NEON::BI__builtin_neon_vqshl_n_v:
+ case NEON::BI__builtin_neon_vqshlq_n_v:
+ case NEON::BI__builtin_neon_vqshlu_n_v:
+ case NEON::BI__builtin_neon_vqshluq_n_v:
+ case NEON::BI__builtin_neon_vrecpe_v:
+ case NEON::BI__builtin_neon_vrecpeq_v:
+ case NEON::BI__builtin_neon_vrsqrte_v:
+ case NEON::BI__builtin_neon_vrsqrteq_v:
+ case NEON::BI__builtin_neon_vrndi_v:
+ case NEON::BI__builtin_neon_vrndiq_v:
+ case NEON::BI__builtin_neon_vrshr_n_v:
+ case NEON::BI__builtin_neon_vrshrq_n_v:
+ case NEON::BI__builtin_neon_vsha512hq_u64:
+ case NEON::BI__builtin_neon_vsha512h2q_u64:
+ case NEON::BI__builtin_neon_vsha512su0q_u64:
+ case NEON::BI__builtin_neon_vsha512su1q_u64:
+ case NEON::BI__builtin_neon_vshl_n_v:
+ case NEON::BI__builtin_neon_vshlq_n_v:
+ case NEON::BI__builtin_neon_vshll_n_v:
+ case NEON::BI__builtin_neon_vshrn_n_v:
+ case NEON::BI__builtin_neon_vshr_n_v:
+ case NEON::BI__builtin_neon_vshrq_n_v:
+ case NEON::BI__builtin_neon_vst1_v:
+ case NEON::BI__builtin_neon_vst1q_v:
+ case NEON::BI__builtin_neon_vst2_v:
+ case NEON::BI__builtin_neon_vst2q_v:
+ case NEON::BI__builtin_neon_vst3_v:
+ case NEON::BI__builtin_neon_vst3q_v:
+ case NEON::BI__builtin_neon_vst4_v:
+ case NEON::BI__builtin_neon_vst4q_v:
+ case NEON::BI__builtin_neon_vst2_lane_v:
+ case NEON::BI__builtin_neon_vst2q_lane_v:
+ case NEON::BI__builtin_neon_vst3_lane_v:
+ case NEON::BI__builtin_neon_vst3q_lane_v:
+ case NEON::BI__builtin_neon_vst4_lane_v:
+ case NEON::BI__builtin_neon_vst4q_lane_v:
+ case NEON::BI__builtin_neon_vsm3partw1q_u32:
+ case NEON::BI__builtin_neon_vsm3partw2q_u32:
+ case NEON::BI__builtin_neon_vsm3ss1q_u32:
+ case NEON::BI__builtin_neon_vsm4ekeyq_u32:
+ case NEON::BI__builtin_neon_vsm4eq_u32:
+ case NEON::BI__builtin_neon_vsm3tt1aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt1bq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2aq_u32:
+ case NEON::BI__builtin_neon_vsm3tt2bq_u32:
+ case NEON::BI__builtin_neon_vst1_x2_v:
+ case NEON::BI__builtin_neon_vst1q_x2_v:
+ case NEON::BI__builtin_neon_vst1_x3_v:
+ case NEON::BI__builtin_neon_vst1q_x3_v:
+ case NEON::BI__builtin_neon_vst1_x4_v:
+ case NEON::BI__builtin_neon_vst1q_x4_v:
+ case NEON::BI__builtin_neon_vsubhn_v:
+ case NEON::BI__builtin_neon_vtrn_v:
+ case NEON::BI__builtin_neon_vtrnq_v:
+ case NEON::BI__builtin_neon_vtst_v:
+ case NEON::BI__builtin_neon_vtstq_v:
+ case NEON::BI__builtin_neon_vuzp_v:
+ case NEON::BI__builtin_neon_vuzpq_v:
+ case NEON::BI__builtin_neon_vxarq_u64:
+ case NEON::BI__builtin_neon_vzip_v:
+ case NEON::BI__builtin_neon_vzipq_v:
+ case NEON::BI__builtin_neon_vdot_s32:
+ case NEON::BI__builtin_neon_vdot_u32:
+ case NEON::BI__builtin_neon_vdotq_s32:
+ case NEON::BI__builtin_neon_vdotq_u32:
+ case NEON::BI__builtin_neon_vfmlal_low_f16:
+ case NEON::BI__builtin_neon_vfmlalq_low_f16:
+ case NEON::BI__builtin_neon_vfmlsl_low_f16:
+ case NEON::BI__builtin_neon_vfmlslq_low_f16:
+ case NEON::BI__builtin_neon_vfmlal_high_f16:
+ case NEON::BI__builtin_neon_vfmlalq_high_f16:
+ case NEON::BI__builtin_neon_vfmlsl_high_f16:
+ case NEON::BI__builtin_neon_vfmlslq_high_f16:
+ case NEON::BI__builtin_neon_vmmlaq_s32:
+ case NEON::BI__builtin_neon_vmmlaq_u32:
+ case NEON::BI__builtin_neon_vusmmlaq_s32:
+ case NEON::BI__builtin_neon_vusdot_s32:
+ case NEON::BI__builtin_neon_vusdotq_s32:
+ case NEON::BI__builtin_neon_vbfdot_f32:
+ case NEON::BI__builtin_neon_vbfdotq_f32:
+ case NEON::BI__builtin_neon___a32_vcvt_bf16_f32:
+ default:
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+
+ cgf.cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented AArch64 builtin call: ") +
+ ctx.BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ }
+}
+
// Emit an intrinsic where all operands are of the same type as the result.
// Depending on mode, this may be a constrained floating-point intrinsic.
static mlir::Value
@@ -298,64 +1002,6 @@ static bool hasExtraNeonArgument(unsigned builtinID) {
return mask != 0;
}
-// TODO(cir): Remove `loc` from the list of arguments once all NYIs are gone.
-static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
- mlir::Location loc,
- bool hasLegalHalfType = true,
- bool v1Ty = false,
- bool allowBFloatArgsAndRet = true) {
- int isQuad = typeFlags.isQuad();
- switch (typeFlags.getEltType()) {
- case NeonTypeFlags::Int8:
- case NeonTypeFlags::Poly8:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
- : cgf->sInt8Ty,
- v1Ty ? 1 : (8 << isQuad));
- case NeonTypeFlags::MFloat8:
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: MFloat8"));
- [[fallthrough]];
- case NeonTypeFlags::Int16:
- case NeonTypeFlags::Poly16:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
- : cgf->sInt16Ty,
- v1Ty ? 1 : (4 << isQuad));
- case NeonTypeFlags::BFloat16:
- if (allowBFloatArgsAndRet)
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
- else
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: BFloat16"));
- [[fallthrough]];
- case NeonTypeFlags::Float16:
- if (hasLegalHalfType)
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
- else
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Float16"));
- [[fallthrough]];
- case NeonTypeFlags::Int32:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
- : cgf->sInt32Ty,
- v1Ty ? 1 : (2 << isQuad));
- case NeonTypeFlags::Int64:
- case NeonTypeFlags::Poly64:
- return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
- : cgf->sInt64Ty,
- v1Ty ? 1 : (1 << isQuad));
- case NeonTypeFlags::Poly128:
- // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
- // There is a lot of i128 and f128 API missing.
- // so we use v16i8 to represent poly128 and get pattern matched.
- cgf->getCIRGenModule().errorNYI(loc, std::string("NEON type: Poly128"));
- [[fallthrough]];
- case NeonTypeFlags::Float32:
- return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
- v1Ty ? 1 : (2 << isQuad));
- case NeonTypeFlags::Float64:
- return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
- v1Ty ? 1 : (1 << isQuad));
- }
- llvm_unreachable("Unknown vector element type!");
-}
-
// TODO(cir): Remove `cgm` from the list of arguments once all NYI(s) are gone.
template <typename Operation>
static mlir::Value
@@ -1585,6 +2231,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
mlir::Location loc = getLoc(expr->getExprLoc());
+ // Not all intrinsics handled by the common case work for AArch64 yet, so only
+ // defer to common code if it's been added to our special map.
+ const armVectorIntrinsicInfo *builtin;
+ builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
+ aarch64SIMDIntrinsicsProvenSorted);
+
+ if (builtin)
+ return emitCommonNeonBuiltinExpr(
+ *this, builtin->builtinID, builtin->llvmIntrinsic,
+ builtin->altLLVMIntrinsic, builtin->nameHint, builtin->typeModifier,
+ expr, ops);
+
// Handle non-overloaded intrinsics first.
switch (builtinID) {
default:
diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
index 19c407545b961..b15d7f699119c 100644
--- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
+++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
@@ -102,6 +102,8 @@ class CIRGenConsumer : public clang::ASTConsumer {
if (!FEOptions.ClangIRDisableCIRVerifier) {
if (!Gen->verifyModule()) {
+ // HACK!!
+ Gen->getModule().dump();
CI.getDiagnostics().Report(
diag::err_cir_verification_failed_pre_passes);
llvm::report_fatal_error(
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index aa95e92b9f2e9..52261c97b0d2e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -561,6 +561,12 @@ enum {
AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
+//===----------------------------------------------------------------------===//
+// Intrinsic maps
+//
+// Maps that help automate code-generation.
+//===----------------------------------------------------------------------===//
+
namespace {
struct ARMVectorIntrinsicInfo {
const char *NameHint;
diff --git a/clang/test/CodeGen/AArch64/neon-misc.c b/clang/test/CodeGen/AArch64/neon-misc.c
index 6eadaaf27a210..ac2c83aa03ccf 100644
--- a/clang/test/CodeGen/AArch64/neon-misc.c
+++ b/clang/test/CodeGen/AArch64/neon-misc.c
@@ -7,313 +7,8 @@
#include <arm_neon.h>
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_s8(int8x8_t a) {
- return vceqz_s8(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
-//
-uint16x4_t test_vceqz_s16(int16x4_t a) {
- return vceqz_s16(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_s32(int32x2_t a) {
- return vceqz_s32(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_s64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_s64(int64x1_t a) {
- return vceqz_s64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_u64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_u64(uint64x1_t a) {
- return vceqz_u64(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_p64(
-// CHECK-SAME: <1 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_p64(poly64x1_t a) {
- return vceqz_p64(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_s8(int8x16_t a) {
- return vceqzq_s8(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
-//
-uint16x8_t test_vceqzq_s16(int16x8_t a) {
- return vceqzq_s16(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_s32(int32x4_t a) {
- return vceqzq_s32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_s64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_s64(int64x2_t a) {
- return vceqzq_s64(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_u8(uint8x8_t a) {
- return vceqz_u8(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vceqz_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: ret <4 x i16> [[VCEQZ_I]]
-//
-uint16x4_t test_vceqz_u16(uint16x4_t a) {
- return vceqz_u16(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_u32(uint32x2_t a) {
- return vceqz_u32(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_u8(uint8x16_t a) {
- return vceqzq_u8(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vceqzq_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[VCEQZ_I]]
-//
-uint16x8_t test_vceqzq_u16(uint16x8_t a) {
- return vceqzq_u16(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_u32(uint32x4_t a) {
- return vceqzq_u32(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_u64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_u64(uint64x2_t a) {
- return vceqzq_u64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vceqz_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
-// CHECK-NEXT: ret <2 x i32> [[VCEQZ_I]]
-//
-uint32x2_t test_vceqz_f32(float32x2_t a) {
- return vceqz_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <1 x i64> @test_vceqz_f64(
-// CHECK-SAME: <1 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
-// CHECK-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
-// CHECK-NEXT: ret <1 x i64> [[VCEQZ_I]]
-//
-uint64x1_t test_vceqz_f64(float64x1_t a) {
- return vceqz_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vceqzq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[VCEQZ_I]]
-//
-uint32x4_t test_vceqzq_f32(float32x4_t a) {
- return vceqzq_f32(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vceqz_p8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[VCEQZ_I]]
-//
-uint8x8_t test_vceqz_p8(poly8x8_t a) {
- return vceqz_p8(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vceqzq_p8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: ret <16 x i8> [[VCEQZ_I]]
-//
-uint8x16_t test_vceqzq_p8(poly8x16_t a) {
- return vceqzq_p8(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
-// CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_f64(float64x2_t a) {
- return vceqzq_f64(a);
-}
-
-// CHECK-LABEL: define dso_local <2 x i64> @test_vceqzq_p64(
-// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-// CHECK-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[VCEQZ_I]]
-//
-uint64x2_t test_vceqzq_p64(poly64x2_t a) {
- return vceqzq_p64(a);
-}
-
// CHECK-LABEL: define dso_local <8 x i8> @test_vcgez_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <8 x i8> [[A]], zeroinitializer
// CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 87f56f7997ce9..41f3ba6fda260 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -16,6 +16,20 @@
#include <arm_neon.h>
+// LLVM-LABEL: @test_vnegd_s64
+// CIR-LABEL: @vnegd_s64
+int64_t test_vnegd_s64(int64_t a) {
+// CIR: cir.unary(minus, {{.*}}) : !s64
+
+// LLVM-SAME: i64 {{.*}} [[A:%.*]])
+// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]]
+// LLVM-NEXT: ret i64 [[VNEGD_I]]
+ return (int64_t)vnegd_s64(a);
+}
+
+//===------------------------------------------------------===//
+// 2.1.2.2 Bitwise equal to zero
+//===------------------------------------------------------===//
// LLVM-LABEL: @test_vceqzd_s64
// CIR-LABEL: @vceqzd_s64
uint64_t test_vceqzd_s64(int64_t a) {
@@ -31,15 +45,363 @@ uint64_t test_vceqzd_s64(int64_t a) {
return (uint64_t)vceqzd_s64(a);
}
-// LLVM-LABEL: @test_vnegd_s64
-// CIR-LABEL: @vnegd_s64
-int64_t test_vnegd_s64(int64_t a) {
-// CIR: cir.unary(minus, {{.*}}) : !s64
+// LLVM-LABEL: @test_vceqz_s8(
+// CIR-LABEL: @vceqz_s8(
+uint8x8_t test_vceqz_s8(int8x8_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_s8(a);
+}
-// LLVM-SAME: i64{{.*}} [[A:%.*]])
-// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]]
-// LLVM-NEXT: ret i64 [[VNEGD_I]]
- return (int64_t)vnegd_s64(a);
+// LLVM-LABEL: @test_vceqz_s16(
+// CIR-LABEL: @vceqz_s16(
+uint16x4_t test_vceqz_s16(int16x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !s16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !s16i>, !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]]
+ return vceqz_s16(a);
+}
+
+// LLVM-LABEL: @test_vceqz_s32(
+// CIR-LABEL: @vceqz_s32(
+uint32x2_t test_vceqz_s32(int32x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s32i>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_s32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_s64(
+// CIR-LABEL: @vceqz_s64(
+uint64x1_t test_vceqz_s64(int64x1_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_s64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u64(
+// CIR-LABEL: @vceqz_u64(
+uint64x1_t test_vceqz_u64(uint64x1_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !u64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !u64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !u64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_u64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_p64(
+// CIR-LABEL: @vceqz_p64(
+uint64x1_t test_vceqz_p64(poly64x1_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !s64i>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_p64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s8(
+// CIR-LABEL: @vceqzq_s8(
+uint8x16_t test_vceqzq_s8(int8x16_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_s8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s16(
+// CIR-LABEL: @vceqzq_s16(
+uint16x8_t test_vceqzq_s16(int16x8_t a) {
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]]
+ return vceqzq_s16(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s32(
+// CIR-LABEL: @vceqzq_s32(
+uint32x4_t test_vceqzq_s32(int32x4_t a) {
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_s32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_s64(
+// CIR-LABEL: @vceqzq_s64(
+uint64x2_t test_vceqzq_s64(int64x2_t a) {
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_s64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u8(
+// CIR-LABEL: @vceqz_u8(
+uint8x8_t test_vceqz_u8(uint8x8_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_u8(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u16(
+// CIR-LABEL: @vceqz_u16(
+uint16x4_t test_vceqz_u16(uint16x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u16i>, !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
+// LLVM-NEXT: ret <4 x i16> [[VCEQZ_I]]
+ return vceqz_u16(a);
+}
+
+// LLVM-LABEL: @test_vceqz_u32(
+// CIR-LABEL: @vceqz_u32(
+uint32x2_t test_vceqz_u32(uint32x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !u32i>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_u32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u8(
+// CIR-LABEL: @vceqzq_u8(
+uint8x16_t test_vceqzq_u8(uint8x16_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !u8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !u8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_u8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u16(
+// CIR-LABEL: @vceqzq_u16(
+uint16x8_t test_vceqzq_u16(uint16x8_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !u16i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
+// LLVM-NEXT: ret <8 x i16> [[VCEQZ_I]]
+ return vceqzq_u16(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u32(
+// CIR-LABEL: @vceqzq_u32(
+uint32x4_t test_vceqzq_u32(uint32x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !u32i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_u32(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_u64(
+// CIR-LABEL: @vceqzq_u64(
+uint64x2_t test_vceqzq_u64(uint64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !u64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !u64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_u64(a);
+}
+
+// LLVM-LABEL: @test_vceqz_f32(
+// CIR-LABEL: @vceqz_f32(
+uint32x2_t test_vceqz_f32(float32x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.float>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.float>, !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// LLVM-NEXT: ret <2 x i32> [[VCEQZ_I]]
+ return vceqz_f32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_f64(
+// CIR-LABEL: @vceqz_f64(
+uint64x1_t test_vceqz_f64(float64x1_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !s8i> -> !cir.vector<1 x !cir.double>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<1 x !cir.double>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<1 x !cir.double>, !cir.vector<1 x !s64i>
+
+// LLVM-SAME: <1 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[A]] to i64
+// LLVM-NEXT: [[__P0_ADDR_I_SROA_0_0_VEC_INSERT:%.*]] = insertelement <1 x i64> undef, i64 [[TMP0]], i32 0
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[__P0_ADDR_I_SROA_0_0_VEC_INSERT]] to <8 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <1 x double> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// LLVM-NEXT: ret <1 x i64> [[VCEQZ_I]]
+ return vceqz_f64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_f32(
+// CIR-LABEL: @vceqzq_f32(
+uint32x4_t test_vceqzq_f32(float32x4_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.float>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<4 x !cir.float>, !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// LLVM-NEXT: ret <4 x i32> [[VCEQZ_I]]
+ return vceqzq_f32(a);
+}
+
+// LLVM-LABEL: @test_vceqz_p8(
+// CIR-LABEL: @vceqz_p8(
+uint8x8_t test_vceqz_p8(poly8x8_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<8 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <8 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
+// LLVM-NEXT: ret <8 x i8> [[VCEQZ_I]]
+ return vceqz_p8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_p8(
+// CIR-LABEL: @vceqzq_p8(
+uint8x16_t test_vceqzq_p8(poly8x16_t a) {
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<16 x !s8i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = icmp eq <16 x i8> [[A]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: ret <16 x i8> [[VCEQZ_I]]
+ return vceqzq_p8(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_f64(
+// CIR-LABEL: @vceqzq_f64(
+uint64x2_t test_vceqzq_f64(float64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !cir.double>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.double>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !cir.double>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// LLVM-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x double> [[TMP2]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_f64(a);
+}
+
+// LLVM-LABEL: @test_vceqzq_p64(
+// CIR-LABEL: @vceqzq_p64(
+uint64x2_t test_vceqzq_p64(poly64x2_t a) {
+// CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
+// CIR: [[C_0:%.*]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+// CIR: cir.vec.cmp(eq, {{%.*}}, [[C_0]]) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
+
+// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]]) #[[ATTR0]] {
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// LLVM-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+// LLVM-NEXT: [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
+// LLVM-NEXT: ret <2 x i64> [[VCEQZ_I]]
+ return vceqzq_p64(a);
}
//===------------------------------------------------------===//
>From ea1bbc72e19c220ca6866c429659eb84ed4faa95 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 5 Mar 2026 16:15:50 +0000
Subject: [PATCH 2/2] Address PR comments
---
clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 6 +++---
clang/lib/CIR/FrontendAction/CIRGenAction.cpp | 2 --
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index d495b02eb7fbd..693845e09e82e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -97,7 +97,7 @@ struct ARMVectorIntrinsicInfo {
{#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \
Intrinsic::AltLLVMIntrinsic, TypeModifier}
-static const armVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(splat_lane_v),
NEONMAP0(splat_laneq_v),
NEONMAP0(splatq_lane_v),
@@ -415,7 +415,7 @@ static const armVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
#define SVEMAP2(NameBase, TypeModifier) \
{#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
-static const armVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
+static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#undef GET_SVE_LLVM_INTRINSIC_MAP
@@ -2233,7 +2233,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
// Not all intrinsics handled by the common case work for AArch64 yet, so only
// defer to common code if it's been added to our special map.
- const armVectorIntrinsicInfo *builtin;
+ const ARMVectorIntrinsicInfo *builtin;
builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID,
aarch64SIMDIntrinsicsProvenSorted);
diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
index b15d7f699119c..19c407545b961 100644
--- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
+++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
@@ -102,8 +102,6 @@ class CIRGenConsumer : public clang::ASTConsumer {
if (!FEOptions.ClangIRDisableCIRVerifier) {
if (!Gen->verifyModule()) {
- // HACK!!
- Gen->getModule().dump();
CI.getDiagnostics().Report(
diag::err_cir_verification_failed_pre_passes);
llvm::report_fatal_error(
More information about the llvm-branch-commits
mailing list