r201848 - ARM & AArch64: use table for EmitCommonNeonBuiltinExpr
Tim Northover
tnorthover at apple.com
Fri Feb 21 03:57:25 PST 2014
Author: tnorthover
Date: Fri Feb 21 05:57:24 2014
New Revision: 201848
URL: http://llvm.org/viewvc/llvm-project?rev=201848&view=rev
Log:
ARM & AArch64: use table for EmitCommonNeonBuiltinExpr
This extends the intrinsic lookup table format slightly, and adds
entries for use by the shared ARM/AArch64 definitions. The benefit is
currently smaller than for the SISD intrinsics (there's more custom
code implementing this set), but a few lines are saved and there's
scope for future expansion.
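For illustration, the first NEONMAP1 entry in the new AArch64 table expands
(roughly, per the macro definitions in this patch) to an initializer like:

  // NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType) becomes:
  { NEON::BI__builtin_neon_vabdd_f64, // BuiltinID
    Intrinsic::aarch64_neon_vabd,     // LLVMIntrinsic
    0,                                // AltLLVMIntrinsic (none for this entry)
    "vabdd_f64",                      // NameHint
    AddRetType }                      // TypeModifier

findNeonIntrinsicInMap then looks entries up with std::lower_bound on the
BuiltinID, so each table must stay sorted by builtin ID (the !NDEBUG check
in the patch asserts this).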
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=201848&r1=201847&r2=201848&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Feb 21 05:57:24 2014
@@ -1754,18 +1754,858 @@ CodeGenFunction::EmitPointerWithAlignmen
return std::make_pair(EmitScalarExpr(Addr), Align);
}
-Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E,
- SmallVectorImpl<Value *> &Ops,
- llvm::Value *Align) {
+enum {
+ AddRetType = (1 << 0),
+ Add1ArgType = (1 << 1),
+ Add2ArgTypes = (1 << 2),
+
+ VectorizeRetType = (1 << 3),
+ VectorizeArgTypes = (1 << 4),
+
+ InventFloatType = (1 << 5),
+ UnsignedAlts = (1 << 6),
+
+ Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+ VectorRet = AddRetType | VectorizeRetType,
+ VectorRetGetArgs01 =
+ AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+ FpCmpzModifiers =
+ AddRetType | VectorizeRetType | Add1ArgType | InventFloatType,
+};
+
+ struct NeonIntrinsicInfo {
+ unsigned BuiltinID;
+ unsigned LLVMIntrinsic;
+ unsigned AltLLVMIntrinsic;
+ const char *NameHint;
+ unsigned TypeModifier;
+
+ bool operator<(unsigned RHSBuiltinID) const {
+ return BuiltinID < RHSBuiltinID;
+ }
+};
+
+#define NEONMAP0(NameBase) \
+ { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }
+
+#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ { NEON:: BI__builtin_neon_ ## NameBase, \
+ Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }
+
+#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
+ { NEON:: BI__builtin_neon_ ## NameBase, \
+ Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
+ #NameBase, TypeModifier }
+
+static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
+ NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType),
+ NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType),
+ NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0),
+ NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0),
+ NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0),
+ NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+ NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
+ NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers),
+ NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
+ NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers),
+ NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01),
+ NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers),
+ NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers),
+ NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
+ NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers),
+ NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers),
+ NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01),
+ NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01),
+ NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
+ NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers),
+ NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01),
+ NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers),
+ NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
+ NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
+ NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
+ NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers),
+ NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01),
+ NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers),
+ NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+ NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+ NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0),
+ NEONMAP0(vdupb_lane_i8),
+ NEONMAP0(vdupb_laneq_i8),
+ NEONMAP0(vdupd_lane_f64),
+ NEONMAP0(vdupd_lane_i64),
+ NEONMAP0(vdupd_laneq_f64),
+ NEONMAP0(vdupd_laneq_i64),
+ NEONMAP0(vduph_lane_i16),
+ NEONMAP0(vduph_laneq_i16),
+ NEONMAP0(vdups_lane_f32),
+ NEONMAP0(vdups_lane_i32),
+ NEONMAP0(vdups_laneq_f32),
+ NEONMAP0(vdups_laneq_i32),
+ NEONMAP0(vfmad_lane_f64),
+ NEONMAP0(vfmad_laneq_f64),
+ NEONMAP0(vfmas_lane_f32),
+ NEONMAP0(vfmas_laneq_f32),
+ NEONMAP0(vget_lane_f32),
+ NEONMAP0(vget_lane_f64),
+ NEONMAP0(vget_lane_i16),
+ NEONMAP0(vget_lane_i32),
+ NEONMAP0(vget_lane_i64),
+ NEONMAP0(vget_lane_i8),
+ NEONMAP0(vgetq_lane_f32),
+ NEONMAP0(vgetq_lane_f64),
+ NEONMAP0(vgetq_lane_i16),
+ NEONMAP0(vgetq_lane_i32),
+ NEONMAP0(vgetq_lane_i64),
+ NEONMAP0(vgetq_lane_i8),
+ NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0),
+ NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0),
+ NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+ NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0),
+ NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0),
+ NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
+ NEONMAP0(vmul_n_f64),
+ NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0),
+ NEONMAP0(vmulxd_f64),
+ NEONMAP0(vmulxs_f32),
+ NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0),
+ NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0),
+ NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0),
+ NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+ NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+ NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet),
+ NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet),
+ NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet),
+ NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet),
+ NEONMAP0(vqdmlalh_lane_s16),
+ NEONMAP0(vqdmlalh_laneq_s16),
+ NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet),
+ NEONMAP0(vqdmlals_lane_s32),
+ NEONMAP0(vqdmlals_laneq_s32),
+ NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet),
+ NEONMAP0(vqdmlslh_lane_s16),
+ NEONMAP0(vqdmlslh_laneq_s16),
+ NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet),
+ NEONMAP0(vqdmlsls_lane_s32),
+ NEONMAP0(vqdmlsls_laneq_s32),
+ NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet),
+ NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet),
+ NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet),
+ NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet),
+ NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet),
+ NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet),
+ NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet),
+ NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet),
+ NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet),
+ NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet),
+ NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet),
+ NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet),
+ NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet),
+ NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet),
+ NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet),
+ NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet),
+ NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet),
+ NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet),
+ NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet),
+ NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet),
+ NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet),
+ NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet),
+ NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet),
+ NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet),
+ NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet),
+ NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet),
+ NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType),
+ NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType),
+ NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType),
+ NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType),
+ NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType),
+ NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType),
+ NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0),
+ NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0),
+ NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet),
+ NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet),
+ NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType),
+ NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType),
+ NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType),
+ NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType),
+ NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0),
+ NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0),
+ NEONMAP0(vset_lane_f32),
+ NEONMAP0(vset_lane_f64),
+ NEONMAP0(vset_lane_i16),
+ NEONMAP0(vset_lane_i32),
+ NEONMAP0(vset_lane_i64),
+ NEONMAP0(vset_lane_i8),
+ NEONMAP0(vsetq_lane_f32),
+ NEONMAP0(vsetq_lane_f64),
+ NEONMAP0(vsetq_lane_i16),
+ NEONMAP0(vsetq_lane_i32),
+ NEONMAP0(vsetq_lane_i64),
+ NEONMAP0(vsetq_lane_i8),
+ NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0),
+ NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0),
+ NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0),
+ NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0),
+ NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0),
+ NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0),
+ NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0),
+ NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0),
+ NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0),
+ NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0),
+ NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet),
+ NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet),
+ NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet),
+ NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0),
+ NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0),
+ NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet),
+ NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet),
+ NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0),
+ NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0),
+ NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01),
+ NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01),
+ NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet),
+ NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet)
+};
+
+static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
+ NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vabs_v, arm_neon_vabs, 0),
+ NEONMAP1(vabsq_v, arm_neon_vabs, 0),
+ NEONMAP0(vaddhn_v),
+ NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
+ NEONMAP1(vaeseq_v, arm_neon_aese, 0),
+ NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
+ NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
+ NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vcage_v, arm_neon_vacge, 0),
+ NEONMAP1(vcageq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcale_v, arm_neon_vacge, 0),
+ NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0),
+ NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvt_s32_v),
+ NEONMAP0(vcvt_s64_v),
+ NEONMAP0(vcvt_u32_v),
+ NEONMAP0(vcvt_u64_v),
+ NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_s32_v),
+ NEONMAP0(vcvtq_s64_v),
+ NEONMAP0(vcvtq_u32_v),
+ NEONMAP0(vcvtq_u64_v),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vld1_dup_v),
+ NEONMAP1(vld1_v, arm_neon_vld1, 0),
+ NEONMAP0(vld1q_dup_v),
+ NEONMAP1(vld1q_v, arm_neon_vld1, 0),
+ NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2_v, arm_neon_vld2, 0),
+ NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2q_v, arm_neon_vld2, 0),
+ NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3_v, arm_neon_vld3, 0),
+ NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3q_v, arm_neon_vld3, 0),
+ NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4_v, arm_neon_vld4, 0),
+ NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4q_v, arm_neon_vld4, 0),
+ NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP0(vmull_v),
+ NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
+ NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
+ NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
+ NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
+ NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
+ NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
+ NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
+ NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
+ NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
+ NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP1(vst1_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+ NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2_v, arm_neon_vst2, 0),
+ NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2q_v, arm_neon_vst2, 0),
+ NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3_v, arm_neon_vst3, 0),
+ NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3q_v, arm_neon_vst3, 0),
+ NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4_v, arm_neon_vst4, 0),
+ NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4q_v, arm_neon_vst4, 0),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtrn_v),
+ NEONMAP0(vtrnq_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+ NEONMAP0(vuzp_v),
+ NEONMAP0(vuzpq_v),
+ NEONMAP0(vzip_v),
+ NEONMAP0(vzipq_v),
+};
+
+#undef NEONMAP0
+#undef NEONMAP1
+#undef NEONMAP2
+
+static bool NEONSIMDIntrinsicsProvenSorted = false;
+
+static bool AArch64SISDIntrinsicInfoProvenSorted = false;
+
+static const NeonIntrinsicInfo *
+findNeonIntrinsicInMap(llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
+ unsigned BuiltinID, bool &MapProvenSorted) {
+
+#ifndef NDEBUG
+ if (!MapProvenSorted) {
+ // FIXME: use std::is_sorted once C++11 is allowed
+ for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
+ assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
+ MapProvenSorted = true;
+ }
+#endif
+
+ const NeonIntrinsicInfo *Builtin =
+ std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
+
+ if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
+ return Builtin;
+
+ return 0;
+}
+
+Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
+ unsigned Modifier,
+ llvm::Type *ArgType,
+ const CallExpr *E) {
+ // Return type.
+ SmallVector<llvm::Type *, 3> Tys;
+ if (Modifier & AddRetType) {
+ llvm::Type *Ty = ConvertType(E->getCallReturnType());
+ if (Modifier & VectorizeRetType)
+ Ty = llvm::VectorType::get(Ty, 1);
+
+ Tys.push_back(Ty);
+ }
+
+ // Arguments.
+ if (Modifier & VectorizeArgTypes)
+ ArgType = llvm::VectorType::get(ArgType, 1);
+
+ if (Modifier & (Add1ArgType | Add2ArgTypes))
+ Tys.push_back(ArgType);
+
+ if (Modifier & Add2ArgTypes)
+ Tys.push_back(ArgType);
+
+ if (Modifier & InventFloatType)
+ Tys.push_back(FloatTy);
+
+ return CGM.getIntrinsic(IntrinsicID, Tys);
+}
+
+
+static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
+ const NeonIntrinsicInfo &SISDInfo,
+ const CallExpr *E) {
+ unsigned BuiltinID = SISDInfo.BuiltinID;
+ unsigned int Int = SISDInfo.LLVMIntrinsic;
+ unsigned IntTypes = SISDInfo.TypeModifier;
+ const char *s = SISDInfo.NameHint;
+
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+ }
+
+ // AArch64 scalar builtins are not overloaded, they do not have an extra
+ // argument that specifies the vector type, need to handle each case.
+ switch (BuiltinID) {
+ default: break;
+ case NEON::BI__builtin_neon_vdups_lane_f32:
+ case NEON::BI__builtin_neon_vdupd_lane_f64:
+ case NEON::BI__builtin_neon_vdups_laneq_f32:
+ case NEON::BI__builtin_neon_vdupd_laneq_f64: {
+ return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
+ }
+ case NEON::BI__builtin_neon_vdupb_lane_i8:
+ case NEON::BI__builtin_neon_vduph_lane_i16:
+ case NEON::BI__builtin_neon_vdups_lane_i32:
+ case NEON::BI__builtin_neon_vdupd_lane_i64:
+ case NEON::BI__builtin_neon_vdupb_laneq_i8:
+ case NEON::BI__builtin_neon_vduph_laneq_i16:
+ case NEON::BI__builtin_neon_vdups_laneq_i32:
+ case NEON::BI__builtin_neon_vdupd_laneq_i64: {
+ // The backend treats Neon scalar types as v1ix types
+ // So we want to dup lane from any vector to v1ix vector
+ // with shufflevector
+ s = "vdup_lane";
+ Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1]));
+ Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s);
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ // AArch64 intrinsic one-element vector type cast to
+ // scalar type expected by the builtin
+ return CGF.Builder.CreateBitCast(Result, Ty, s);
+ }
+ case NEON::BI__builtin_neon_vqdmlalh_lane_s16 :
+ case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 :
+ case NEON::BI__builtin_neon_vqdmlals_lane_s32 :
+ case NEON::BI__builtin_neon_vqdmlals_laneq_s32 :
+ case NEON::BI__builtin_neon_vqdmlslh_lane_s16 :
+ case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 :
+ case NEON::BI__builtin_neon_vqdmlsls_lane_s32 :
+ case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : {
+ Int = Intrinsic::arm_neon_vqadds;
+ if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 ||
+ BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) {
+ Int = Intrinsic::arm_neon_vqsubs;
+ }
+ // create vqdmull call with b * c[i]
+ llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
+ llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
+ Ty = CGF.ConvertType(E->getArg(0)->getType());
+ llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
+ Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
+ Value *V = UndefValue::get(OpVTy);
+ llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
+ SmallVector<Value *, 2> MulOps;
+ MulOps.push_back(Ops[1]);
+ MulOps.push_back(Ops[2]);
+ MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
+ MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
+ MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
+ Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
+ // create vqadds call with a +/- vqdmull result
+ F = CGF.CGM.getIntrinsic(Int, ResVTy);
+ SmallVector<Value *, 2> AddOps;
+ AddOps.push_back(Ops[0]);
+ AddOps.push_back(MulRes);
+ V = UndefValue::get(ResVTy);
+ AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
+ Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
+ return CGF.Builder.CreateBitCast(AddRes, Ty);
+ }
+ case NEON::BI__builtin_neon_vfmas_lane_f32:
+ case NEON::BI__builtin_neon_vfmas_laneq_f32:
+ case NEON::BI__builtin_neon_vfmad_lane_f64:
+ case NEON::BI__builtin_neon_vfmad_laneq_f64: {
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ // Scalar Floating-point Multiply Extended
+ case NEON::BI__builtin_neon_vmulxs_f32:
+ case NEON::BI__builtin_neon_vmulxd_f64: {
+ Int = Intrinsic::aarch64_neon_vmulx;
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
+ }
+ case NEON::BI__builtin_neon_vmul_n_f64: {
+ // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane
+ llvm::Type *VTy = GetNeonType(&CGF,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, false));
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy);
+ llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0);
+ Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract");
+ Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
+ return CGF.Builder.CreateBitCast(Result, VTy);
+ }
+ case NEON::BI__builtin_neon_vget_lane_i8:
+ case NEON::BI__builtin_neon_vget_lane_i16:
+ case NEON::BI__builtin_neon_vget_lane_i32:
+ case NEON::BI__builtin_neon_vget_lane_i64:
+ case NEON::BI__builtin_neon_vget_lane_f32:
+ case NEON::BI__builtin_neon_vget_lane_f64:
+ case NEON::BI__builtin_neon_vgetq_lane_i8:
+ case NEON::BI__builtin_neon_vgetq_lane_i16:
+ case NEON::BI__builtin_neon_vgetq_lane_i32:
+ case NEON::BI__builtin_neon_vgetq_lane_i64:
+ case NEON::BI__builtin_neon_vgetq_lane_f32:
+ case NEON::BI__builtin_neon_vgetq_lane_f64:
+ return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E);
+ case NEON::BI__builtin_neon_vset_lane_i8:
+ case NEON::BI__builtin_neon_vset_lane_i16:
+ case NEON::BI__builtin_neon_vset_lane_i32:
+ case NEON::BI__builtin_neon_vset_lane_i64:
+ case NEON::BI__builtin_neon_vset_lane_f32:
+ case NEON::BI__builtin_neon_vset_lane_f64:
+ case NEON::BI__builtin_neon_vsetq_lane_i8:
+ case NEON::BI__builtin_neon_vsetq_lane_i16:
+ case NEON::BI__builtin_neon_vsetq_lane_i32:
+ case NEON::BI__builtin_neon_vsetq_lane_i64:
+ case NEON::BI__builtin_neon_vsetq_lane_f32:
+ case NEON::BI__builtin_neon_vsetq_lane_f64:
+ return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
+
+ case NEON::BI__builtin_neon_vcled_s64:
+ case NEON::BI__builtin_neon_vcled_u64:
+ case NEON::BI__builtin_neon_vcles_f32:
+ case NEON::BI__builtin_neon_vcled_f64:
+ case NEON::BI__builtin_neon_vcltd_s64:
+ case NEON::BI__builtin_neon_vcltd_u64:
+ case NEON::BI__builtin_neon_vclts_f32:
+ case NEON::BI__builtin_neon_vcltd_f64:
+ case NEON::BI__builtin_neon_vcales_f32:
+ case NEON::BI__builtin_neon_vcaled_f64:
+ case NEON::BI__builtin_neon_vcalts_f32:
+ case NEON::BI__builtin_neon_vcaltd_f64:
+ // Only one direction of comparisons actually exist, cmle is actually a cmge
+ // with swapped operands. The table gives us the right intrinsic but we
+ // still need to do the swap.
+ std::swap(Ops[0], Ops[1]);
+ break;
+ case NEON::BI__builtin_neon_vceqzd_s64:
+ case NEON::BI__builtin_neon_vceqzd_u64:
+ case NEON::BI__builtin_neon_vcgezd_s64:
+ case NEON::BI__builtin_neon_vcgtzd_s64:
+ case NEON::BI__builtin_neon_vclezd_s64:
+ case NEON::BI__builtin_neon_vcltzd_s64:
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ break;
+ case NEON::BI__builtin_neon_vceqzs_f32:
+ case NEON::BI__builtin_neon_vceqzd_f64:
+ case NEON::BI__builtin_neon_vcgezs_f32:
+ case NEON::BI__builtin_neon_vcgezd_f64:
+ case NEON::BI__builtin_neon_vcgtzs_f32:
+ case NEON::BI__builtin_neon_vcgtzd_f64:
+ case NEON::BI__builtin_neon_vclezs_f32:
+ case NEON::BI__builtin_neon_vclezd_f64:
+ case NEON::BI__builtin_neon_vcltzs_f32:
+ case NEON::BI__builtin_neon_vcltzd_f64:
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+ break;
+ }
+
+
+ assert(Int && "Generic code assumes a valid intrinsic");
+
+ // Determine the type(s) of this overloaded AArch64 intrinsic.
+ const Expr *Arg = E->getArg(0);
+ llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+ Function *F = CGF.LookupNeonLLVMIntrinsic(Int, IntTypes, ArgTy, E);
+
+ Value *Result = CGF.EmitNeonCall(F, Ops, s);
+ llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ // AArch64 intrinsic one-element vector type cast to
+ // scalar type expected by the builtin
+ return CGF.Builder.CreateBitCast(Result, ResultType, s);
+}
+
+Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
+ unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
+ const char *NameHint, unsigned Modifier, const CallExpr *E,
+ SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) {
// Get the last argument, which specifies the vector type.
- llvm::APSInt Result;
+ llvm::APSInt NeonTypeConst;
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
- if (!Arg->isIntegerConstantExpr(Result, getContext()))
+ if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
return 0;
// Determine the type of this overloaded NEON intrinsic.
- NeonTypeFlags Type(Result.getZExtValue());
+ NeonTypeFlags Type(NeonTypeConst.getZExtValue());
bool Usgn = Type.isUnsigned();
bool Quad = Type.isQuad();
@@ -1774,31 +2614,17 @@ Value *CodeGenFunction::EmitCommonNeonBu
if (!Ty)
return 0;
- unsigned Int;
+ unsigned Int = LLVMIntrinsic;
+ if ((Modifier & UnsignedAlts) && !Usgn)
+ Int = AltLLVMIntrinsic;
+
switch (BuiltinID) {
default: break;
case NEON::BI__builtin_neon_vabs_v:
case NEON::BI__builtin_neon_vabsq_v:
if (VTy->getElementType()->isFloatingPointTy())
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty), Ops,
- "vabs");
- case NEON::BI__builtin_neon_vaeseq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese),
- Ops, "aese");
- case NEON::BI__builtin_neon_vaesdq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd),
- Ops, "aesd");
- case NEON::BI__builtin_neon_vaesmcq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc),
- Ops, "aesmc");
- case NEON::BI__builtin_neon_vaesimcq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc),
- Ops, "aesimc");
- case NEON::BI__builtin_neon_vabd_v:
- case NEON::BI__builtin_neon_vabdq_v:
- Int = Usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
case NEON::BI__builtin_neon_vaddhn_v: {
llvm::VectorType *SrcTy =
llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -1817,65 +2643,28 @@ Value *CodeGenFunction::EmitCommonNeonBu
// %res = trunc <4 x i32> %high to <4 x i16>
return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
}
- case NEON::BI__builtin_neon_vbsl_v:
- case NEON::BI__builtin_neon_vbslq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
- Ops, "vbsl");
case NEON::BI__builtin_neon_vcale_v:
case NEON::BI__builtin_neon_vcaleq_v:
- std::swap(Ops[0], Ops[1]);
- case NEON::BI__builtin_neon_vcage_v:
- case NEON::BI__builtin_neon_vcageq_v: {
- llvm::Type *VecFlt = llvm::VectorType::get(
- VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
- VTy->getNumElements());
- llvm::Type *Tys[] = { VTy, VecFlt };
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacge, Tys);
- return EmitNeonCall(F, Ops, "vcage");
- }
case NEON::BI__builtin_neon_vcalt_v:
case NEON::BI__builtin_neon_vcaltq_v:
std::swap(Ops[0], Ops[1]);
+ case NEON::BI__builtin_neon_vcage_v:
+ case NEON::BI__builtin_neon_vcageq_v:
case NEON::BI__builtin_neon_vcagt_v:
case NEON::BI__builtin_neon_vcagtq_v: {
llvm::Type *VecFlt = llvm::VectorType::get(
VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
VTy->getNumElements());
llvm::Type *Tys[] = { VTy, VecFlt };
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgt, Tys);
- return EmitNeonCall(F, Ops, "vcagt");
- }
- case NEON::BI__builtin_neon_vcls_v:
- case NEON::BI__builtin_neon_vclsq_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
- return EmitNeonCall(F, Ops, "vcls");
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
+ return EmitNeonCall(F, Ops, NameHint);
}
case NEON::BI__builtin_neon_vclz_v:
- case NEON::BI__builtin_neon_vclzq_v: {
- // Generate target-independent intrinsic; also need to add second argument
+ case NEON::BI__builtin_neon_vclzq_v:
+ // We generate target-independent intrinsic, which needs a second argument
// for whether or not clz of zero is undefined; on ARM it isn't.
- Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
- return EmitNeonCall(F, Ops, "vclz");
- }
- case NEON::BI__builtin_neon_vcnt_v:
- case NEON::BI__builtin_neon_vcntq_v: {
- // generate target-independent intrinsic
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
- return EmitNeonCall(F, Ops, "vctpop");
- }
- case NEON::BI__builtin_neon_vcvt_f16_v: {
- assert(Type.getEltType() == NeonTypeFlags::Float16 && !Quad &&
- "unexpected vcvt_f16_v builtin");
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
- return EmitNeonCall(F, Ops, "vcvt");
- }
- case NEON::BI__builtin_neon_vcvt_f32_f16: {
- assert(Type.getEltType() == NeonTypeFlags::Float16 && !Quad &&
- "unexpected vcvt_f32_f16 builtin");
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
- return EmitNeonCall(F, Ops, "vcvt");
- }
+ break;
case NEON::BI__builtin_neon_vcvt_f32_v:
case NEON::BI__builtin_neon_vcvtq_f32_v:
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -1884,11 +2673,14 @@ Value *CodeGenFunction::EmitCommonNeonBu
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
case NEON::BI__builtin_neon_vcvt_n_f32_v:
case NEON::BI__builtin_neon_vcvtq_n_f32_v: {
+ bool Double =
+ (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
llvm::Type *FloatTy =
- GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
+ GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
+ : NeonTypeFlags::Float32,
+ false, Quad));
llvm::Type *Tys[2] = { FloatTy, Ty };
- Int = Usgn ? Intrinsic::arm_neon_vcvtfxu2fp
- : Intrinsic::arm_neon_vcvtfxs2fp;
+ Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
Function *F = CGM.getIntrinsic(Int, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
@@ -1907,9 +2699,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
: NeonTypeFlags::Float32,
false, Quad));
llvm::Type *Tys[2] = { Ty, FloatTy };
- Int = Usgn ? Intrinsic::arm_neon_vcvtfp2fxu
- : Intrinsic::arm_neon_vcvtfp2fxs;
- Function *F = CGM.getIntrinsic(Int, Tys);
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
return EmitNeonCall(F, Ops, "vcvt_n");
}
case NEON::BI__builtin_neon_vcvt_s32_v:
@@ -1937,17 +2727,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vcvtaq_s32_v:
case NEON::BI__builtin_neon_vcvtaq_s64_v:
case NEON::BI__builtin_neon_vcvtaq_u32_v:
- case NEON::BI__builtin_neon_vcvtaq_u64_v: {
- Int = Usgn ? Intrinsic::arm_neon_vcvtau : Intrinsic::arm_neon_vcvtas;
- bool Double =
- (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
- llvm::Type *InTy =
- GetNeonType(this,
- NeonTypeFlags(Double ? NeonTypeFlags::Float64
- : NeonTypeFlags::Float32, false, Quad));
- llvm::Type *Tys[2] = { Ty, InTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
- }
+ case NEON::BI__builtin_neon_vcvtaq_u64_v:
case NEON::BI__builtin_neon_vcvtn_s32_v:
case NEON::BI__builtin_neon_vcvtn_s64_v:
case NEON::BI__builtin_neon_vcvtn_u32_v:
@@ -1955,17 +2735,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vcvtnq_s32_v:
case NEON::BI__builtin_neon_vcvtnq_s64_v:
case NEON::BI__builtin_neon_vcvtnq_u32_v:
- case NEON::BI__builtin_neon_vcvtnq_u64_v: {
- Int = Usgn ? Intrinsic::arm_neon_vcvtnu : Intrinsic::arm_neon_vcvtns;
- bool Double =
- (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
- llvm::Type *InTy =
- GetNeonType(this,
- NeonTypeFlags(Double ? NeonTypeFlags::Float64
- : NeonTypeFlags::Float32, false, Quad));
- llvm::Type *Tys[2] = { Ty, InTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
- }
+ case NEON::BI__builtin_neon_vcvtnq_u64_v:
case NEON::BI__builtin_neon_vcvtp_s32_v:
case NEON::BI__builtin_neon_vcvtp_s64_v:
case NEON::BI__builtin_neon_vcvtp_u32_v:
@@ -1973,17 +2743,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vcvtpq_s32_v:
case NEON::BI__builtin_neon_vcvtpq_s64_v:
case NEON::BI__builtin_neon_vcvtpq_u32_v:
- case NEON::BI__builtin_neon_vcvtpq_u64_v: {
- Int = Usgn ? Intrinsic::arm_neon_vcvtpu : Intrinsic::arm_neon_vcvtps;
- bool Double =
- (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
- llvm::Type *InTy =
- GetNeonType(this,
- NeonTypeFlags(Double ? NeonTypeFlags::Float64
- : NeonTypeFlags::Float32, false, Quad));
- llvm::Type *Tys[2] = { Ty, InTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
- }
+ case NEON::BI__builtin_neon_vcvtpq_u64_v:
case NEON::BI__builtin_neon_vcvtm_s32_v:
case NEON::BI__builtin_neon_vcvtm_s64_v:
case NEON::BI__builtin_neon_vcvtm_u32_v:
@@ -1992,7 +2752,6 @@ Value *CodeGenFunction::EmitCommonNeonBu
case NEON::BI__builtin_neon_vcvtmq_s64_v:
case NEON::BI__builtin_neon_vcvtmq_u32_v:
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
- Int = Usgn ? Intrinsic::arm_neon_vcvtmu : Intrinsic::arm_neon_vcvtms;
bool Double =
(cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
llvm::Type *InTy =
@@ -2000,7 +2759,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
NeonTypeFlags(Double ? NeonTypeFlags::Float64
: NeonTypeFlags::Float32, false, Quad));
llvm::Type *Tys[2] = { Ty, InTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
}
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
@@ -2024,39 +2783,18 @@ Value *CodeGenFunction::EmitCommonNeonBu
// NEON intrinsic puts accumulator first, unlike the LLVM fma.
return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
}
- case NEON::BI__builtin_neon_vhadd_v:
- case NEON::BI__builtin_neon_vhaddq_v:
- Int = Usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
- case NEON::BI__builtin_neon_vhsub_v:
- case NEON::BI__builtin_neon_vhsubq_v:
- Int = Usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v:
Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
- Ops, "vld1");
+ return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
case NEON::BI__builtin_neon_vld2_v:
- case NEON::BI__builtin_neon_vld2q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
+ case NEON::BI__builtin_neon_vld2q_v:
case NEON::BI__builtin_neon_vld3_v:
- case NEON::BI__builtin_neon_vld3q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
+ case NEON::BI__builtin_neon_vld3q_v:
case NEON::BI__builtin_neon_vld4_v:
case NEON::BI__builtin_neon_vld4q_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
- Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
@@ -2073,49 +2811,20 @@ Value *CodeGenFunction::EmitCommonNeonBu
return EmitNeonSplat(Ops[0], CI);
}
case NEON::BI__builtin_neon_vld2_lane_v:
- case NEON::BI__builtin_neon_vld2q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
+ case NEON::BI__builtin_neon_vld2q_lane_v:
case NEON::BI__builtin_neon_vld3_lane_v:
- case NEON::BI__builtin_neon_vld3q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
- Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- return Builder.CreateStore(Ops[1], Ops[0]);
- }
+ case NEON::BI__builtin_neon_vld3q_lane_v:
case NEON::BI__builtin_neon_vld4_lane_v:
case NEON::BI__builtin_neon_vld4q_lane_v: {
- Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
- Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
- Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
- Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
- Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
+ Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
+ for (unsigned I = 2; I < Ops.size() - 1; ++I)
+ Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
Ops.push_back(Align);
- Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
+ Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateStore(Ops[1], Ops[0]);
}
- case NEON::BI__builtin_neon_vmax_v:
- case NEON::BI__builtin_neon_vmaxq_v:
- Int = Usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
- case NEON::BI__builtin_neon_vmin_v:
- case NEON::BI__builtin_neon_vminq_v:
- Int = Usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
case NEON::BI__builtin_neon_vmovl_v: {
llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
@@ -2128,11 +2837,6 @@ Value *CodeGenFunction::EmitCommonNeonBu
Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
}
- case NEON::BI__builtin_neon_vmul_v:
- case NEON::BI__builtin_neon_vmulq_v:
- assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
- Ops, "vmul");
case NEON::BI__builtin_neon_vmull_v:
// FIXME: the integer vmull operations could be emitted in terms of pure
// LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
@@ -2144,7 +2848,6 @@ Value *CodeGenFunction::EmitCommonNeonBu
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
case NEON::BI__builtin_neon_vpadal_v:
case NEON::BI__builtin_neon_vpadalq_v: {
- Int = Usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy =
@@ -2152,14 +2855,10 @@ Value *CodeGenFunction::EmitCommonNeonBu
llvm::Type *NarrowTy =
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
llvm::Type *Tys[2] = { Ty, NarrowTy };
- return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
}
- case NEON::BI__builtin_neon_vpadd_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
- Ops, "vpadd");
case NEON::BI__builtin_neon_vpaddl_v:
case NEON::BI__builtin_neon_vpaddlq_v: {
- Int = Usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
// The source operand type has twice as many elements of half the size.
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
@@ -2168,128 +2867,29 @@ Value *CodeGenFunction::EmitCommonNeonBu
llvm::Type *Tys[2] = { Ty, NarrowTy };
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
}
- case NEON::BI__builtin_neon_vpmax_v:
- Int = Usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
- case NEON::BI__builtin_neon_vpmin_v:
- Int = Usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
- case NEON::BI__builtin_neon_vqabs_v:
- case NEON::BI__builtin_neon_vqabsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
- Ops, "vqabs");
- case NEON::BI__builtin_neon_vqadd_v:
- case NEON::BI__builtin_neon_vqaddq_v:
- Int = Usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
- case NEON::BI__builtin_neon_vqmovn_v:
- Int = Usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
- case NEON::BI__builtin_neon_vqmovun_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
- Ops, "vqdmull");
- case NEON::BI__builtin_neon_vqneg_v:
- case NEON::BI__builtin_neon_vqnegq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
- Ops, "vqneg");
- case NEON::BI__builtin_neon_vqsub_v:
- case NEON::BI__builtin_neon_vqsubq_v:
- Int = Usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
- case NEON::BI__builtin_neon_vqdmlal_v: {
+ case NEON::BI__builtin_neon_vqdmlal_v:
+ case NEON::BI__builtin_neon_vqdmlsl_v: {
SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
- Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
+ Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty),
MulOps, "vqdmlal");
- SmallVector<Value *, 2> AddOps;
- AddOps.push_back(Ops[0]);
- AddOps.push_back(Mul);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty),
- AddOps, "vqdmlal");
+ SmallVector<Value *, 2> AccumOps;
+ AccumOps.push_back(Ops[0]);
+ AccumOps.push_back(Mul);
+ return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty),
+ AccumOps, NameHint);
}
- case NEON::BI__builtin_neon_vqdmlsl_v: {
- SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
- Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
- MulOps, "vqdmlsl");
-
- SmallVector<Value *, 2> SubOps;
- SubOps.push_back(Ops[0]);
- SubOps.push_back(Mul);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty),
- SubOps, "vqdmlsl");
- }
- case NEON::BI__builtin_neon_vqdmulh_v:
- case NEON::BI__builtin_neon_vqdmulhq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
- Ops, "vqdmulh");
- case NEON::BI__builtin_neon_vqdmull_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
- Ops, "vqdmull");
case NEON::BI__builtin_neon_vqshl_n_v:
case NEON::BI__builtin_neon_vqshlq_n_v:
- Int = Usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1, false);
- case NEON::BI__builtin_neon_vqrdmulh_v:
- case NEON::BI__builtin_neon_vqrdmulhq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
- Ops, "vqrdmulh");
- case NEON::BI__builtin_neon_vqrshl_v:
- case NEON::BI__builtin_neon_vqrshlq_v:
- Int = Usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
- case NEON::BI__builtin_neon_vqshl_v:
- case NEON::BI__builtin_neon_vqshlq_v:
- Int = Usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
- case NEON::BI__builtin_neon_vraddhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
- Ops, "vraddhn");
case NEON::BI__builtin_neon_vrecpe_v:
case NEON::BI__builtin_neon_vrecpeq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
- Ops, "vrecpe");
- case NEON::BI__builtin_neon_vrecps_v:
- case NEON::BI__builtin_neon_vrecpsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
- Ops, "vrecps");
- case NEON::BI__builtin_neon_vrhadd_v:
- case NEON::BI__builtin_neon_vrhaddq_v:
- Int = Usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
- case NEON::BI__builtin_neon_vrshl_v:
- case NEON::BI__builtin_neon_vrshlq_v:
- Int = Usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
case NEON::BI__builtin_neon_vrsqrte_v:
case NEON::BI__builtin_neon_vrsqrteq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
- Ops, "vrsqrte");
- case NEON::BI__builtin_neon_vrsqrts_v:
- case NEON::BI__builtin_neon_vrsqrtsq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
- Ops, "vrsqrts");
- case NEON::BI__builtin_neon_vrsubhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
- Ops, "vrsubhn");
- case NEON::BI__builtin_neon_vsha1su1q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1),
- Ops, "sha1su1");
- case NEON::BI__builtin_neon_vsha256su0q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0),
- Ops, "sha256su0");
- case NEON::BI__builtin_neon_vsha1su0q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0),
- Ops, "sha1su0");
- case NEON::BI__builtin_neon_vsha256hq_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h),
- Ops, "sha256h");
- case NEON::BI__builtin_neon_vsha256h2q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2),
- Ops, "sha256h2");
- case NEON::BI__builtin_neon_vsha256su1q_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1),
- Ops, "sha256su1");
+ Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
+
case NEON::BI__builtin_neon_vshl_n_v:
case NEON::BI__builtin_neon_vshlq_n_v:
Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
@@ -2305,10 +2905,6 @@ Value *CodeGenFunction::EmitCommonNeonBu
Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
}
- case NEON::BI__builtin_neon_vshl_v:
- case NEON::BI__builtin_neon_vshlq_v:
- Int = Usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
case NEON::BI__builtin_neon_vshrn_n_v: {
llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
@@ -2324,39 +2920,20 @@ Value *CodeGenFunction::EmitCommonNeonBu
return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
case NEON::BI__builtin_neon_vst1_v:
case NEON::BI__builtin_neon_vst1q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst2_v:
case NEON::BI__builtin_neon_vst2q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst3_v:
case NEON::BI__builtin_neon_vst3q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst4_v:
case NEON::BI__builtin_neon_vst4q_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst2_lane_v:
case NEON::BI__builtin_neon_vst2q_lane_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst3_lane_v:
case NEON::BI__builtin_neon_vst3q_lane_v:
- Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
- Ops, "");
case NEON::BI__builtin_neon_vst4_lane_v:
case NEON::BI__builtin_neon_vst4q_lane_v:
Ops.push_back(Align);
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
- Ops, "");
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
case NEON::BI__builtin_neon_vsubhn_v: {
llvm::VectorType *SrcTy =
llvm::VectorType::getExtendedElementVectorType(VTy);
@@ -2445,620 +3022,16 @@ Value *CodeGenFunction::EmitCommonNeonBu
}
}
- return 0;
-}
-
-enum {
- AddRetType = (1 << 0),
- Add1ArgType = (1 << 1),
- Add2ArgTypes = (1 << 2),
-
- VectorizeRetType = (1 << 3),
- VectorizeArgTypes = (1 << 4),
-
- InventFloatType = (1 << 5),
-
- Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
- VectorRet = AddRetType | VectorizeRetType,
- VectorRetGetArgs01 =
- AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
- FpCmpzModifiers =
- AddRetType | VectorizeRetType | Add1ArgType | InventFloatType,
-};
-
- struct NeonSISDIntrinsicInfo {
- unsigned BuiltinID;
- unsigned LLVMIntrinsic;
- const char *NameHint;
- unsigned TypeModifier;
-
- bool operator<(unsigned RHSBuiltinID) const {
- return BuiltinID < RHSBuiltinID;
- }
-};
-
-#define SISDMAP0(NameBase, Type) \
- { NEON::BI__builtin_neon_ ## NameBase ## _ ## Type, 0, #NameBase, 0 }
-
-#define SISDMAP1(NameBase, Type, LLVMIntrinsic, TypeModifier) \
- { NEON:: BI__builtin_neon_ ## NameBase ## _ ## Type, \
- Intrinsic::LLVMIntrinsic, #NameBase, TypeModifier }
-
-static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
- SISDMAP1(vabdd, f64, aarch64_neon_vabd, AddRetType),
- SISDMAP1(vabds, f32, aarch64_neon_vabd, AddRetType),
- SISDMAP1(vabsd, s64, aarch64_neon_vabs, 0),
- SISDMAP1(vaddd, s64, aarch64_neon_vaddds, 0),
- SISDMAP1(vaddd, u64, aarch64_neon_vadddu, 0),
- SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
- SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
- SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
- SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
- SISDMAP1(vceqd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
- SISDMAP1(vceqd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
- SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
- SISDMAP1(vceqzd, f64, aarch64_neon_fceq, FpCmpzModifiers),
- SISDMAP1(vceqzd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
- SISDMAP1(vceqzd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
- SISDMAP1(vceqzs, f32, aarch64_neon_fceq, FpCmpzModifiers),
- SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- SISDMAP1(vcged, s64, aarch64_neon_vcge, VectorRetGetArgs01),
- SISDMAP1(vcged, u64, aarch64_neon_vchs, VectorRetGetArgs01),
- SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- SISDMAP1(vcgezd, f64, aarch64_neon_fcge, FpCmpzModifiers),
- SISDMAP1(vcgezd, s64, aarch64_neon_vcge, VectorRetGetArgs01),
- SISDMAP1(vcgezs, f32, aarch64_neon_fcge, FpCmpzModifiers),
- SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcgtd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- SISDMAP1(vcgtd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
- SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcgtzd, f64, aarch64_neon_fcgt, FpCmpzModifiers),
- SISDMAP1(vcgtzd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- SISDMAP1(vcgtzs, f32, aarch64_neon_fcgt, FpCmpzModifiers),
- SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- SISDMAP1(vcled, s64, aarch64_neon_vcge, VectorRetGetArgs01),
- SISDMAP1(vcled, u64, aarch64_neon_vchs, VectorRetGetArgs01),
- SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
- SISDMAP1(vclezd, f64, aarch64_neon_fclez, FpCmpzModifiers),
- SISDMAP1(vclezd, s64, aarch64_neon_vclez, VectorRetGetArgs01),
- SISDMAP1(vclezs, f32, aarch64_neon_fclez, FpCmpzModifiers),
- SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcltd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
- SISDMAP1(vcltd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
- SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
- SISDMAP1(vcltzd, f64, aarch64_neon_fcltz, FpCmpzModifiers),
- SISDMAP1(vcltzd, s64, aarch64_neon_vcltz, VectorRetGetArgs01),
- SISDMAP1(vcltzs, f32, aarch64_neon_fcltz, FpCmpzModifiers),
- SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
- SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
- SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
- SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
- SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
- SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
- SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
- SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
- SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
- SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
- SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
- SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
- SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
- SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
- SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
- SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
- SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
- SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
- SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
- SISDMAP1(vcvtxd_f32, f64, aarch64_neon_fcvtxn, 0),
- SISDMAP0(vdupb_lane, i8),
- SISDMAP0(vdupb_laneq, i8),
- SISDMAP0(vdupd_lane, f64),
- SISDMAP0(vdupd_lane, i64),
- SISDMAP0(vdupd_laneq, f64),
- SISDMAP0(vdupd_laneq, i64),
- SISDMAP0(vduph_lane, i16),
- SISDMAP0(vduph_laneq, i16),
- SISDMAP0(vdups_lane, f32),
- SISDMAP0(vdups_lane, i32),
- SISDMAP0(vdups_laneq, f32),
- SISDMAP0(vdups_laneq, i32),
- SISDMAP0(vfmad_lane, f64),
- SISDMAP0(vfmad_laneq, f64),
- SISDMAP0(vfmas_lane, f32),
- SISDMAP0(vfmas_laneq, f32),
- SISDMAP0(vget_lane, f32),
- SISDMAP0(vget_lane, f64),
- SISDMAP0(vget_lane, i16),
- SISDMAP0(vget_lane, i32),
- SISDMAP0(vget_lane, i64),
- SISDMAP0(vget_lane, i8),
- SISDMAP0(vgetq_lane, f32),
- SISDMAP0(vgetq_lane, f64),
- SISDMAP0(vgetq_lane, i16),
- SISDMAP0(vgetq_lane, i32),
- SISDMAP0(vgetq_lane, i64),
- SISDMAP0(vgetq_lane, i8),
- SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- SISDMAP1(vmaxnmvq, f32, aarch64_neon_vmaxnmv, 0),
- SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, f32, aarch64_neon_vmaxv, 0),
- SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
- SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- SISDMAP1(vminnmvq, f32, aarch64_neon_vminnmv, 0),
- SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- SISDMAP1(vminv, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, f32, aarch64_neon_vminv, 0),
- SISDMAP1(vminvq, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
- SISDMAP0(vmul_n, f64),
- SISDMAP1(vmull, p64, aarch64_neon_vmull_p64, 0),
- SISDMAP0(vmulxd, f64),
- SISDMAP0(vmulxs, f32),
- SISDMAP1(vnegd, s64, aarch64_neon_vneg, 0),
- SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- SISDMAP1(vpaddd, s64, aarch64_neon_vpadd, 0),
- SISDMAP1(vpaddd, u64, aarch64_neon_vpadd, 0),
- SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
- SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
- SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
- SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
- SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- SISDMAP1(vpmins, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
- SISDMAP1(vqabsb, s8, arm_neon_vqabs, VectorRet),
- SISDMAP1(vqabsd, s64, arm_neon_vqabs, VectorRet),
- SISDMAP1(vqabsh, s16, arm_neon_vqabs, VectorRet),
- SISDMAP1(vqabss, s32, arm_neon_vqabs, VectorRet),
- SISDMAP1(vqaddb, s8, arm_neon_vqadds, VectorRet),
- SISDMAP1(vqaddb, u8, arm_neon_vqaddu, VectorRet),
- SISDMAP1(vqaddd, s64, arm_neon_vqadds, VectorRet),
- SISDMAP1(vqaddd, u64, arm_neon_vqaddu, VectorRet),
- SISDMAP1(vqaddh, s16, arm_neon_vqadds, VectorRet),
- SISDMAP1(vqaddh, u16, arm_neon_vqaddu, VectorRet),
- SISDMAP1(vqadds, s32, arm_neon_vqadds, VectorRet),
- SISDMAP1(vqadds, u32, arm_neon_vqaddu, VectorRet),
- SISDMAP0(vqdmlalh_lane, s16),
- SISDMAP0(vqdmlalh_laneq, s16),
- SISDMAP1(vqdmlalh, s16, aarch64_neon_vqdmlal, VectorRet),
- SISDMAP0(vqdmlals_lane, s32),
- SISDMAP0(vqdmlals_laneq, s32),
- SISDMAP1(vqdmlals, s32, aarch64_neon_vqdmlal, VectorRet),
- SISDMAP0(vqdmlslh_lane, s16),
- SISDMAP0(vqdmlslh_laneq, s16),
- SISDMAP1(vqdmlslh, s16, aarch64_neon_vqdmlsl, VectorRet),
- SISDMAP0(vqdmlsls_lane, s32),
- SISDMAP0(vqdmlsls_laneq, s32),
- SISDMAP1(vqdmlsls, s32, aarch64_neon_vqdmlsl, VectorRet),
- SISDMAP1(vqdmulhh, s16, arm_neon_vqdmulh, VectorRet),
- SISDMAP1(vqdmulhs, s32, arm_neon_vqdmulh, VectorRet),
- SISDMAP1(vqdmullh, s16, arm_neon_vqdmull, VectorRet),
- SISDMAP1(vqdmulls, s32, arm_neon_vqdmull, VectorRet),
- SISDMAP1(vqmovnd, s64, arm_neon_vqmovns, VectorRet),
- SISDMAP1(vqmovnd, u64, arm_neon_vqmovnu, VectorRet),
- SISDMAP1(vqmovnh, s16, arm_neon_vqmovns, VectorRet),
- SISDMAP1(vqmovnh, u16, arm_neon_vqmovnu, VectorRet),
- SISDMAP1(vqmovns, s32, arm_neon_vqmovns, VectorRet),
- SISDMAP1(vqmovns, u32, arm_neon_vqmovnu, VectorRet),
- SISDMAP1(vqmovund, s64, arm_neon_vqmovnsu, VectorRet),
- SISDMAP1(vqmovunh, s16, arm_neon_vqmovnsu, VectorRet),
- SISDMAP1(vqmovuns, s32, arm_neon_vqmovnsu, VectorRet),
- SISDMAP1(vqnegb, s8, arm_neon_vqneg, VectorRet),
- SISDMAP1(vqnegd, s64, arm_neon_vqneg, VectorRet),
- SISDMAP1(vqnegh, s16, arm_neon_vqneg, VectorRet),
- SISDMAP1(vqnegs, s32, arm_neon_vqneg, VectorRet),
- SISDMAP1(vqrdmulhh, s16, arm_neon_vqrdmulh, VectorRet),
- SISDMAP1(vqrdmulhs, s32, arm_neon_vqrdmulh, VectorRet),
- SISDMAP1(vqrshlb, s8, aarch64_neon_vqrshls, VectorRet),
- SISDMAP1(vqrshlb, u8, aarch64_neon_vqrshlu, VectorRet),
- SISDMAP1(vqrshld, s64, aarch64_neon_vqrshls, VectorRet),
- SISDMAP1(vqrshld, u64, aarch64_neon_vqrshlu, VectorRet),
- SISDMAP1(vqrshlh, s16, aarch64_neon_vqrshls, VectorRet),
- SISDMAP1(vqrshlh, u16, aarch64_neon_vqrshlu, VectorRet),
- SISDMAP1(vqrshls, s32, aarch64_neon_vqrshls, VectorRet),
- SISDMAP1(vqrshls, u32, aarch64_neon_vqrshlu, VectorRet),
- SISDMAP1(vqrshrnd_n, s64, aarch64_neon_vsqrshrn, VectorRet),
- SISDMAP1(vqrshrnd_n, u64, aarch64_neon_vuqrshrn, VectorRet),
- SISDMAP1(vqrshrnh_n, s16, aarch64_neon_vsqrshrn, VectorRet),
- SISDMAP1(vqrshrnh_n, u16, aarch64_neon_vuqrshrn, VectorRet),
- SISDMAP1(vqrshrns_n, s32, aarch64_neon_vsqrshrn, VectorRet),
- SISDMAP1(vqrshrns_n, u32, aarch64_neon_vuqrshrn, VectorRet),
- SISDMAP1(vqrshrund_n, s64, aarch64_neon_vsqrshrun, VectorRet),
- SISDMAP1(vqrshrunh_n, s16, aarch64_neon_vsqrshrun, VectorRet),
- SISDMAP1(vqrshruns_n, s32, aarch64_neon_vsqrshrun, VectorRet),
- SISDMAP1(vqshlb_n, s8, aarch64_neon_vqshls_n, VectorRet),
- SISDMAP1(vqshlb_n, u8, aarch64_neon_vqshlu_n, VectorRet),
- SISDMAP1(vqshlb, s8, aarch64_neon_vqshls, VectorRet),
- SISDMAP1(vqshlb, u8, aarch64_neon_vqshlu, VectorRet),
- SISDMAP1(vqshld_n, s64, aarch64_neon_vqshls_n, VectorRet),
- SISDMAP1(vqshld_n, u64, aarch64_neon_vqshlu_n, VectorRet),
- SISDMAP1(vqshld, s64, aarch64_neon_vqshls, VectorRet),
- SISDMAP1(vqshld, u64, aarch64_neon_vqshlu, VectorRet),
- SISDMAP1(vqshlh_n, s16, aarch64_neon_vqshls_n, VectorRet),
- SISDMAP1(vqshlh_n, u16, aarch64_neon_vqshlu_n, VectorRet),
- SISDMAP1(vqshlh, s16, aarch64_neon_vqshls, VectorRet),
- SISDMAP1(vqshlh, u16, aarch64_neon_vqshlu, VectorRet),
- SISDMAP1(vqshls_n, s32, aarch64_neon_vqshls_n, VectorRet),
- SISDMAP1(vqshls_n, u32, aarch64_neon_vqshlu_n, VectorRet),
- SISDMAP1(vqshls, s32, aarch64_neon_vqshls, VectorRet),
- SISDMAP1(vqshls, u32, aarch64_neon_vqshlu, VectorRet),
- SISDMAP1(vqshlub_n, s8, aarch64_neon_vsqshlu, VectorRet),
- SISDMAP1(vqshlud_n, s64, aarch64_neon_vsqshlu, VectorRet),
- SISDMAP1(vqshluh_n, s16, aarch64_neon_vsqshlu, VectorRet),
- SISDMAP1(vqshlus_n, s32, aarch64_neon_vsqshlu, VectorRet),
- SISDMAP1(vqshrnd_n, s64, aarch64_neon_vsqshrn, VectorRet),
- SISDMAP1(vqshrnd_n, u64, aarch64_neon_vuqshrn, VectorRet),
- SISDMAP1(vqshrnh_n, s16, aarch64_neon_vsqshrn, VectorRet),
- SISDMAP1(vqshrnh_n, u16, aarch64_neon_vuqshrn, VectorRet),
- SISDMAP1(vqshrns_n, s32, aarch64_neon_vsqshrn, VectorRet),
- SISDMAP1(vqshrns_n, u32, aarch64_neon_vuqshrn, VectorRet),
- SISDMAP1(vqshrund_n, s64, aarch64_neon_vsqshrun, VectorRet),
- SISDMAP1(vqshrunh_n, s16, aarch64_neon_vsqshrun, VectorRet),
- SISDMAP1(vqshruns_n, s32, aarch64_neon_vsqshrun, VectorRet),
- SISDMAP1(vqsubb, s8, arm_neon_vqsubs, VectorRet),
- SISDMAP1(vqsubb, u8, arm_neon_vqsubu, VectorRet),
- SISDMAP1(vqsubd, s64, arm_neon_vqsubs, VectorRet),
- SISDMAP1(vqsubd, u64, arm_neon_vqsubu, VectorRet),
- SISDMAP1(vqsubh, s16, arm_neon_vqsubs, VectorRet),
- SISDMAP1(vqsubh, u16, arm_neon_vqsubu, VectorRet),
- SISDMAP1(vqsubs, s32, arm_neon_vqsubs, VectorRet),
- SISDMAP1(vqsubs, u32, arm_neon_vqsubu, VectorRet),
- SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, AddRetType),
- SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, AddRetType),
- SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, AddRetType),
- SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, AddRetType),
- SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, AddRetType),
- SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, AddRetType),
- SISDMAP1(vrshld, s64, aarch64_neon_vrshlds, 0),
- SISDMAP1(vrshld, u64, aarch64_neon_vrshldu, 0),
- SISDMAP1(vrshrd_n, s64, aarch64_neon_vsrshr, VectorRet),
- SISDMAP1(vrshrd_n, u64, aarch64_neon_vurshr, VectorRet),
- SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, AddRetType),
- SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, AddRetType),
- SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, AddRetType),
- SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, AddRetType),
- SISDMAP1(vrsrad_n, s64, aarch64_neon_vrsrads_n, 0),
- SISDMAP1(vrsrad_n, u64, aarch64_neon_vrsradu_n, 0),
- SISDMAP0(vset_lane, f32),
- SISDMAP0(vset_lane, f64),
- SISDMAP0(vset_lane, i16),
- SISDMAP0(vset_lane, i32),
- SISDMAP0(vset_lane, i64),
- SISDMAP0(vset_lane, i8),
- SISDMAP0(vsetq_lane, f32),
- SISDMAP0(vsetq_lane, f64),
- SISDMAP0(vsetq_lane, i16),
- SISDMAP0(vsetq_lane, i32),
- SISDMAP0(vsetq_lane, i64),
- SISDMAP0(vsetq_lane, i8),
- SISDMAP1(vsha1cq, u32, arm_neon_sha1c, 0),
- SISDMAP1(vsha1h, u32, arm_neon_sha1h, 0),
- SISDMAP1(vsha1mq, u32, arm_neon_sha1m, 0),
- SISDMAP1(vsha1pq, u32, arm_neon_sha1p, 0),
- SISDMAP1(vshld_n, s64, aarch64_neon_vshld_n, 0),
- SISDMAP1(vshld_n, u64, aarch64_neon_vshld_n, 0),
- SISDMAP1(vshld, s64, aarch64_neon_vshlds, 0),
- SISDMAP1(vshld, u64, aarch64_neon_vshldu, 0),
- SISDMAP1(vshrd_n, s64, aarch64_neon_vshrds_n, 0),
- SISDMAP1(vshrd_n, u64, aarch64_neon_vshrdu_n, 0),
- SISDMAP1(vslid_n, s64, aarch64_neon_vsli, VectorRet),
- SISDMAP1(vslid_n, u64, aarch64_neon_vsli, VectorRet),
- SISDMAP1(vsqaddb, u8, aarch64_neon_vsqadd, VectorRet),
- SISDMAP1(vsqaddd, u64, aarch64_neon_vsqadd, VectorRet),
- SISDMAP1(vsqaddh, u16, aarch64_neon_vsqadd, VectorRet),
- SISDMAP1(vsqadds, u32, aarch64_neon_vsqadd, VectorRet),
- SISDMAP1(vsrad_n, s64, aarch64_neon_vsrads_n, 0),
- SISDMAP1(vsrad_n, u64, aarch64_neon_vsradu_n, 0),
- SISDMAP1(vsrid_n, s64, aarch64_neon_vsri, VectorRet),
- SISDMAP1(vsrid_n, u64, aarch64_neon_vsri, VectorRet),
- SISDMAP1(vsubd, s64, aarch64_neon_vsubds, 0),
- SISDMAP1(vsubd, u64, aarch64_neon_vsubdu, 0),
- SISDMAP1(vtstd, s64, aarch64_neon_vtstd, VectorRetGetArgs01),
- SISDMAP1(vtstd, u64, aarch64_neon_vtstd, VectorRetGetArgs01),
- SISDMAP1(vuqaddb, s8, aarch64_neon_vuqadd, VectorRet),
- SISDMAP1(vuqaddd, s64, aarch64_neon_vuqadd, VectorRet),
- SISDMAP1(vuqaddh, s16, aarch64_neon_vuqadd, VectorRet),
- SISDMAP1(vuqadds, s32, aarch64_neon_vuqadd, VectorRet),
-};
-
-#undef SISDMAP0
-#undef SISDMAP1
-
-#ifndef NDEBUG
-static bool AArch64SISDIntrinsicInfoProvenSorted = false;
-#endif
-
-Function *CodeGenFunction::LookupNeonIntrinsic(unsigned IntrinsicID,
- unsigned Modifier,
- llvm::Type *ArgType,
- const CallExpr *E) {
- // Return type.
- SmallVector<llvm::Type *, 3> Tys;
- if (Modifier & AddRetType) {
- llvm::Type *Ty = ConvertType(E->getCallReturnType());
- if (Modifier & VectorizeRetType)
- Ty = llvm::VectorType::get(Ty, 1);
-
- Tys.push_back(Ty);
- }
-
- // Arguments.
- if (Modifier & VectorizeArgTypes)
- ArgType = llvm::VectorType::get(ArgType, 1);
-
- if (Modifier & (Add1ArgType | Add2ArgTypes))
- Tys.push_back(ArgType);
-
- if (Modifier & Add2ArgTypes)
- Tys.push_back(ArgType);
-
- if (Modifier & InventFloatType)
- Tys.push_back(FloatTy);
-
- return CGM.getIntrinsic(IntrinsicID, Tys);
-}
-
-static Value *
-EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
- const NeonSISDIntrinsicInfo &SISDInfo,
- const CallExpr *E) {
- unsigned BuiltinID = SISDInfo.BuiltinID;
- unsigned int Int = SISDInfo.LLVMIntrinsic;
- unsigned IntTypes = SISDInfo.TypeModifier;
- const char *s = SISDInfo.NameHint;
-
- SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
- }
-
- // AArch64 scalar builtins are not overloaded, they do not have an extra
- // argument that specifies the vector type, need to handle each case.
- switch (BuiltinID) {
- default: break;
- case NEON::BI__builtin_neon_vdups_lane_f32:
- case NEON::BI__builtin_neon_vdupd_lane_f64:
- case NEON::BI__builtin_neon_vdups_laneq_f32:
- case NEON::BI__builtin_neon_vdupd_laneq_f64: {
- return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
- }
- case NEON::BI__builtin_neon_vdupb_lane_i8:
- case NEON::BI__builtin_neon_vduph_lane_i16:
- case NEON::BI__builtin_neon_vdups_lane_i32:
- case NEON::BI__builtin_neon_vdupd_lane_i64:
- case NEON::BI__builtin_neon_vdupb_laneq_i8:
- case NEON::BI__builtin_neon_vduph_laneq_i16:
- case NEON::BI__builtin_neon_vdups_laneq_i32:
- case NEON::BI__builtin_neon_vdupd_laneq_i64: {
- // The backend treats Neon scalar types as v1ix types
- // So we want to dup lane from any vector to v1ix vector
- // with shufflevector
- s = "vdup_lane";
- Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1]));
- Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s);
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- // AArch64 intrinsic one-element vector type cast to
- // scalar type expected by the builtin
- return CGF.Builder.CreateBitCast(Result, Ty, s);
- }
- case NEON::BI__builtin_neon_vqdmlalh_lane_s16 :
- case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 :
- case NEON::BI__builtin_neon_vqdmlals_lane_s32 :
- case NEON::BI__builtin_neon_vqdmlals_laneq_s32 :
- case NEON::BI__builtin_neon_vqdmlslh_lane_s16 :
- case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 :
- case NEON::BI__builtin_neon_vqdmlsls_lane_s32 :
- case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : {
- Int = Intrinsic::arm_neon_vqadds;
- if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 ||
- BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) {
- Int = Intrinsic::arm_neon_vqsubs;
- }
- // create vqdmull call with b * c[i]
- llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
- llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
- Ty = CGF.ConvertType(E->getArg(0)->getType());
- llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
- Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
- Value *V = UndefValue::get(OpVTy);
- llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
- SmallVector<Value *, 2> MulOps;
- MulOps.push_back(Ops[1]);
- MulOps.push_back(Ops[2]);
- MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
- MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
- MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
- Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
- // create vqadds call with a +/- vqdmull result
- F = CGF.CGM.getIntrinsic(Int, ResVTy);
- SmallVector<Value *, 2> AddOps;
- AddOps.push_back(Ops[0]);
- AddOps.push_back(MulRes);
- V = UndefValue::get(ResVTy);
- AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
- Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
- return CGF.Builder.CreateBitCast(AddRes, Ty);
- }
- case NEON::BI__builtin_neon_vfmas_lane_f32:
- case NEON::BI__builtin_neon_vfmas_laneq_f32:
- case NEON::BI__builtin_neon_vfmad_lane_f64:
- case NEON::BI__builtin_neon_vfmad_laneq_f64: {
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
- Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
- return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
- }
- // Scalar Floating-point Multiply Extended
- case NEON::BI__builtin_neon_vmulxs_f32:
- case NEON::BI__builtin_neon_vmulxd_f64: {
- Int = Intrinsic::aarch64_neon_vmulx;
- llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
- return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
- }
- case NEON::BI__builtin_neon_vmul_n_f64: {
- // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane
- llvm::Type *VTy = GetNeonType(&CGF,
- NeonTypeFlags(NeonTypeFlags::Float64, false, false));
- Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy);
- llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0);
- Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract");
- Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
- return CGF.Builder.CreateBitCast(Result, VTy);
- }
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vget_lane_f32:
- case NEON::BI__builtin_neon_vget_lane_f64:
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vgetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E);
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vset_lane_f64:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_f64:
- return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
-
- case NEON::BI__builtin_neon_vcled_s64:
- case NEON::BI__builtin_neon_vcled_u64:
- case NEON::BI__builtin_neon_vcles_f32:
- case NEON::BI__builtin_neon_vcled_f64:
- case NEON::BI__builtin_neon_vcltd_s64:
- case NEON::BI__builtin_neon_vcltd_u64:
- case NEON::BI__builtin_neon_vclts_f32:
- case NEON::BI__builtin_neon_vcltd_f64:
- case NEON::BI__builtin_neon_vcales_f32:
- case NEON::BI__builtin_neon_vcaled_f64:
- case NEON::BI__builtin_neon_vcalts_f32:
- case NEON::BI__builtin_neon_vcaltd_f64:
- // Only one direction of comparisons actually exist, cmle is actually a cmge
- // with swapped operands. The table gives us the right intrinsic but we
- // still need to do the swap.
- std::swap(Ops[0], Ops[1]);
- break;
- case NEON::BI__builtin_neon_vceqzd_s64:
- case NEON::BI__builtin_neon_vceqzd_u64:
- case NEON::BI__builtin_neon_vcgezd_s64:
- case NEON::BI__builtin_neon_vcgtzd_s64:
- case NEON::BI__builtin_neon_vclezd_s64:
- case NEON::BI__builtin_neon_vcltzd_s64:
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- break;
- case NEON::BI__builtin_neon_vceqzs_f32:
- case NEON::BI__builtin_neon_vceqzd_f64:
- case NEON::BI__builtin_neon_vcgezs_f32:
- case NEON::BI__builtin_neon_vcgezd_f64:
- case NEON::BI__builtin_neon_vcgtzs_f32:
- case NEON::BI__builtin_neon_vcgtzd_f64:
- case NEON::BI__builtin_neon_vclezs_f32:
- case NEON::BI__builtin_neon_vclezd_f64:
- case NEON::BI__builtin_neon_vcltzs_f32:
- case NEON::BI__builtin_neon_vcltzd_f64:
- // Add implicit zero operand.
- Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
- break;
- }
-
- if (!Int)
- return 0;
+ assert(Int && "Expected valid intrinsic number");
// Determine the type(s) of this overloaded AArch64 intrinsic.
- const Expr *Arg = E->getArg(0);
- llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
- Function *F = CGF.LookupNeonIntrinsic(Int, IntTypes, ArgTy, E);
+ Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
- Value *Result = CGF.EmitNeonCall(F, Ops, s);
- llvm::Type *ResultType = CGF.ConvertType(E->getType());
+ Value *Result = EmitNeonCall(F, Ops, NameHint);
+ llvm::Type *ResultType = ConvertType(E->getType());
// AArch64 intrinsic one-element vector type cast to
// scalar type expected by the builtin
- return CGF.Builder.CreateBitCast(Result, ResultType, s);
+ return Builder.CreateBitCast(Result, ResultType, NameHint);
}
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
@@ -3304,21 +3277,12 @@ Value *CodeGenFunction::EmitAArch64Built
const CallExpr *E) {
// Process AArch64 scalar builtins
- llvm::ArrayRef<NeonSISDIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo);
-#ifndef NDEBUG
- if (!AArch64SISDIntrinsicInfoProvenSorted) {
- // FIXME: use std::is_sorted once C++11 is allowed
- for (unsigned i = 0; i < SISDInfo.size() - 1; ++i)
- assert(SISDInfo[i].BuiltinID <= SISDInfo[i + 1].BuiltinID);
- AArch64SISDIntrinsicInfoProvenSorted = true;
- }
-#endif
-
- const NeonSISDIntrinsicInfo *SISDIntrin =
- std::lower_bound(SISDInfo.begin(), SISDInfo.end(), BuiltinID);
+ llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo);
+ const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted);
- if (SISDIntrin != SISDInfo.end() && SISDIntrin->BuiltinID == BuiltinID) {
- Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *SISDIntrin, E);
+ if (Builtin) {
+ Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E);
assert(Result && "SISD intrinsic should have been handled");
return Result;
}
@@ -3461,10 +3425,16 @@ Value *CodeGenFunction::EmitAArch64Built
if (!Ty)
return 0;
+
// Many NEON builtins have identical semantics and uses in ARM and
// AArch64. Emit these in a single function.
- if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, Align))
- return Result;
+ llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
+ Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID,
+ NEONSIMDIntrinsicsProvenSorted);
+ if (Builtin)
+ return EmitCommonNeonBuiltinExpr(
+ Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
unsigned Int;
switch (BuiltinID) {
@@ -3814,10 +3784,6 @@ Value *CodeGenFunction::EmitAArch64Built
Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
}
- case NEON::BI__builtin_neon_vpaddq_v: {
- Int = Intrinsic::arm_neon_vpadd;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
- }
case NEON::BI__builtin_neon_vmulx_v:
case NEON::BI__builtin_neon_vmulxq_v: {
Int = Intrinsic::aarch64_neon_vmulx;
@@ -4220,8 +4186,13 @@ Value *CodeGenFunction::EmitARMBuiltinEx
// Many NEON builtins have identical semantics and uses in ARM and
// AArch64. Emit these in a single function.
- if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, Align))
- return Result;
+ llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
+ const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
+ IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
+ if (Builtin)
+ return EmitCommonNeonBuiltinExpr(
+ Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
+ Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
unsigned Int;
switch (BuiltinID) {
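
Both call sites above funnel through findNeonIntrinsicInMap. A minimal sketch of such a lookup, assuming it mirrors the std::lower_bound search and debug-only sortedness check it replaces in EmitAArch64BuiltinExpr — the bool reference parameter and the body below are illustrative, not a quote of the committed helper:

static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
                       unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
  if (!MapProvenSorted) {
    // Check once (in +Asserts builds) that the table is sorted by BuiltinID;
    // otherwise the binary search below is not valid.
    for (unsigned i = 0; i + 1 < IntrinsicMap.size(); ++i)
      assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
    MapProvenSorted = true;
  }
#endif

  const NeonIntrinsicInfo *Builtin =
      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;
  // No table entry: the caller falls back to its per-builtin switch.
  return 0;
}
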
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=201848&r1=201847&r2=201848&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Fri Feb 21 05:57:24 2014
@@ -2179,11 +2179,18 @@ public:
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+
+ llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
+ unsigned LLVMIntrinsic,
+ unsigned AltLLVMIntrinsic,
+ const char *NameHint,
+ unsigned Modifier,
+ const CallExpr *E,
SmallVectorImpl<llvm::Value *> &Ops,
llvm::Value *Align = 0);
- llvm::Function *LookupNeonIntrinsic(unsigned IntrinsicID, unsigned Modifier,
- llvm::Type *ArgTy, const CallExpr *E);
+ llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
+ unsigned Modifier, llvm::Type *ArgTy,
+ const CallExpr *E);
llvm::Value *EmitNeonCall(llvm::Function *F,
SmallVectorImpl<llvm::Value*> &O,
const char *name,
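
The signed/unsigned pairs removed above (vmax, vmin, vqadd, vqshl and friends) no longer need the per-builtin "Int = Usgn ? ... : ..." pattern because a NEONMAP2 table entry can record both LLVM intrinsics and let the shared emitter choose. A sketch of that selection with a hypothetical entry — which slot holds the unsigned variant, and the exact guard, are assumptions rather than quotes of the new table:

// Hypothetical entry: unsigned intrinsic first, signed alternative second.
//   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts)
//
// In the shared emitter, the NEON type flags then pick between the two:
unsigned Int = LLVMIntrinsic;
if ((Modifier & UnsignedAlts) && !Usgn)
  Int = AltLLVMIntrinsic;               // signed flavour of the operation
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
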