r201847 - AArch64: refactor table-driven NEON lookup.

Tim Northover tnorthover at apple.com
Fri Feb 21 03:57:20 PST 2014


Author: tnorthover
Date: Fri Feb 21 05:57:20 2014
New Revision: 201847

URL: http://llvm.org/viewvc/llvm-project?rev=201847&view=rev
Log:
AArch64: refactor table-driven NEON lookup.

This extracts the table-driven intrinsic lookup phase into a separate
function, to be used by EmitCommonNeonBuiltinExpr soon.

It also simplifies the logic used in that lookup, since the VectorCastArgN
and ScalarArgN modifiers had identical effects: both pushed the argument's
converted type onto the overload list unchanged.
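
As a worked example of the new flag scheme (a sketch for illustration only,
not part of the commit; the helper name is invented), consider a table entry
such as vceqd_s64, which uses VectorRetGetArgs01. That composite modifier
decomposes into AddRetType | Add2ArgTypes | VectorizeRetType |
VectorizeArgTypes, so LookupNeonIntrinsic ends up requesting an intrinsic
overloaded on three <1 x i64> types:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // Sketch only: expand VectorRetGetArgs01 by hand for a
    // vceqd_s64-style entry.  The i64 return type and both i64
    // arguments are each wrapped into a one-element vector before
    // being used to overload the intrinsic.
    static void expandVectorRetGetArgs01(
        llvm::LLVMContext &Ctx,
        llvm::SmallVectorImpl<llvm::Type *> &Tys) {
      llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
      llvm::Type *V1I64 = llvm::VectorType::get(I64, 1); // <1 x i64>
      Tys.push_back(V1I64); // return type: AddRetType + VectorizeRetType
      Tys.push_back(V1I64); // argument 0:  Add2ArgTypes + VectorizeArgTypes
      Tys.push_back(V1I64); // argument 1:  Add2ArgTypes (same vectorized type)
    }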

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=201847&r1=201846&r2=201847&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Feb 21 05:57:20 2014
@@ -2448,22 +2448,25 @@ Value *CodeGenFunction::EmitCommonNeonBu
   return 0;
 }
 
-enum NeonScalarTypeMod {
-  ScalarRet = (1 << 0),
-  VectorRet = (1 << 1),
-  ScalarArg0 = (1 << 2),
-  VectorGetArg0 = (1 << 3),
-  VectorCastArg0 = (1 << 4),
-  ScalarArg1 = (1 << 5),
-  VectorGetArg1 = (1 << 6),
-  VectorCastArg1 = (1 << 7),
-  ScalarFpCmpzArg1 = (1 << 8),
-
-  VectorRetGetArgs01 = VectorRet | VectorGetArg0 | VectorGetArg1,
-  FpCmpzModifiers = VectorRet | ScalarArg0 | ScalarFpCmpzArg1
+enum {
+  AddRetType = (1 << 0),
+  Add1ArgType = (1 << 1),
+  Add2ArgTypes = (1 << 2),
+
+  VectorizeRetType = (1 << 3),
+  VectorizeArgTypes = (1 << 4),
+
+  InventFloatType = (1 << 5),
+
+  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
+  VectorRet = AddRetType | VectorizeRetType,
+  VectorRetGetArgs01 =
+      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
+  FpCmpzModifiers =
+      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType,
 };
 
-struct NeonSISDIntrinsicInfo {
+ struct NeonSISDIntrinsicInfo {
   unsigned BuiltinID;
   unsigned LLVMIntrinsic;
   const char *NameHint;
@@ -2482,116 +2485,116 @@ struct NeonSISDIntrinsicInfo {
     Intrinsic::LLVMIntrinsic, #NameBase, TypeModifier }
 
 static const NeonSISDIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
-  SISDMAP1(vabdd, f64, aarch64_neon_vabd, ScalarRet),
-  SISDMAP1(vabds, f32, aarch64_neon_vabd, ScalarRet),
+  SISDMAP1(vabdd, f64, aarch64_neon_vabd, AddRetType),
+  SISDMAP1(vabds, f32, aarch64_neon_vabd, AddRetType),
   SISDMAP1(vabsd, s64, aarch64_neon_vabs, 0),
   SISDMAP1(vaddd, s64, aarch64_neon_vaddds, 0),
   SISDMAP1(vaddd, u64, aarch64_neon_vadddu, 0),
-  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | VectorCastArg1),
-  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | ScalarArg0 | ScalarArg1),
-  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vaddlv, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlv, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddlvq, u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddv, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddv, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vaddvq, s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vaddvq, u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
+  SISDMAP1(vcaged, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcages, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagtd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcagts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaled, f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcales, f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcaltd, f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vcalts, f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
+  SISDMAP1(vceqd, f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
-  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vceqs, f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
   SISDMAP1(vceqzd, f64, aarch64_neon_fceq, FpCmpzModifiers),
   SISDMAP1(vceqzd, s64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzd, u64, aarch64_neon_vceq, VectorRetGetArgs01),
   SISDMAP1(vceqzs, f32, aarch64_neon_fceq, FpCmpzModifiers),
-  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcged, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcged, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcged, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcges, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgezd, f64, aarch64_neon_fcge, FpCmpzModifiers),
   SISDMAP1(vcgezd, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcgezs, f32, aarch64_neon_fcge, FpCmpzModifiers),
-  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgtd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcgts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcgtzd, f64, aarch64_neon_fcgt, FpCmpzModifiers),
   SISDMAP1(vcgtzd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcgtzs, f32, aarch64_neon_fcgt, FpCmpzModifiers),
-  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcled, f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vcled, s64, aarch64_neon_vcge, VectorRetGetArgs01),
   SISDMAP1(vcled, u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcles, f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
   SISDMAP1(vclezd, f64, aarch64_neon_fclez, FpCmpzModifiers),
   SISDMAP1(vclezd, s64, aarch64_neon_vclez, VectorRetGetArgs01),
   SISDMAP1(vclezs, f32, aarch64_neon_fclez, FpCmpzModifiers),
-  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vcltd, f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltd, s64, aarch64_neon_vcgt, VectorRetGetArgs01),
   SISDMAP1(vcltd, u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | ScalarArg0 | ScalarArg1),
+  SISDMAP1(vclts, f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
   SISDMAP1(vcltzd, f64, aarch64_neon_fcltz, FpCmpzModifiers),
   SISDMAP1(vcltzd, s64, aarch64_neon_vcltz, VectorRetGetArgs01),
   SISDMAP1(vcltzs, f32, aarch64_neon_fcltz, FpCmpzModifiers),
-  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | ScalarArg1),
-  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, ScalarRet | VectorGetArg0),
-  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | ScalarArg0),
-  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | ScalarArg1),
-  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | ScalarArg1),
+  SISDMAP1(vcvtad_s64, f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtad_u64, f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_s32, f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtas_u32, f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_f64, s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_f64, u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_f64, u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvtd_n_s64, f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_n_u64, f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_s64, f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtd_u64, f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_s64, f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtmd_u64, f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_s32, f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtms_u32, f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_s64, f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtnd_u64, f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_s32, f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtns_u32, f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_s64, f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtpd_u64, f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_s32, f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
+  SISDMAP1(vcvtps_u32, f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_f32, s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_f32, u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_f32, u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
+  SISDMAP1(vcvts_n_s32, f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_n_u32, f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_s32, f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
+  SISDMAP1(vcvts_u32, f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
   SISDMAP1(vcvtxd_f32, f64, aarch64_neon_fcvtxn, 0),
   SISDMAP0(vdupb_lane, i8),
   SISDMAP0(vdupb_laneq, i8),
@@ -2621,59 +2624,59 @@ static const NeonSISDIntrinsicInfo AArch
   SISDMAP0(vgetq_lane, i32),
   SISDMAP0(vgetq_lane, i64),
   SISDMAP0(vgetq_lane, i8),
-  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxnmv, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
   SISDMAP1(vmaxnmvq, f32, aarch64_neon_vmaxnmv, 0),
-  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
+  SISDMAP1(vmaxnmvq, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxv, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxv, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
   SISDMAP1(vmaxvq, f32, aarch64_neon_vmaxv, 0),
-  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
+  SISDMAP1(vmaxvq, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vmaxvq, s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vmaxvq, u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
+  SISDMAP1(vminnmv, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
   SISDMAP1(vminnmvq, f32, aarch64_neon_vminnmv, 0),
-  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminnmvq, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminv, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminv, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP1(vminvq, f32, aarch64_neon_vminv, 0),
-  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | VectorCastArg1),
-  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | VectorCastArg1),
+  SISDMAP1(vminvq, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vminvq, s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
+  SISDMAP1(vminvq, u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
   SISDMAP0(vmul_n, f64),
   SISDMAP1(vmull, p64, aarch64_neon_vmull_p64, 0),
   SISDMAP0(vmulxd, f64),
   SISDMAP0(vmulxs, f32),
   SISDMAP1(vnegd, s64, aarch64_neon_vneg, 0),
-  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpaddd, f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
   SISDMAP1(vpaddd, s64, aarch64_neon_vpadd, 0),
   SISDMAP1(vpaddd, u64, aarch64_neon_vpadd, 0),
-  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
-  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, ScalarRet | VectorCastArg0),
+  SISDMAP1(vpadds, f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnmqd, f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxnms, f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxqd, f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpmaxs, f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnmqd, f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminnms, f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
+  SISDMAP1(vpminqd, f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
+  SISDMAP1(vpmins, f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
   SISDMAP1(vqabsb, s8, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsd, s64, arm_neon_vqabs, VectorRet),
   SISDMAP1(vqabsh, s16, arm_neon_vqabs, VectorRet),
@@ -2771,20 +2774,20 @@ static const NeonSISDIntrinsicInfo AArch
   SISDMAP1(vqsubh, u16, arm_neon_vqsubu, VectorRet),
   SISDMAP1(vqsubs, s32, arm_neon_vqsubs, VectorRet),
   SISDMAP1(vqsubs, u32, arm_neon_vqsubu, VectorRet),
-  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, ScalarRet),
-  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, ScalarRet),
-  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, ScalarRet),
-  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, ScalarRet),
+  SISDMAP1(vrecped, f64, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpes, f32, aarch64_neon_vrecpe, AddRetType),
+  SISDMAP1(vrecpsd, f64, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpss, f32, aarch64_neon_vrecps, AddRetType),
+  SISDMAP1(vrecpxd, f64, aarch64_neon_vrecpx, AddRetType),
+  SISDMAP1(vrecpxs, f32, aarch64_neon_vrecpx, AddRetType),
   SISDMAP1(vrshld, s64, aarch64_neon_vrshlds, 0),
   SISDMAP1(vrshld, u64, aarch64_neon_vrshldu, 0),
   SISDMAP1(vrshrd_n, s64, aarch64_neon_vsrshr, VectorRet),
   SISDMAP1(vrshrd_n, u64, aarch64_neon_vurshr, VectorRet),
-  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, ScalarRet),
-  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, ScalarRet),
-  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, ScalarRet),
+  SISDMAP1(vrsqrted, f64, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtes, f32, aarch64_neon_vrsqrte, AddRetType),
+  SISDMAP1(vrsqrtsd, f64, aarch64_neon_vrsqrts, AddRetType),
+  SISDMAP1(vrsqrtss, f32, aarch64_neon_vrsqrts, AddRetType),
   SISDMAP1(vrsrad_n, s64, aarch64_neon_vrsrads_n, 0),
   SISDMAP1(vrsrad_n, u64, aarch64_neon_vrsradu_n, 0),
   SISDMAP0(vset_lane, f32),
@@ -2836,6 +2839,36 @@ static const NeonSISDIntrinsicInfo AArch
 static bool AArch64SISDIntrinsicInfoProvenSorted = false;
 #endif
 
+Function *CodeGenFunction::LookupNeonIntrinsic(unsigned IntrinsicID,
+                                               unsigned Modifier,
+                                               llvm::Type *ArgType,
+                                               const CallExpr *E) {
+  // Return type.
+  SmallVector<llvm::Type *, 3> Tys;
+  if (Modifier & AddRetType) {
+    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    if (Modifier & VectorizeRetType)
+      Ty = llvm::VectorType::get(Ty, 1);
+
+    Tys.push_back(Ty);
+  }
+
+  // Arguments.
+  if (Modifier & VectorizeArgTypes)
+    ArgType = llvm::VectorType::get(ArgType, 1);
+
+  if (Modifier & (Add1ArgType | Add2ArgTypes))
+    Tys.push_back(ArgType);
+
+  if (Modifier & Add2ArgTypes)
+    Tys.push_back(ArgType);
+
+  if (Modifier & InventFloatType)
+    Tys.push_back(FloatTy);
+
+  return CGM.getIntrinsic(IntrinsicID, Tys);
+}
+
 static Value *
 EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                              const NeonSISDIntrinsicInfo &SISDInfo,
@@ -3017,63 +3050,9 @@ EmitAArch64ScalarBuiltinExpr(CodeGenFunc
     return 0;
 
   // Determine the type(s) of this overloaded AArch64 intrinsic.
-  Function *F = 0;
-  SmallVector<llvm::Type *, 3> Tys;
-
-  // Return type.
-  if (IntTypes & (ScalarRet | VectorRet)) {
-     llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    if (IntTypes & ScalarRet) {
-      // Scalar return value.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorRet) {
-      // Convert the scalar return type to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    }
-  }
-
-  // Arguments.
-  if (IntTypes & (ScalarArg0 | VectorGetArg0 | VectorCastArg0)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg0) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg0) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg0) {
-      // Cast the argument to vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  }
- 
-  // The only intrinsics that require a 2nd argument are the compare intrinsics.
-  // However, the builtins don't always have a 2nd argument (e.g.,
-  // floating-point compare to zero), so we inspect the first argument to
-  // determine the type.
-  if (IntTypes & (ScalarArg1 | VectorGetArg1 | VectorCastArg1)) {
-    const Expr *Arg = E->getArg(0);
-    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
-    if (IntTypes & ScalarArg1) {
-      // Scalar argument.
-      Tys.push_back(Ty);
-    } else if (IntTypes & VectorGetArg1) {
-      // Convert the scalar argument to one-vector element type.
-      Tys.push_back(llvm::VectorType::get(Ty, 1));
-    } else if (IntTypes & VectorCastArg1) {
-      // Cast the argument to a vector type.
-      Tys.push_back(cast<llvm::VectorType>(Ty));
-    }
-  } else if (IntTypes & ScalarFpCmpzArg1) {
-    // Floating-point zero argument.
-    Tys.push_back(CGF.FloatTy);
-  }
- 
-  if (IntTypes)
-     F = CGF.CGM.getIntrinsic(Int, Tys);
-  else
-     F = CGF.CGM.getIntrinsic(Int);
+  const Expr *Arg = E->getArg(0);
+  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
+  Function *F = CGF.LookupNeonIntrinsic(Int, IntTypes, ArgTy, E);
 
   Value *Result = CGF.EmitNeonCall(F, Ops, s);
   llvm::Type *ResultType = CGF.ConvertType(E->getType());

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=201847&r1=201846&r2=201847&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Fri Feb 21 05:57:20 2014
@@ -2182,6 +2182,8 @@ public:
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                          SmallVectorImpl<llvm::Value *> &Ops,
                                          llvm::Value *Align = 0);
+  llvm::Function *LookupNeonIntrinsic(unsigned IntrinsicID, unsigned Modifier,
+                                      llvm::Type *ArgTy, const CallExpr *E);
   llvm::Value *EmitNeonCall(llvm::Function *F,
                             SmallVectorImpl<llvm::Value*> &O,
                             const char *name,
