[llvm] r240256 - AVX-512: Added intrinsics for VPERMT2W/D/Q/PS/PD and
Elena Demikhovsky
elena.demikhovsky at intel.com
Sun Jun 21 23:45:48 PDT 2015
Author: delena
Date: Mon Jun 22 01:45:48 2015
New Revision: 240256
URL: http://llvm.org/viewvc/llvm-project?rev=240256&view=rev
Log:
AVX-512: Added intrinsics for VPERMT2W/D/Q/PS/PD and
VPERMI2W/D/Q/PS/PD instructions.
Added tests.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon Jun 22 01:45:48 2015
@@ -1132,26 +1132,271 @@ let TargetPrefix = "x86" in { // All in
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpermt_d_512:
+ def int_x86_avx512_mask_vpermi2var_d_128 :
+ GCCBuiltin<"__builtin_ia32_vpermi2vard128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_d_256 :
+ GCCBuiltin<"__builtin_ia32_vpermi2vard256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_d_512 :
+ GCCBuiltin<"__builtin_ia32_vpermi2vard512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_hi_128 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varhi128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_hi_256 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varhi256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_hi_512 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varhi512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_q_128 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_q_256 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermi2var_q_512 :
+ GCCBuiltin<"__builtin_ia32_vpermi2varq512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_d_512:
GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpermt_q_512:
+ def int_x86_avx512_mask_vpermt2var_q_512:
GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpermt_ps_512:
+ def int_x86_avx512_mask_vpermt2var_ps_512:
GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty,
llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpermt_pd_512:
+ def int_x86_avx512_mask_vpermt2var_pd_512:
GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty,
llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_vpermt2var_d_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2vard128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_d_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2vard128_maskz">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_d_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2vard256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_d_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2vard256_maskz">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_d_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2vard512_maskz">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_hi_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_hi_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi128_maskz">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_hi_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_hi_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi256_maskz">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_hi_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_hi_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varhi512_maskz">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_pd_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd128_maskz">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd256_maskz">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd512_maskz">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_ps_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varps128_maskz">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varps256_maskz">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varps512_maskz">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_q_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_q_128 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varq128_maskz">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt2var_q_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_q_256 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varq256_maskz">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_maskz_vpermt2var_q_512 :
+ GCCBuiltin<"__builtin_ia32_vpermt2varq512_maskz">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Vector blend
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jun 22 01:45:48 2015
@@ -15224,11 +15224,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASK:
+ case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ SDValue PassThru =
+ (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) ?
+ getZeroVector(VT, Subtarget, DAG, dl) : Src1;
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15240,12 +15247,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
- Mask, Src1, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
}
case CMP_MASK:
case CMP_MASK_CC: {
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon Jun 22 01:45:48 2015
@@ -22,7 +22,8 @@ enum IntrinsicType {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK,
+ INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK,
+ VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
};
@@ -667,12 +668,124 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon Jun 22 01:45:48 2015
@@ -3013,3 +3013,122 @@ define <8 x i64>@test_int_x86_avx512_mas
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2d {{.*}}{%k1}
+define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2pd {{.*}}{%k1}
+define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2ps {{.*}}{%k1}
+define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2q {{.*}}{%k1}
+define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d {{.*}}{%k1} {z}
+define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+ %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2pd {{.*}}{%k1} {z}
+define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+ %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2ps {{.*}}{%k1} {z}
+define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+ %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+
+declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2q {{.*}}{%k1} {z}
+define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d {{.*}}{%k1}
+; CHECK-NOT: {z}
+define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Mon Jun 22 01:45:48 2015
@@ -893,6 +893,45 @@ define <32 x i16>@test_int_x86_avx512_ma
ret <32 x i16> %res2
}
+declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %zmm{{.*}}{%k1}
+define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+ %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
+define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+ %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2w %zmm{{.*}}{%k1}
+define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+ %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon Jun 22 01:45:48 2015
@@ -2877,6 +2877,85 @@ define <16 x i16>@test_int_x86_avx512_ma
ret <16 x i16> %res2
}
+declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %xmm{{.*}}{%k1}
+; CHECK-NOT: {z}
+define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %xmm{{.*}}{%k1} {z}
+define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %ymm{{.*}}{%k1}
+define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2w %ymm{{.*}}{%k1} {z}
+define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+ %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2w %xmm{{.*}}{%k1}
+define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2w %ymm{{.*}}{%k1}
+define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+ %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=240256&r1=240255&r2=240256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon Jun 22 01:45:48 2015
@@ -2794,4 +2794,112 @@ define <4 x i64>@test_int_x86_avx512_mas
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
-}
\ No newline at end of file
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d %xmm{{.*}}{%k1}
+; CHECK-NOT: {z}
+define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
+define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+ %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d %ymm{{.*}}{%k1}
+; CHECK-NOT: {z}
+define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermt2d {{.*}}{%k1} {z}
+define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+ %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2pd %xmm{{.*}}{%k1}
+define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2pd %ymm{{.*}}{%k1}
+define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
+ %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2ps %xmm{{.*}}{%k1}
+define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2ps %ymm{{.*}}{%k1}
+define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
+ %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+
More information about the llvm-commits
mailing list