[llvm] r222786 - [AVX512] Add 512b integer shift by variable intrinsics and patterns.

Robert Khasanov rob.khasanov at gmail.com
Fri Dec 5 03:39:20 PST 2014


Hi Cameron,

Thanks for this patch. Sorry for the delay in reviewing it; I was in the
hospital for the past few weeks.

2014-11-25 23:41 GMT+03:00 Cameron McInally <cameron.mcinally at nyu.edu>:

> Author: mcinally
> Date: Tue Nov 25 14:41:51 2014
> New Revision: 222786
>
> URL: http://llvm.org/viewvc/llvm-project?rev=222786&view=rev
> Log:
> [AVX512] Add 512b integer shift by variable intrinsics and patterns.
>
> Modified:
>     llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>     llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>     llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=222786&r1=222785&r2=222786&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Nov 25 14:41:51 2014
> @@ -1603,6 +1603,25 @@ let TargetPrefix = "x86" in {  // All in
>    def int_x86_avx512_mask_psrai_q :
> GCCBuiltin<"__builtin_ia32_psraqi512">,
>                Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
>                           llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
> [IntrNoMem]>;
> +
> +  def int_x86_avx512_mask_psll_d :
> GCCBuiltin<"__builtin_ia32_pslld512_mask">,
> +              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
> +                         llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty],
> [IntrNoMem]>;
> +  def int_x86_avx512_mask_psll_q :
> GCCBuiltin<"__builtin_ia32_psllq512_mask">,
> +              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
> +                         llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty],
> [IntrNoMem]>;
> +  def int_x86_avx512_mask_psrl_d :
> GCCBuiltin<"__builtin_ia32_psrld512_mask">,
> +              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
> +                         llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty],
> [IntrNoMem]>;
> +  def int_x86_avx512_mask_psrl_q :
> GCCBuiltin<"__builtin_ia32_psrlq512_mask">,
> +              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
> +                         llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty],
> [IntrNoMem]>;
> +  def int_x86_avx512_mask_psra_d :
> GCCBuiltin<"__builtin_ia32_psrad512_mask">,
> +              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
> +                         llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty],
> [IntrNoMem]>;
> +  def int_x86_avx512_mask_psra_q :
> GCCBuiltin<"__builtin_ia32_psraq512_mask">,
> +              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
> +                         llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty],
> [IntrNoMem]>;
>  }
>
>  // Pack ops.
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=222786&r1=222785&r2=222786&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 25 14:41:51 2014
> @@ -16872,7 +16872,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>                                                RoundingMode),
>                                    Mask, Src0, Subtarget, DAG);
>      }
> -
> +    case INTR_TYPE_2OP_MASK: {
> +      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
> Op.getOperand(1),
> +                                              Op.getOperand(2)),
> +                                  Op.getOperand(4), Op.getOperand(3),
> Subtarget, DAG);
> +    }
>      case CMP_MASK:
>      case CMP_MASK_CC: {
>        // Comparison intrinsics with masks.
> @@ -16924,7 +16928,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
>      case VSHIFT_MASK:
>        return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl,
> Op.getSimpleValueType(),
>                                                        Op.getOperand(1),
> Op.getOperand(2), DAG),
> -                                  Op.getOperand(4), Op.getOperand(3),
> Subtarget, DAG);;
> +                                  Op.getOperand(4), Op.getOperand(3),
> Subtarget, DAG);
>      default:
>        break;
>      }
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=222786&r1=222785&r2=222786&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 25 14:41:51 2014
> @@ -3196,6 +3196,7 @@ def : Pat <(i16 (int_x86_avx512_mask_pte
>  def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
>                   (v8i64 VR512:$src2), (i8 -1))),
>                   (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1,
> VR512:$src2), GR8)>;
> +
>
>  //===----------------------------------------------------------------------===//
>  // AVX-512  Shift instructions
>
>  //===----------------------------------------------------------------------===//
> @@ -3214,73 +3215,57 @@ multiclass avx512_shift_rmi<bits<8> opc,
>  }
>
>  multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
> -                          RegisterClass RC, ValueType vt, ValueType SrcVT,
> -                          PatFrag bc_frag, RegisterClass KRC> {
> -  // src2 is always 128-bit
> -  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
> -       (ins RC:$src1, VR128X:$src2),
> -           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
> $src2}"),
> -       [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
> -        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
> -  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
> -       (ins KRC:$mask, RC:$src1, VR128X:$src2),
> -           !strconcat(OpcodeStr,
> -                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1,
> $src2}"),
> -       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
> -  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
> -       (ins RC:$src1, i128mem:$src2),
> -           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1,
> $src2}"),
> -       [(set RC:$dst, (vt (OpNode RC:$src1,
> -                       (bc_frag (memopv2i64 addr:$src2)))))],
> -                        SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
> -  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
> -       (ins KRC:$mask, RC:$src1, i128mem:$src2),
> -           !strconcat(OpcodeStr,
> -                "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1,
> $src2}"),
> -       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
> +                            ValueType SrcVT, PatFrag bc_frag,
> X86VectorVTInfo _> {
> +   // src2 is always 128-bit
> +  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
> +                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
> +                      "$src2, $src1", "$src1, $src2",
> +                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
> +                   " ",  SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
> +  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
> +                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
> +                       "$src2, $src1", "$src1, $src2",
> +                   (_.VT (OpNode _.RC:$src1, (bc_frag (memopv2i64
> addr:$src2)))),
> +                   " ",  SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
> +}
> +
> +multiclass avx512_varshift_sizes<bits<8> opc, string OpcodeStr, SDNode
> OpNode,
> +                                  ValueType SrcVT, PatFrag bc_frag,
> X86VectorVTInfo _> {
> +  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>,
> EVEX_V512;
> +}
> +
> +multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string
> OpcodeStr,
> +                                 SDNode OpNode> {
> +  defm D : avx512_varshift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32,
> bc_v4i32,
> +                                 v16i32_info>, EVEX_CD8<32, CD8VQ>;
> +  defm Q : avx512_varshift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64,
> bc_v2i64,
> +                                 v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
>  }
>
>  defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
>                             v16i32_info>,
>                             EVEX_V512, EVEX_CD8<32, CD8VF>;
> -defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
> -                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
> EVEX_V512,
> -                           EVEX_CD8<32, CD8VQ>;
> -
>  defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
>                             v8i64_info>, EVEX_V512,
>                             EVEX_CD8<64, CD8VF>, VEX_W;
> -defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
> -                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
> EVEX_V512,
> -                           EVEX_CD8<64, CD8VQ>, VEX_W;
>
>  defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
>                             v16i32_info>, EVEX_V512,
>                             EVEX_CD8<32, CD8VF>;
> -defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
> -                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
> EVEX_V512,
> -                           EVEX_CD8<32, CD8VQ>;
> -
>  defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
>                             v8i64_info>, EVEX_V512,
>                             EVEX_CD8<64, CD8VF>, VEX_W;
> -defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
> -                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
> EVEX_V512,
> -                           EVEX_CD8<64, CD8VQ>, VEX_W;
>
>  defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
>                             v16i32_info>,
>                             EVEX_V512, EVEX_CD8<32, CD8VF>;
> -defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
> -                           VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
> EVEX_V512,
> -                           EVEX_CD8<32, CD8VQ>;
> -
>  defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
>                             v8i64_info>, EVEX_V512,
>                             EVEX_CD8<64, CD8VF>, VEX_W;
> -defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
> -                           VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
> EVEX_V512,
> -                           EVEX_CD8<64, CD8VQ>, VEX_W;
> +
> +defm VPSRL : avx512_varshift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
> +defm VPSLL : avx512_varshift_types<0xF2, 0xF3, "vpsll", X86vshl>;
> +defm VPSRA : avx512_varshift_types<0xE2, 0xE2, "vpsra", X86vsra>;
>
>
>  //===-------------------------------------------------------------------===//
>  // Variable Bit Shifts
>
> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=222786&r1=222785&r2=222786&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Nov 25 14:41:51 2014
> @@ -21,7 +21,7 @@ enum IntrinsicType {
>    GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
>    INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
>    CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
> -  INTR_TYPE_1OP_MASK_RM
> +  INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK
>  };
>
>  struct IntrinsicData {
> @@ -195,10 +195,16 @@ static const IntrinsicData  IntrinsicsWi
>    X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128,  CMP_MASK,
> X86ISD::PCMPGTM, 0),
>    X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256,  CMP_MASK,
> X86ISD::PCMPGTM, 0),
>    X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512,  CMP_MASK,
> X86ISD::PCMPGTM, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psll_d,        INTR_TYPE_2OP_MASK,
> X86ISD::VSHL, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psll_q,        INTR_TYPE_2OP_MASK,
> X86ISD::VSHL, 0),
>    X86_INTRINSIC_DATA(avx512_mask_pslli_d,       VSHIFT_MASK,
> X86ISD::VSHLI, 0),
>    X86_INTRINSIC_DATA(avx512_mask_pslli_q,       VSHIFT_MASK,
> X86ISD::VSHLI, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psra_d,        INTR_TYPE_2OP_MASK,
> X86ISD::VSRA, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psra_q,        INTR_TYPE_2OP_MASK,
> X86ISD::VSRA, 0),
>    X86_INTRINSIC_DATA(avx512_mask_psrai_d,       VSHIFT_MASK,
> X86ISD::VSRAI, 0),
>    X86_INTRINSIC_DATA(avx512_mask_psrai_q,       VSHIFT_MASK,
> X86ISD::VSRAI, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psrl_d,        INTR_TYPE_2OP_MASK,
> X86ISD::VSRL, 0),
> +  X86_INTRINSIC_DATA(avx512_mask_psrl_q,        INTR_TYPE_2OP_MASK,
> X86ISD::VSRL, 0),
>    X86_INTRINSIC_DATA(avx512_mask_psrli_d,       VSHIFT_MASK,
> X86ISD::VSRLI, 0),
>    X86_INTRINSIC_DATA(avx512_mask_psrli_q,       VSHIFT_MASK,
> X86ISD::VSRLI, 0),
>    X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128,    CMP_MASK_CC,
> X86ISD::CMPMU, 0),
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=222786&r1=222785&r2=222786&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Nov 25 14:41:51
> 2014
> @@ -1088,3 +1088,141 @@ define <8 x i64> @test_x86_avx512_maskz_
>  }
>
>  declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x
> i64>, i8) nounwind readnone
> +
> +define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psll_d
> +  ; CHECK: vpslld
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32>
> %a1, <16 x i32> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psll_d
> +  ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> %a2, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32>
> %a1, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
> +  ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>,
> <16 x i32>, i16) nounwind readnone
> +
> +define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psll_q
> +  ; CHECK: vpsllq
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 -1)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64>
> %a1, <8 x i64> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psll_q
> +  ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> %a2, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64>
> %a1, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
> +  ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x
> i64>, i8) nounwind readnone
> +
> +define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psrl_d
> +  ; CHECK: vpsrld
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32>
> %a1, <16 x i32> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
> +  ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> %a2, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32>
> %a1, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
> +  ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>,
> <16 x i32>, i16) nounwind readnone
> +
> +define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psrl_q
> +  ; CHECK: vpsrlq
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 -1)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64>
> %a1, <8 x i64> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
> +  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> %a2, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64>
> %a1, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
> +  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x
> i64>, i8) nounwind readnone
> +
> +define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psra_d
> +  ; CHECK: vpsrad
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32>
> %a1, <16 x i32> %a2, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psra_d
> +  ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> %a2, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32>
> %a1, i16 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
> +  ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4
> x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
> +  ret <16 x i32> %res
> +}
> +
> +declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>,
> <16 x i32>, i16) nounwind readnone
> +
> +define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
> +  ; CHECK-LABEL: test_x86_avx512_psra_q
> +  ; CHECK: vpsraq
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 -1)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64>
> %a1, <8 x i64> %a2, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_mask_psra_q
> +  ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> %a2, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64>
> %a1, i8 %mask) {
> +  ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
> +  ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
> +  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x
> i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
> +  ret <8 x i64> %res
> +}
> +
> +declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x
> i64>, i8) nounwind readnone
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141205/9ad99de9/attachment.html>


More information about the llvm-commits mailing list