[PATCH][AVX512] Add 512b integer shift by variable patterns and intrinsics
Cameron McInally
cameron.mcinally at nyu.edu
Wed Nov 19 05:58:22 PST 2014
Hi Elena,
I've attached an updated patch that adds the "_mask" suffix to the GNU
builtin names and also updates the AVX512BIBase base class.
If it's okay with you, I'd like to add the AVX512VL variants under a
separate patch. Is that okay with you? ;)
-Cam
On Wed, Nov 19, 2014 at 2:24 AM, Demikhovsky, Elena
<elena.demikhovsky at intel.com> wrote:
> Hi Cam,
>
> The GCC built-in names are incorrect. Please look here https://gcc.gnu.org/svn/gcc/branches/avx512-vlbwdq/gcc/config/i386/avx512fintrin.h
> for the reference.
>
> I also suggest to put two more multiclasses in the middle, we'll need to add "W" form as well.
>
> +defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, v4i32, bc_v4i32, v16i32_info>,
> + EVEX_V512, EVEX_CD8<32, CD8VQ>;
> +defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, v2i64, bc_v2i64, v8i64_info>,
> + EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
>
> I suggest something to write like this:
>
> multiclass avx512_varshift_sizes <>{
> defm Z: avx512_shift_rrm<opc..>, EVEX_V512
> defm Z256: avx512_shift_rrm<opc..>, EVEX_V256
> defm Z128: avx512_shift_rrm<opc.. ..>, EVEX_V128
> }
>
> multiclass avx512_varshift_types < bits<8> opcd, bits<8> opcq, bits<8> opcqw, string OpcodeStr, SDNode OpNode > {
> defm D: avx512_varshift_sizes <opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32, .. >, EVEX_CD8<32, CD8VQ>
> defm Q: avx512_varshift_sizes <opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64 ..>, EVEX_CD8<64, CD8VQ>, VEX_W
> defm W:
> }
> defm VPSRL : avx512_varshift_types <0xD2, 0xD3, 0xD1
>
>
>
>
> - Elena
>
>
> -----Original Message-----
> From: Cameron McInally [mailto:cameron.mcinally at nyu.edu]
> Sent: Tuesday, November 18, 2014 22:34
> To: llvm-commits at cs.uiuc.edu
> Cc: Demikhovsky, Elena; Robert Khasanov
> Subject: [PATCH][AVX512] Add 512b integer shift by variable patterns and intrinsics
>
> Hey guys,
>
> Attached is a patch to support 512b integer shift by variable intrinsics for AVX512.
>
> Thanks,
> Cam
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
-------------- next part --------------
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td (revision 222242)
+++ include/llvm/IR/IntrinsicsX86.td (working copy)
@@ -1603,6 +1603,25 @@
def int_x86_avx512_mask_psrai_q : GCCBuiltin<"__builtin_ia32_psraqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_q : GCCBuiltin<"__builtin_ia32_psllq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_d : GCCBuiltin<"__builtin_ia32_psrld512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_d : GCCBuiltin<"__builtin_ia32_psrad512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Pack ops.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 222242)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -16537,7 +16537,11 @@
RoundingMode),
Mask, Src0, Subtarget, DAG);
}
-
+ case INTR_TYPE_2OP_MASK: {
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
+ Op.getOperand(2)),
+ Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
+ }
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
@@ -16589,7 +16593,7 @@
case VSHIFT_MASK:
return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG),
- Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);;
+ Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
default:
break;
}
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td (revision 222355)
+++ lib/Target/X86/X86InstrAVX512.td (working copy)
@@ -3138,74 +3138,56 @@
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
-multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType vt, ValueType SrcVT,
- PatFrag bc_frag, RegisterClass KRC> {
- // src2 is always 128-bit
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, VR128X:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
- SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))],
- SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
-}
+ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ // src2 is always 128-bit
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (bc_frag (memopv2i64 addr:$src2)))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+ }
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
-
+defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl, v4i32, bc_v4i32, v16i32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VQ>;
+
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
+defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl, v2i64, bc_v2i64, v8i64_info>,
+ EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
v16i32_info>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
+defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl, v4i32, bc_v4i32, v16i32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VQ>;
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
+defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl, v2i64, bc_v2i64, v8i64_info>,
+ EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
- VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
- EVEX_CD8<32, CD8VQ>;
+defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra, v4i32, bc_v4i32, v16i32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VQ>;
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
- VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
- EVEX_CD8<64, CD8VQ>, VEX_W;
+defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra, v2i64, bc_v2i64, v8i64_info>,
+ EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h (revision 222242)
+++ lib/Target/X86/X86IntrinsicsInfo.h (working copy)
@@ -21,7 +21,7 @@
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK_RM
+ INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK
};
struct IntrinsicData {
@@ -195,10 +195,16 @@
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 222242)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -1088,3 +1088,141 @@
}
declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psll_d
+ ; CHECK: vpslld
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psll_d
+ ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
+ ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psll_q
+ ; CHECK: vpsllq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psll_q
+ ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
+ ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psrl_d
+ ; CHECK: vpsrld
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
+ ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
+ ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psrl_q
+ ; CHECK: vpsrlq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
+ ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
+ ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psra_d
+ ; CHECK: vpsrad
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psra_d
+ ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
+ ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK-LABEL: test_x86_avx512_psra_q
+ ; CHECK: vpsraq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_mask_psra_q
+ ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
+ ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
More information about the llvm-commits
mailing list