[AVX512] Add 512b integer shift by immediate intrinsics
Cameron McInally
cameron.mcinally at nyu.edu
Mon Nov 10 06:39:49 PST 2014
Hey Robert,
Please find an updated patch attached.
Also, note that patterns will need to be added to match the masking
shift operations. I will have to address these in a separate patch.
-Cameron
On Fri, Nov 7, 2014 at 12:19 PM, Robert Khasanov <rob.khasanov at gmail.com> wrote:
> Hi Cameron,
>
> Your AVX512 builtins don't support merge-masking and zero-masking versions.
> General, merge-masked and zero-masked Intel intrinsics will return one
> builtin with the different mask and passthrough args.
> Please add these arguments to your intrinsics, then you will need to create
> another intrinsic type (VSHIFT_MASK), and handle it using
> getVectorMaskingNode() function.
>
>
> 2014-11-07 2:24 GMT+03:00 Cameron McInally <cameron.mcinally at nyu.edu>:
>>
>> Hey guys,
>>
>> Here is a small patch to add 512b shift by immediate intrinsics.
>>
>> If everything is cool, I'll send a patch for shift by vector next.
>>
>> Thanks,
>> Cam
>
>
-------------- next part --------------
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td (revision 221537)
+++ include/llvm/IR/IntrinsicsX86.td (working copy)
@@ -1584,6 +1584,25 @@
def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrai_d : GCCBuiltin<"__builtin_ia32_psradi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrai_q : GCCBuiltin<"__builtin_ia32_psraqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Pack ops.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp (revision 221537)
+++ lib/Target/X86/X86ISelLowering.cpp (working copy)
@@ -16303,6 +16303,10 @@
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
+ case VSHIFT_MASK:
+ return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG),
+ Op.getOperand(4), Op.getOperand(3), DAG);;
default:
break;
}
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h (revision 221539)
+++ lib/Target/X86/X86IntrinsicsInfo.h (working copy)
@@ -20,7 +20,7 @@
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
- CMP_MASK, CMP_MASK_CC, VSHIFT, COMI
+ CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI
};
struct IntrinsicData {
@@ -192,6 +192,12 @@
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 221537)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -985,3 +985,48 @@
}
declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
+
+define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0) {
+ ; CHECK: vpslld
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0) {
+ ; CHECK: vpsllq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0) {
+ ; CHECK: vpsrld
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0) {
+ ; CHECK: vpsrlq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0) {
+ ; CHECK: vpsrad
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0) {
+ ; CHECK: vpsraq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
More information about the llvm-commits
mailing list