[AVX512] Add 512b integer shift by immediate intrinsics
Cameron McInally
cameron.mcinally at nyu.edu
Thu Nov 6 15:24:25 PST 2014
Hey guys,
Here is a small patch to add 512b shift by immediate intrinsics.
If everything is cool, I'll send a patch for shift by vector next.
Thanks,
Cam
-------------- next part --------------
Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td (revision 221480)
+++ include/llvm/IR/IntrinsicsX86.td (working copy)
@@ -1584,6 +1584,25 @@
def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_d : GCCBuiltin<"__builtin_ia32_psradi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_q : GCCBuiltin<"__builtin_ia32_psraqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
}
// Pack ops.
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h (revision 221480)
+++ lib/Target/X86/X86IntrinsicsInfo.h (working copy)
@@ -204,6 +204,12 @@
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_q, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll (revision 221480)
+++ test/CodeGen/X86/avx512-intrinsics.ll (working copy)
@@ -985,3 +985,48 @@
}
declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
+
+define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
+ ; CHECK: vpslld
+ %res = call <16 x i32> @llvm.x86.avx512.pslli.d(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pslli.d(<16 x i32>, i32) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
+ ; CHECK: vpsllq
+ %res = call <8 x i64> @llvm.x86.avx512.pslli.q(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pslli.q(<8 x i64>, i32) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
+ ; CHECK: vpsrld
+ %res = call <16 x i32> @llvm.x86.avx512.psrli.d(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.psrli.d(<16 x i32>, i32) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
+ ; CHECK: vpsrlq
+ %res = call <8 x i64> @llvm.x86.avx512.psrli.q(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.psrli.q(<8 x i64>, i32) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
+ ; CHECK: vpsrad
+ %res = call <16 x i32> @llvm.x86.avx512.psrai.d(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.psrai.d(<16 x i32>, i32) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
+ ; CHECK: vpsraq
+ %res = call <8 x i64> @llvm.x86.avx512.psrai.q(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.psrai.q(<8 x i64>, i32) nounwind readnone
More information about the llvm-commits
mailing list