[llvm] r242990 - AVX-512: Implemented encoding , DAG lowering and intrinsics for Integer Truncate with/without saturation

Chandler Carruth chandlerc at google.com
Thu Jul 23 01:05:47 PDT 2015


Sorry, I've reverted this patch in r242992 because it broke compilation and
I needed to make progress.

Bots were broken:
http://lab.llvm.org:8011/builders/clang-x86_64-debian-fast/builds/28956

When changing the core DAG bits, you'll need to test with all the targets
enabled before committing.

On Thu, Jul 23, 2015 at 12:42 AM Igor Breger <igor.breger at intel.com> wrote:

> Author: ibreger
> Date: Thu Jul 23 02:39:21 2015
> New Revision: 242990
>
> URL: http://llvm.org/viewvc/llvm-project?rev=242990&view=rev
> Log:
> AVX-512: Implemented encoding, DAG lowering and intrinsics for Integer
> Truncate with/without saturation
> Added tests for DAG lowering, encoding and intrinsics
>
> Differential Revision: http://reviews.llvm.org/D11218
>
> Added:
>     llvm/trunk/test/CodeGen/X86/avx512-ext.ll
>       - copied, changed from r242987,
> llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
>     llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
> Removed:
>     llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
> Modified:
>     llvm/trunk/include/llvm/IR/IntrinsicsX86.td
>     llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.h
>     llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>     llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
>     llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>     llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
>     llvm/trunk/test/CodeGen/X86/masked_memop.ll
>     llvm/trunk/test/MC/X86/x86-64-avx512bw.s
>     llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
>     llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=242990&r1=242989&r2=242990&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Jul 23 02:39:21 2015
> @@ -5816,6 +5816,550 @@ let TargetPrefix = "x86" in {
>                     llvm_i8_ty], [IntrReadArgMem]>;
>
>  }
> +
> +// truncate
> +let TargetPrefix = "x86" in {
> +  def int_x86_avx512_mask_pmov_qb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw512_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qd_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd128_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qd_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qd_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd256_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qd_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">,
> +          Intrinsic<[llvm_v4i32_ty],
> +                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_qd_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd512_mask">,
> +          Intrinsic<[llvm_v8i32_ty],
> +                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_qd_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">,
> +          Intrinsic<[llvm_v8i32_ty],
> +                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_qd_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">,
> +          Intrinsic<[llvm_v8i32_ty],
> +                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_qd_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_db_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_db_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_db_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_db_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_db_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_db_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_db_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_db_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_db_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_db_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_db_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_db_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_db_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_db_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_db_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_db_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_db_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_db_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_dw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_dw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_dw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_dw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">,
> +          Intrinsic<[llvm_v8i16_ty],
> +                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_dw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw512_mask">,
> +          Intrinsic<[llvm_v16i16_ty],
> +                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_dw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">,
> +          Intrinsic<[llvm_v16i16_ty],
> +                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_dw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">,
> +          Intrinsic<[llvm_v16i16_ty],
> +                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_dw_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_wb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_wb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_mem_128 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_wb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_wb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">,
> +          Intrinsic<[llvm_v16i8_ty],
> +                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_mem_256 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmov_wb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb512_mask">,
> +          Intrinsic<[llvm_v32i8_ty],
> +                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmov_wb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb512_mask">,
> +          Intrinsic<[llvm_v32i8_ty],
> +                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovs_wb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
> +                    [IntrReadWriteArgMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">,
> +          Intrinsic<[llvm_v32i8_ty],
> +                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
> +                    [IntrNoMem]>;
> +  def int_x86_avx512_mask_pmovus_wb_mem_512 :
> +          GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">,
> +          Intrinsic<[],
> +                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
> +                    [IntrReadWriteArgMem]>;
> +}
>  // Misc.
>  let TargetPrefix = "x86" in {
>    def int_x86_avx512_mask_cmp_ps_512 :
>
> Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAG.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAG.td?rev=242990&r1=242989&r2=242990&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetSelectionDAG.td (original)
> +++ llvm/trunk/include/llvm/Target/TargetSelectionDAG.td Thu Jul 23
> 02:39:21 2015
> @@ -493,9 +493,10 @@ def atomic_load      : SDNode<"ISD::ATOM
>  def atomic_store     : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
>                      [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
>
> -def masked_store : SDNode<"ISD::MSTORE",  SDTMaskedStore,
> +// Do not use mld, mst directly. Use masked_store masked_load, masked_truncstore
> +def mst            : SDNode<"ISD::MSTORE",  SDTMaskedStore,
>                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
> -def masked_load  : SDNode<"ISD::MLOAD",  SDTMaskedLoad,
> +def mld            : SDNode<"ISD::MLOAD",  SDTMaskedLoad,
>                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
>  def masked_scatter : SDNode<"ISD::MSCATTER",  SDTMaskedScatter,
>                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
> @@ -680,6 +681,12 @@ def load : PatFrag<(ops node:$ptr), (uni
>    return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
>  }]>;
>
> +// masked load fragments.
> +def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +                          (mld node:$src1, node:$src2, node:$src3), [{
> +  return cast<MaskedLoadSDNode>(N)->getExtensionType() ==
> ISD::NON_EXTLOAD;
> +}]>;
> +
>  // extending load fragments.
>  def extload   : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
>    return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
> @@ -791,6 +798,12 @@ def store : PatFrag<(ops node:$val, node
>    return !cast<StoreSDNode>(N)->isTruncatingStore();
>  }]>;
>
> +// masked store fragments.
> +def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +                           (mst node:$src1, node:$src2, node:$src3), [{
> +  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
> +}]>;
> +
>  // truncstore fragments.
>  def truncstore : PatFrag<(ops node:$val, node:$ptr),
>                           (unindexedstore node:$val, node:$ptr), [{
> @@ -817,6 +830,21 @@ def truncstoref64 : PatFrag<(ops node:$v
>    return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64;
>  }]>;
>
> +def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr),
> +                            (truncstore node:$val, node:$ptr), [{
> +  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
> +}]>;
> +
> +def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr),
> +                             (truncstore node:$val, node:$ptr), [{
> +  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
> +}]>;
> +
> +def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr),
> +                             (truncstore node:$val, node:$ptr), [{
> +  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
> +}]>;
> +
>  // indexed store fragments.
>  def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
>                       (ist node:$val, node:$base, node:$offset), [{
> @@ -891,6 +919,27 @@ def post_truncstf32 : PatFrag<(ops node:
>    return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
>  }]>;
>
> +// masked truncstore fragments
> +def masked_truncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +                                (mst node:$src1, node:$src2, node:$src3),
> [{
> +    return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
> +}]>;
> +def masked_truncstorevi8 :
> +  PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
> +  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() ==
> MVT::i8;
> +}]>;
> +def masked_truncstorevi16 :
> +  PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
> +  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() ==
> MVT::i16;
> +}]>;
> +def masked_truncstorevi32 :
> +  PatFrag<(ops node:$src1, node:$src2, node:$src3),
> +          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
> +  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() ==
> MVT::i32;
> +}]>;
> +
>  // setcc convenience fragments.
>  def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
>                       (setcc node:$lhs, node:$rhs, SETOEQ)>;
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=242990&r1=242989&r2=242990&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 23 02:39:21 2015
> @@ -1348,6 +1348,24 @@ X86TargetLowering::X86TargetLowering(con
>      setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
>      setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
>
> +    setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
> +    setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
> +    setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
> +    setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
> +    setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
> +    if (Subtarget->hasVLX()){
> +      setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
> +      setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
> +      setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
> +      setTruncStoreAction(MVT::v8i32, MVT::v8i8,  Legal);
> +      setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
> +
> +      setTruncStoreAction(MVT::v2i64, MVT::v2i8,  Legal);
> +      setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
> +      setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
> +      setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
> +      setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
> +    }
>      setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
>      setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
>      setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
> @@ -1556,6 +1574,7 @@ X86TargetLowering::X86TargetLowering(con
>      setOperationAction(ISD::VSELECT,            MVT::v64i8, Legal);
>      setOperationAction(ISD::TRUNCATE,           MVT::v32i1, Custom);
>      setOperationAction(ISD::TRUNCATE,           MVT::v64i1, Custom);
> +    setOperationAction(ISD::TRUNCATE,           MVT::v32i8, Custom);
>
>      setOperationAction(ISD::SMAX,               MVT::v64i8, Legal);
>      setOperationAction(ISD::SMAX,               MVT::v32i16, Legal);
> @@ -1566,6 +1585,11 @@ X86TargetLowering::X86TargetLowering(con
>      setOperationAction(ISD::UMIN,               MVT::v64i8, Legal);
>      setOperationAction(ISD::UMIN,               MVT::v32i16, Legal);
>
> +    setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
> +    setTruncStoreAction(MVT::v16i16,  MVT::v16i8, Legal);
> +    if (Subtarget->hasVLX())
> +      setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
> +
>      for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
>        const MVT VT = (MVT::SimpleValueType)i;
>
> @@ -12485,10 +12509,8 @@ SDValue X86TargetLowering::LowerTRUNCATE
>          Subtarget->hasDQI() && Subtarget->hasVLX())
>        return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
>    }
> -  if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
> -    if (VT.getVectorElementType().getSizeInBits() >=8)
> -      return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
>
> +  if (VT.getVectorElementType() == MVT::i1) {
>      assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector
> type");
>      unsigned NumElts = InVT.getVectorNumElements();
>      assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
> @@ -12504,6 +12526,11 @@ SDValue X86TargetLowering::LowerTRUNCATE
>      return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
>    }
>
> +  // vpmovqb/w/d, vpmovdb/w, vpmovwb
> +  if (((!InVT.is512BitVector() && Subtarget->hasVLX()) ||
> InVT.is512BitVector()) &&
> +      (InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI()))
> +    return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
> +
>    if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
>      // On AVX2, v4i64 -> v4i32 becomes VPERMD.
>      if (Subtarget->hasInt256()) {
> @@ -15220,7 +15247,7 @@ static SDValue getTargetVShiftNode(unsig
>
>  /// \brief Return (and \p Op, \p Mask) for compare instructions or
>  /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
> -/// necessary casting for \p Mask when lowering masking intrinsics.
> +/// necessary casting or extending for \p Mask when lowering masking intrinsics.
>  static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
>                                      SDValue PreservedSrc,
>                                      const X86Subtarget *Subtarget,
> @@ -15228,8 +15255,8 @@ static SDValue getVectorMaskingNode(SDVa
>      EVT VT = Op.getValueType();
>      EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
>                                    MVT::i1, VT.getVectorNumElements());
> -    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
> -                                     Mask.getValueType().getSizeInBits());
> +    SDValue VMask = SDValue();
> +    unsigned OpcodeSelect = ISD::VSELECT;
>      SDLoc dl(Op);
>
>      assert(MaskVT.isSimple() && "invalid mask type");
> @@ -15237,11 +15264,20 @@ static SDValue getVectorMaskingNode(SDVa
>      if (isAllOnes(Mask))
>        return Op;
>
> -    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
> -    // are extracted by EXTRACT_SUBVECTOR.
> -    SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
> -                                DAG.getBitcast(BitcastVT, Mask),
> -                                DAG.getIntPtrConstant(0, dl));
> +    if (MaskVT.bitsGT(Mask.getValueType())) {
> +      EVT newMaskVT =  EVT::getIntegerVT(*DAG.getContext(),
> +                                         MaskVT.getSizeInBits());
> +      VMask = DAG.getBitcast(MaskVT,
> +                             DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT,
> Mask));
> +    } else {
> +      EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
> +
>  Mask.getValueType().getSizeInBits());
> +      // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
> +      // are extracted by EXTRACT_SUBVECTOR.
> +      VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
> +                          DAG.getBitcast(BitcastVT, Mask),
> +                          DAG.getIntPtrConstant(0, dl));
> +    }
>
>      switch (Op.getOpcode()) {
>        default: break;
> @@ -15250,10 +15286,18 @@ static SDValue getVectorMaskingNode(SDVa
>        case X86ISD::CMPM:
>        case X86ISD::CMPMU:
>          return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
> +      case X86ISD::VTRUNC:
> +      case X86ISD::VTRUNCS:
> +      case X86ISD::VTRUNCUS:
> +        // We can't use ISD::VSELECT here because it is not always "Legal"
> +        // for the destination type. For example, vpmovqb requires only AVX512,
> +        // while a vselect that operates on byte elements requires BWI.
> +        OpcodeSelect = X86ISD::SELECT;
> +        break;
>      }
>      if (PreservedSrc.getOpcode() == ISD::UNDEF)
>        PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
> -    return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
> +    return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
>  }
>
>  /// \brief Creates an SDNode for a predicated scalar operation.
> @@ -16111,6 +16155,45 @@ static SDValue LowerSEHRESTOREFRAME(SDVa
>    return Chain;
>  }
>
> +/// \brief Lower intrinsics for the TRUNCATE_TO_MEM case.
> +/// Returns a truncating Store or MaskedStore node.
> +static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op,
> +                                               SelectionDAG &DAG,
> +                                               MVT ElementType) {
> +  SDLoc dl(Op);
> +  SDValue Mask = Op.getOperand(4);
> +  SDValue DataToTruncate = Op.getOperand(3);
> +  SDValue Addr = Op.getOperand(2);
> +  SDValue Chain = Op.getOperand(0);
> +
> +  EVT VT  = DataToTruncate.getValueType();
> +  EVT SVT = EVT::getVectorVT(*DAG.getContext(),
> +                             ElementType, VT.getVectorNumElements());
> +
> +  if (isAllOnes(Mask)) // return just a truncate store
> +    return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr,
> +                             MachinePointerInfo(), SVT, false, false,
> +                             SVT.getScalarSizeInBits()/8);
> +
> +  EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
> +                                MVT::i1, VT.getVectorNumElements());
> +  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
> +                                   Mask.getValueType().getSizeInBits());
> +  // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
> +  // are extracted by EXTRACT_SUBVECTOR.
> +  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
> +                              DAG.getBitcast(BitcastVT, Mask),
> +                              DAG.getIntPtrConstant(0, dl));
> +
> +  MachineMemOperand *MMO = DAG.getMachineFunction().
> +    getMachineMemOperand(MachinePointerInfo(),
> +                         MachineMemOperand::MOStore, SVT.getStoreSize(),
> +                         SVT.getScalarSizeInBits()/8);
> +
> +  return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr,
> +                            VMask, SVT, MMO, true);
> +}
> +
>  static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget
> *Subtarget,
>                                        SelectionDAG &DAG) {
>    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
> @@ -16244,6 +16327,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SD
>                          MachinePointerInfo(), false, false,
>                          VT.getScalarSizeInBits()/8);
>    }
> +  case TRUNCATE_TO_MEM_VI8:
> +    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8);
> +  case TRUNCATE_TO_MEM_VI16:
> +    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16);
> +  case TRUNCATE_TO_MEM_VI32:
> +    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
>    case EXPAND_FROM_MEM: {
>      SDLoc dl(Op);
>      SDValue Mask = Op.getOperand(4);
> @@ -18954,7 +19043,8 @@ const char *X86TargetLowering::getTarget
>    case X86ISD::VZEXT:              return "X86ISD::VZEXT";
>    case X86ISD::VSEXT:              return "X86ISD::VSEXT";
>    case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
> -  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
> +  case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
> +  case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
>    case X86ISD::VINSERT:            return "X86ISD::VINSERT";
>    case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
>    case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
> @@ -24093,6 +24183,15 @@ static SDValue PerformMSTORECombine(SDNo
>    unsigned FromSz = VT.getVectorElementType().getSizeInBits();
>    unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
>
> +  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> +
> +  // The truncating store is legal in some cases. For example
> +  // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
> +  // are designated for truncate store.
> +  // In this case we don't need any further transformations.
> +  if (TLI.isTruncStoreLegal(VT, StVT))
> +    return SDValue();
> +
>    // From, To sizes and ElemCount must be pow of two
>    assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
>      "Unexpected size for truncating masked store");
> @@ -24204,6 +24303,13 @@ static SDValue PerformSTORECombine(SDNod
>      unsigned FromSz = VT.getVectorElementType().getSizeInBits();
>      unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
>
> +    // The truncating store is legal in some cases. For example
> +    // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
> +    // are designated for truncate store.
> +    // In this case we don't need any further transformations.
> +    if (TLI.isTruncStoreLegal(VT, StVT))
> +      return SDValue();
> +
>      // From, To sizes and ElemCount must be pow of two
>      if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
>      // We are going to use the original vector elt for storing.
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=242990&r1=242989&r2=242990&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Jul 23 02:39:21 2015
> @@ -282,9 +282,8 @@ namespace llvm {
>
>        // Vector integer truncate.
>        VTRUNC,
> -
> -      // Vector integer truncate with mask.
> -      VTRUNCM,
> +      // Vector integer truncate with unsigned/signed saturation.
> +      VTRUNCUS, VTRUNCS,
>
>        // Vector FP extend.
>        VFPEXT,
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=242990&r1=242989&r2=242990&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Jul 23 02:39:21 2015
> @@ -5571,82 +5571,217 @@ defm VRNDSCALESD : avx512_rndscale_scala
>  // Integer truncate and extend operations
>  //-------------------------------------------------
>
> -multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
> -                          RegisterClass dstRC, RegisterClass srcRC,
> -                          RegisterClass KRC, X86MemOperand x86memop> {
> -  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
> -               (ins srcRC:$src),
> -               !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
> +multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode
> OpNode,
> +                              X86VectorVTInfo SrcInfo, X86VectorVTInfo
> DestInfo,
> +                              X86MemOperand x86memop> {
> +
> +  defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs
> DestInfo.RC:$dst),
> +                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
> +                      (DestInfo.VT (OpNode (SrcInfo.VT
> SrcInfo.RC:$src1)))>,
> +                       EVEX, T8XS;
> +
> +  // for intrinsic pattern match
> +  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
> +                           (DestInfo.VT (OpNode (SrcInfo.VT
> SrcInfo.RC:$src1))),
> +                           undef)),
> +            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz)
> DestInfo.KRCWM:$mask ,
> +                                      SrcInfo.RC:$src1)>;
> +
> +  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
> +                           (DestInfo.VT (OpNode (SrcInfo.VT
> SrcInfo.RC:$src1))),
> +                           DestInfo.ImmAllZerosV)),
> +            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz)
> DestInfo.KRCWM:$mask ,
> +                                      SrcInfo.RC:$src1)>;
> +
> +  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
> +                           (DestInfo.VT (OpNode (SrcInfo.VT
> SrcInfo.RC:$src1))),
> +                           DestInfo.RC:$src0)),
> +            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk)
> DestInfo.RC:$src0,
> +                                      DestInfo.KRCWM:$mask ,
> +                                      SrcInfo.RC:$src1)>;
> +
> +  let mayStore = 1 in {
> +    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
> +               (ins x86memop:$dst, SrcInfo.RC:$src),
> +               OpcodeStr # "\t{$src, $dst |$dst, $src}",
>                 []>, EVEX;
>
> -  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
> -               (ins KRC:$mask, srcRC:$src),
> -               !strconcat(OpcodeStr,
> -                 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
> +    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
> +               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
> +               OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}},
> $src}",
>                 []>, EVEX, EVEX_K;
> +  }//mayStore = 1
> +}
>
> -  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
> -               (ins KRC:$mask, srcRC:$src),
> -               !strconcat(OpcodeStr,
> -                 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z},
> $src}"),
> -               []>, EVEX, EVEX_KZ;
> +multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
> +                                    X86VectorVTInfo DestInfo,
> +                                    PatFrag truncFrag, PatFrag mtruncFrag
> > {
> +
> +  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
> +            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
> +                                    addr:$dst, SrcInfo.RC:$src)>;
> +
> +  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
> +                                               (SrcInfo.VT
> SrcInfo.RC:$src)),
> +            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
> +                            addr:$dst, SrcInfo.KRCWM:$mask,
> SrcInfo.RC:$src)>;
> +}
>
> -  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst,
> srcRC:$src),
> -               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
> -               []>, EVEX;
> +multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
> +                                        X86VectorVTInfo DestInfo, string
> sat > {
>
> -  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
> -               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
> -               !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|${dst}
> {${mask}}, $src}"),
> -               []>, EVEX, EVEX_K;
> +  def:
> Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
> +                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
> +                  addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src),
> SrcInfo.MRC:$mask),
> +           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
> +                    (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
> +                    (SrcInfo.VT SrcInfo.RC:$src))>;
> +
> +  def:
> Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
> +                               DestInfo.Suffix#"_mem_"#SrcInfo.Size)
> +                  addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
> +           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
> +                    (SrcInfo.VT SrcInfo.RC:$src))>;
> +}
> +
> +multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
> +         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
> +         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
> +         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
> +         X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
> +                                                     Predicate prd =
> HasAVX512>{
> +
> +  let Predicates = [HasVLX, prd] in {
> +    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info128,
> +                             DestInfoZ128, x86memopZ128>,
> +                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
> +                             truncFrag, mtruncFrag>, EVEX_V128;
> +
> +    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info256,
> +                             DestInfoZ256, x86memopZ256>,
> +                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
> +                             truncFrag, mtruncFrag>, EVEX_V256;
> +  }
> +  let Predicates = [prd] in
> +    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info512,
> +                             DestInfoZ, x86memopZ>,
> +                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
> +                             truncFrag, mtruncFrag>, EVEX_V512;
> +}
>
> +multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
> +         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
> +         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
> +         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
> +         X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
> +
> +  let Predicates = [HasVLX, prd] in {
> +    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info128,
> +                             DestInfoZ128, x86memopZ128>,
> +                avx512_trunc_sat_mr_lowering<VTSrcInfo.info128,
> DestInfoZ128,
> +                             sat>, EVEX_V128;
> +
> +    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info256,
> +                             DestInfoZ256, x86memopZ256>,
> +                avx512_trunc_sat_mr_lowering<VTSrcInfo.info256,
> DestInfoZ256,
> +                             sat>, EVEX_V256;
> +  }
> +  let Predicates = [prd] in
> +    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode,
> VTSrcInfo.info512,
> +                             DestInfoZ, x86memopZ>,
> +                avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
> +                             sat>, EVEX_V512;
> +}
> +
> +multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
> +               v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem,
> i64mem,
> +               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
> +}
> +multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode,
> avx512vl_i64_info,
> +               v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem,
> i64mem,
> +               sat>, EVEX_CD8<8, CD8VO>;
> +}
> +
> +multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
> +               v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem,
> i128mem,
> +               truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16,
> CD8VQ>;
> +}
> +multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode,
> avx512vl_i64_info,
> +               v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem,
> i128mem,
> +               sat>, EVEX_CD8<16, CD8VQ>;
> +}
> +
> +multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
> +               v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem,
> i256mem,
> +               truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32,
> CD8VH>;
> +}
> +multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode,
> avx512vl_i64_info,
> +               v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem,
> i256mem,
> +               sat>, EVEX_CD8<32, CD8VH>;
> +}
> +
> +multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
> +               v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem,
> i128mem,
> +               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
> +}
> +multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode,
> avx512vl_i32_info,
> +               v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem,
> i128mem,
> +               sat>, EVEX_CD8<8, CD8VQ>;
> +}
> +
> +multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
> +              v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem,
> i256mem,
> +              truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
> +}
> +multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode,
> avx512vl_i32_info,
> +              v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem,
> i256mem,
> +              sat>, EVEX_CD8<16, CD8VH>;
> +}
> +
> +multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
> +  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
> +              v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem,
> i256mem,
> +              truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16,
> CD8VH>;
> +}
> +multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
> +  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode,
> avx512vl_i16_info,
> +              v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem,
> i256mem,
> +              sat, HasBWI>, EVEX_CD8<16, CD8VH>;
>  }
> -defm VPMOVQB    : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
> -defm VPMOVSQB   : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
> -defm VPMOVUSQB  : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
> -defm VPMOVQW    : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
> -defm VPMOVSQW   : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
> -defm VPMOVUSQW  : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512,
> VK8WM,
> -                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
> -defm VPMOVQD    : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512,
> VK8WM,
> -                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
> -defm VPMOVSQD   : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512,
> VK8WM,
> -                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
> -defm VPMOVUSQD  : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512,
> VK8WM,
> -
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150723/be306b57/attachment.html>


More information about the llvm-commits mailing list