[llvm] r246640 - AVX512: Implemented encoding and intrinsics for vshufps/d.

Igor Breger via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 2 03:50:59 PDT 2015


Author: ibreger
Date: Wed Sep  2 05:50:58 2015
New Revision: 246640

URL: http://llvm.org/viewvc/llvm-project?rev=246640&view=rev
Log:
AVX512: Implemented encoding and intrinsics for vshufps/d.
Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D11709
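
For reference, the new masked intrinsics line up with the usual C-level shuffle
wrappers. A minimal usage sketch, assuming a toolchain whose <immintrin.h>
exposes the wrapper names from the Intel intrinsics guide
(_mm512_mask_shuffle_ps, _mm256_maskz_shuffle_pd); the patch itself only adds
the LLVM-side intrinsics, patterns and encodings:

  /* Sketch only: the shuffle immediate must be a compile-time constant. */
  #include <immintrin.h>

  /* Merge-masked 512-bit single-precision shuffle: elements whose mask bit
     is 0 keep the value from 'src'. Requires AVX-512F. */
  __m512 shuffle_ps_masked(__m512 src, __mmask16 k, __m512 a, __m512 b) {
    return _mm512_mask_shuffle_ps(src, k, a, b, 0x16);
  }

  /* Zero-masked 256-bit double-precision shuffle: elements whose mask bit
     is 0 are zeroed. Requires AVX-512VL. */
  __m256d shuffle_pd_maskz(__mmask8 k, __m256d a, __m256d b) {
    return _mm256_maskz_shuffle_pd(k, a, b, 0x6);
  }

The 0x16 immediate is the same $22 used in the CodeGen tests below.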

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
    llvm/trunk/test/MC/X86/avx512-encodings.s
    llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Sep  2 05:50:58 2015
@@ -1423,6 +1423,42 @@ let TargetPrefix = "x86" in {  // All in
           Intrinsic<[llvm_v64i8_ty],
           [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty,  llvm_i64_ty],
           [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_pd_128 :
+         GCCBuiltin<"__builtin_ia32_shufpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_pd_256 :
+         GCCBuiltin<"__builtin_ia32_shufpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_pd_512 :
+         GCCBuiltin<"__builtin_ia32_shufpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_ps_128 :
+         GCCBuiltin<"__builtin_ia32_shufps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_ps_256 :
+         GCCBuiltin<"__builtin_ia32_shufps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_shuf_ps_512 :
+         GCCBuiltin<"__builtin_ia32_shufps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
 }
 
 // Vector blend

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Sep  2 05:50:58 2015
@@ -6042,44 +6042,6 @@ defm VSCATTERPF1DPD: avx512_gather_scatt
 
 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                      VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-//===----------------------------------------------------------------------===//
-// VSHUFPS - VSHUFPD Operations
-
-multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
-                      ValueType vt, string OpcodeStr, PatFrag mem_frag,
-                      Domain d> {
-  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
-                   (ins RC:$src1, x86memop:$src2, u8imm:$src3),
-                   !strconcat(OpcodeStr,
-                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
-                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
-                   EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
-  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
-                   (ins RC:$src1, RC:$src2, u8imm:$src3),
-                   !strconcat(OpcodeStr,
-                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
-                                       (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
-                   EVEX_4V, Sched<[WriteShuffle]>;
-}
-
-defm VSHUFPSZ  : avx512_shufp<VR512, f512mem, v16f32, "vshufps", loadv16f32,
-                  SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VSHUFPDZ  : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", loadv8f64,
-                  SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v16i32 (X86Shufp VR512:$src1,
-                    (loadv16i32 addr:$src2), (i8 imm:$imm))),
-          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
-          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v8i64 (X86Shufp VR512:$src1,
-                            (loadv8i64 addr:$src2), (i8 imm:$imm))),
-          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
 
 // Helper fragments to match sext vXi1 to vXiY.
 def v16i1sextv16i32  : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
@@ -6831,3 +6793,20 @@ defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<
                                        SSE_INTALU_ITINS_P, HasAVX512>;
 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
+//===----------------------------------------------------------------------===//
+// VSHUFPS - VSHUFPD Operations
+//===----------------------------------------------------------------------===//
+multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
+                                                AVX512VLVectorVTInfo VTInfo_FP>{
+  defm NAME:     avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
+                                   EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
+                                   AVX512AIi8Base, EVEX_4V;
+  let isCodeGenOnly = 1 in {
+    defm NAME#_I: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0xC6, X86Shufp>,
+                                   EVEX_CD8<VTInfo_I.info512.EltSize, CD8VF>,
+                                   AVX512AIi8Base, EVEX_4V;
+  }
+}
+
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Sep  2 05:50:58 2015
@@ -2609,19 +2609,20 @@ multiclass sse12_shuffle<RegisterClass R
             Sched<[WriteFShuffle]>;
 }
 
-defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
+let Predicates = [HasAVX, NoVLX] in {
+  defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            loadv4f32, SSEPackedSingle>, PS, VEX_4V;
-defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
-defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
+  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            loadv2f64, SSEPackedDouble>, PD, VEX_4V;
-defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;
-
+}
 let Constraints = "$src1 = $dst" in {
   defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                     "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -2631,7 +2632,7 @@ let Constraints = "$src1 = $dst" in {
                     memopv2f64, SSEPackedDouble>, PD;
 }
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (X86Shufp VR128:$src1,
                        (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
             (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Sep  2 05:50:58 2015
@@ -1155,6 +1155,18 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,

Modified: llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-isa-check.ll?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-isa-check.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-isa-check.ll Wed Sep  2 05:50:58 2015
@@ -248,4 +248,17 @@ define <4 x i32> @shuffle_v4i32_vpalignr
 define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
   ret <8 x i32> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
+  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
+  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
+  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
+  ret <2 x double> %bitcast64
 }
\ No newline at end of file

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Wed Sep  2 05:50:58 2015
@@ -3951,3 +3951,41 @@ define <2 x double> @test_getexp_sd(<2 x
   ret <2 x double> %res
 }
 
+declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    vaddpd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
+  %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
+
+  %res3 = fadd <8 x double> %res, %res1
+  %res4 = fadd <8 x double> %res3, %res2
+  ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)  
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+

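The $22 immediate in the tests above (and the $0xab/$0x7b immediates in the
assembler tests below) is decoded lane by lane. As a reading aid, here is a
small, unofficial C reference model of the 512-bit VSHUFPD element selection,
written from the documented SHUFPD semantics rather than taken from the patch:

  #include <stdint.h>

  /* For each 128-bit lane i (i = 0..3) the destination takes one element from
     'a' selected by imm bit 2*i and one element from 'b' selected by imm bit
     2*i+1. */
  static void vshufpd512_model(const double a[8], const double b[8],
                               uint8_t imm, double dst[8]) {
    for (int lane = 0; lane < 4; ++lane) {
      int base  = 2 * lane;                    /* first element of the lane */
      int sel_a = (imm >> (2 * lane)) & 1;
      int sel_b = (imm >> (2 * lane + 1)) & 1;
      dst[base]     = a[base + sel_a];
      dst[base + 1] = b[base + sel_b];
    }
  }

For imm = 22 (0b00010110) the first lane, for example, takes a[0] and b[1].
VSHUFPS works the same way but selects four single-precision elements per lane
with two immediate bits each (the same eight bits are reused in every lane).
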
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Wed Sep  2 05:50:58 2015
@@ -4508,6 +4508,78 @@ define <8 x float>@test_int_x86_avx512_m
   ret <8 x float> %res2
 }
 
+declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vshufpd $22, %xmm1, %xmm0, %xmm0
+; CHECK:         vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4)
+  %res3 = fadd <2 x double> %res, %res1
+  %res4 = fadd <2 x double> %res2, %res3
+  ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vshufpd $22, %ymm1, %ymm0, %ymm0
+; CHECK:         vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm0
+; CHECK:         vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm0
+; CHECK:         vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
 declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
 
 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {

Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
+++ llvm/trunk/test/MC/X86/avx512-encodings.s Wed Sep  2 05:50:58 2015
@@ -14958,6 +14958,126 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
           vgetexpsd -1032(%rdx), %xmm7, %xmm2
 
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xd1,0x4c,0x48,0xc6,0xe9,0xab]
+          vshufps $0xab, %zmm9, %zmm6, %zmm5
+
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5 {%k6}
+// CHECK:  encoding: [0x62,0xd1,0x4c,0x4e,0xc6,0xe9,0xab]
+          vshufps $0xab, %zmm9, %zmm6, %zmm5 {%k6}
+
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5 {%k6} {z}
+// CHECK:  encoding: [0x62,0xd1,0x4c,0xce,0xc6,0xe9,0xab]
+          vshufps $0xab, %zmm9, %zmm6, %zmm5 {%k6} {z}
+
+// CHECK: vshufps $123, %zmm9, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xd1,0x4c,0x48,0xc6,0xe9,0x7b]
+          vshufps $0x7b, %zmm9, %zmm6, %zmm5
+
+// CHECK: vshufps $123, (%rcx), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x29,0x7b]
+          vshufps $0x7b, (%rcx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xb1,0x4c,0x48,0xc6,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufps $0x7b, 291(%rax,%r14,8), %zmm6, %zmm5
+
+// CHECK: vshufps $123, (%rcx){1to16}, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x29,0x7b]
+          vshufps $0x7b, (%rcx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, 8128(%rdx), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x6a,0x7f,0x7b]
+          vshufps $0x7b, 8128(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 8192(%rdx), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x48,0xc6,0xaa,0x00,0x20,0x00,0x00,0x7b]
+          vshufps $0x7b, 8192(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, -8192(%rdx), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x6a,0x80,0x7b]
+          vshufps $0x7b, -8192(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, -8256(%rdx), %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x48,0xc6,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+          vshufps $0x7b, -8256(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 508(%rdx){1to16}, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x6a,0x7f,0x7b]
+          vshufps $0x7b, 508(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, 512(%rdx){1to16}, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x58,0xc6,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vshufps $0x7b, 512(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, -512(%rdx){1to16}, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x6a,0x80,0x7b]
+          vshufps $0x7b, -512(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, -516(%rdx){1to16}, %zmm6, %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x4c,0x58,0xc6,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vshufps $0x7b, -516(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x21,0xbd,0x48,0xc6,0xe6,0xab]
+          vshufpd $0xab, %zmm22, %zmm8, %zmm28
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28 {%k2}
+// CHECK:  encoding: [0x62,0x21,0xbd,0x4a,0xc6,0xe6,0xab]
+          vshufpd $0xab, %zmm22, %zmm8, %zmm28 {%k2}
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0xbd,0xca,0xc6,0xe6,0xab]
+          vshufpd $0xab, %zmm22, %zmm8, %zmm28 {%k2} {z}
+
+// CHECK: vshufpd $123, %zmm22, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x21,0xbd,0x48,0xc6,0xe6,0x7b]
+          vshufpd $0x7b, %zmm22, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, (%rcx), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x48,0xc6,0x21,0x7b]
+          vshufpd $0x7b, (%rcx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x21,0xbd,0x48,0xc6,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufpd $0x7b, 291(%rax,%r14,8), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, (%rcx){1to8}, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x58,0xc6,0x21,0x7b]
+          vshufpd $0x7b, (%rcx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 8128(%rdx), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x48,0xc6,0x62,0x7f,0x7b]
+          vshufpd $0x7b, 8128(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 8192(%rdx), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x48,0xc6,0xa2,0x00,0x20,0x00,0x00,0x7b]
+          vshufpd $0x7b, 8192(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -8192(%rdx), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x48,0xc6,0x62,0x80,0x7b]
+          vshufpd $0x7b, -8192(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -8256(%rdx), %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x48,0xc6,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+          vshufpd $0x7b, -8256(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 1016(%rdx){1to8}, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x58,0xc6,0x62,0x7f,0x7b]
+          vshufpd $0x7b, 1016(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 1024(%rdx){1to8}, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x58,0xc6,0xa2,0x00,0x04,0x00,0x00,0x7b]
+          vshufpd $0x7b, 1024(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -1024(%rdx){1to8}, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x58,0xc6,0x62,0x80,0x7b]
+          vshufpd $0x7b, -1024(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -1032(%rdx){1to8}, %zmm8, %zmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x58,0xc6,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+          vshufpd $0x7b, -1032(%rdx){1to8}, %zmm8, %zmm28
+
 // CHECK: kortestw %k6, %k2
 // CHECK:  encoding: [0xc5,0xf8,0x98,0xd6]
           kortestw %k6, %k2

Modified: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s?rev=246640&r1=246639&r2=246640&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s Wed Sep  2 05:50:58 2015
@@ -19739,6 +19739,246 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0xf8,0xfb,0xff,0xff]
           vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20
 
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x44,0x00,0xc6,0xcd,0xab]
+          vshufps $0xab, %xmm21, %xmm23, %xmm17
+
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x44,0x03,0xc6,0xcd,0xab]
+          vshufps $0xab, %xmm21, %xmm23, %xmm17 {%k3}
+
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x44,0x83,0xc6,0xcd,0xab]
+          vshufps $0xab, %xmm21, %xmm23, %xmm17 {%k3} {z}
+
+// CHECK: vshufps $123, %xmm21, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x44,0x00,0xc6,0xcd,0x7b]
+          vshufps $0x7b, %xmm21, %xmm23, %xmm17
+
+// CHECK: vshufps $123, (%rcx), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x00,0xc6,0x09,0x7b]
+          vshufps $0x7b, (%rcx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x44,0x00,0xc6,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufps $0x7b, 291(%rax,%r14,8), %xmm23, %xmm17
+
+// CHECK: vshufps $123, (%rcx){1to4}, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x10,0xc6,0x09,0x7b]
+          vshufps $0x7b, (%rcx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, 2032(%rdx), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x00,0xc6,0x4a,0x7f,0x7b]
+          vshufps $0x7b, 2032(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 2048(%rdx), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x00,0xc6,0x8a,0x00,0x08,0x00,0x00,0x7b]
+          vshufps $0x7b, 2048(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, -2048(%rdx), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x00,0xc6,0x4a,0x80,0x7b]
+          vshufps $0x7b, -2048(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, -2064(%rdx), %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x00,0xc6,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+          vshufps $0x7b, -2064(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 508(%rdx){1to4}, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x10,0xc6,0x4a,0x7f,0x7b]
+          vshufps $0x7b, 508(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, 512(%rdx){1to4}, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x10,0xc6,0x8a,0x00,0x02,0x00,0x00,0x7b]
+          vshufps $0x7b, 512(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, -512(%rdx){1to4}, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x10,0xc6,0x4a,0x80,0x7b]
+          vshufps $0x7b, -512(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, -516(%rdx){1to4}, %xmm23, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x44,0x10,0xc6,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+          vshufps $0x7b, -516(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x34,0x20,0xc6,0xdf,0xab]
+          vshufps $0xab, %ymm23, %ymm25, %ymm19
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x34,0x23,0xc6,0xdf,0xab]
+          vshufps $0xab, %ymm23, %ymm25, %ymm19 {%k3}
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x34,0xa3,0xc6,0xdf,0xab]
+          vshufps $0xab, %ymm23, %ymm25, %ymm19 {%k3} {z}
+
+// CHECK: vshufps $123, %ymm23, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x34,0x20,0xc6,0xdf,0x7b]
+          vshufps $0x7b, %ymm23, %ymm25, %ymm19
+
+// CHECK: vshufps $123, (%rcx), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x20,0xc6,0x19,0x7b]
+          vshufps $0x7b, (%rcx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x34,0x20,0xc6,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufps $0x7b, 291(%rax,%r14,8), %ymm25, %ymm19
+
+// CHECK: vshufps $123, (%rcx){1to8}, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x30,0xc6,0x19,0x7b]
+          vshufps $0x7b, (%rcx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, 4064(%rdx), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x20,0xc6,0x5a,0x7f,0x7b]
+          vshufps $0x7b, 4064(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 4096(%rdx), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x20,0xc6,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vshufps $0x7b, 4096(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, -4096(%rdx), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x20,0xc6,0x5a,0x80,0x7b]
+          vshufps $0x7b, -4096(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, -4128(%rdx), %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x20,0xc6,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vshufps $0x7b, -4128(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 508(%rdx){1to8}, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x30,0xc6,0x5a,0x7f,0x7b]
+          vshufps $0x7b, 508(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, 512(%rdx){1to8}, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x30,0xc6,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vshufps $0x7b, 512(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, -512(%rdx){1to8}, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x30,0xc6,0x5a,0x80,0x7b]
+          vshufps $0x7b, -512(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, -516(%rdx){1to8}, %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x34,0x30,0xc6,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vshufps $0x7b, -516(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xe6,0xab]
+          vshufpd $0xab, %xmm22, %xmm21, %xmm20
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0xd5,0x03,0xc6,0xe6,0xab]
+          vshufpd $0xab, %xmm22, %xmm21, %xmm20 {%k3}
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0xd5,0x83,0xc6,0xe6,0xab]
+          vshufpd $0xab, %xmm22, %xmm21, %xmm20 {%k3} {z}
+
+// CHECK: vshufpd $123, %xmm22, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xe6,0x7b]
+          vshufpd $0x7b, %xmm22, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, (%rcx), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x21,0x7b]
+          vshufpd $0x7b, (%rcx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufpd $0x7b, 291(%rax,%r14,8), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, (%rcx){1to2}, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x21,0x7b]
+          vshufpd $0x7b, (%rcx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 2032(%rdx), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x62,0x7f,0x7b]
+          vshufpd $0x7b, 2032(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 2048(%rdx), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x00,0xc6,0xa2,0x00,0x08,0x00,0x00,0x7b]
+          vshufpd $0x7b, 2048(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -2048(%rdx), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x62,0x80,0x7b]
+          vshufpd $0x7b, -2048(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -2064(%rdx), %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x00,0xc6,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+          vshufpd $0x7b, -2064(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 1016(%rdx){1to2}, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x62,0x7f,0x7b]
+          vshufpd $0x7b, 1016(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 1024(%rdx){1to2}, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x10,0xc6,0xa2,0x00,0x04,0x00,0x00,0x7b]
+          vshufpd $0x7b, 1024(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -1024(%rdx){1to2}, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x62,0x80,0x7b]
+          vshufpd $0x7b, -1024(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -1032(%rdx){1to2}, %xmm21, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xd5,0x10,0xc6,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+          vshufpd $0x7b, -1032(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x95,0x20,0xc6,0xc6,0xab]
+          vshufpd $0xab, %ymm22, %ymm29, %ymm24
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24 {%k6}
+// CHECK:  encoding: [0x62,0x21,0x95,0x26,0xc6,0xc6,0xab]
+          vshufpd $0xab, %ymm22, %ymm29, %ymm24 {%k6}
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0x95,0xa6,0xc6,0xc6,0xab]
+          vshufpd $0xab, %ymm22, %ymm29, %ymm24 {%k6} {z}
+
+// CHECK: vshufpd $123, %ymm22, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x95,0x20,0xc6,0xc6,0x7b]
+          vshufpd $0x7b, %ymm22, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, (%rcx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0xc6,0x01,0x7b]
+          vshufpd $0x7b, (%rcx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x95,0x20,0xc6,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vshufpd $0x7b, 291(%rax,%r14,8), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, (%rcx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0xc6,0x01,0x7b]
+          vshufpd $0x7b, (%rcx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 4064(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0xc6,0x42,0x7f,0x7b]
+          vshufpd $0x7b, 4064(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 4096(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0xc6,0x82,0x00,0x10,0x00,0x00,0x7b]
+          vshufpd $0x7b, 4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -4096(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0xc6,0x42,0x80,0x7b]
+          vshufpd $0x7b, -4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -4128(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0xc6,0x82,0xe0,0xef,0xff,0xff,0x7b]
+          vshufpd $0x7b, -4128(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 1016(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0xc6,0x42,0x7f,0x7b]
+          vshufpd $0x7b, 1016(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0xc6,0x82,0x00,0x04,0x00,0x00,0x7b]
+          vshufpd $0x7b, 1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0xc6,0x42,0x80,0x7b]
+          vshufpd $0x7b, -1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -1032(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0xc6,0x82,0xf8,0xfb,0xff,0xff,0x7b]
+          vshufpd $0x7b, -1032(%rdx){1to4}, %ymm29, %ymm24
+
 // CHECK: vscatterqps %xmm28, 123(%r14,%xmm31,8) {%k1}
 // CHECK:  encoding: [0x62,0x02,0x7d,0x01,0xa3,0xa4,0xfe,0x7b,0x00,0x00,0x00]
           vscatterqps %xmm28, 123(%r14, %xmm31,8) {%k1}



