[llvm] r242761 - AVX512 : Implemented VPMADDUBSW and VPMADDWD instruction ,
Igor Breger
igor.breger at intel.com
Tue Jul 21 00:11:28 PDT 2015
Author: ibreger
Date: Tue Jul 21 02:11:28 2015
New Revision: 242761
URL: http://llvm.org/viewvc/llvm-project?rev=242761&view=rev
Log:
AVX512 : Implemented VPMADDUBSW and VPMADDWD instruction ,
Added tests for intrinsics and encoding.
Differential Revision: http://reviews.llvm.org/D11351
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/MC/X86/x86-64-avx512bw.s
llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Jul 21 02:11:28 2015
@@ -4511,6 +4511,36 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_128 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_256 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddw_d_512 :
+ GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_128 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_256 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_pmaddubs_w_512 :
+ GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
// Gather and Scatter ops
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 21 02:11:28 2015
@@ -19008,6 +19008,8 @@ const char *X86TargetLowering::getTarget
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
+ case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jul 21 02:11:28 2015
@@ -403,7 +403,8 @@ namespace llvm {
PMULDQ,
// Vector Multiply Packed UnsignedIntegers with Round and Scale
MULHRS,
-
+ // Multiply and Add Packed Integers
+ VPMADDUBSW, VPMADDWD,
// FMA nodes
FMADD,
FNMADD,
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jul 21 02:11:28 2015
@@ -3229,11 +3229,30 @@ multiclass avx512_packs_all_i16_i8<bits<
v16i8x_info>, EVEX_V128;
}
}
+
+multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo _Src,
+ AVX512VLVectorVTInfo _Dst> {
+ defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
+ _Dst.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
+ _Dst.info256>, EVEX_V256;
+ defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
+ _Dst.info128>, EVEX_V128;
+ }
+}
+
let Predicates = [HasBWI] in {
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
+
+ defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
+ avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
+ defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
+ avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
}
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jul 21 02:11:28 2015
@@ -288,6 +288,9 @@ def X86Packus : SDNode<"X86ISD::PACKUS",
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
+def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>;
+
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Jul 21 02:11:28 2015
@@ -596,6 +596,18 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Tue Jul 21 02:11:28 2015
@@ -1038,3 +1038,36 @@ define <32 x i16>@test_int_x86_avx512_ma
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Tue Jul 21 02:11:28 2015
@@ -3843,3 +3843,70 @@ define <16 x i16>@test_int_x86_avx512_ma
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw.s?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw.s Tue Jul 21 02:11:28 2015
@@ -3776,3 +3776,75 @@
// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0xaa,0xc0,0xdf,0xff,0xff]
vpmulhrsw -8256(%rdx), %zmm27, %zmm21
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x02,0x5d,0x40,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27
+
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3}
+// CHECK: encoding: [0x62,0x02,0x5d,0x43,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3}
+
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x5d,0xc3,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3} {z}
+
+// CHECK: vpmaddubsw (%rcx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x19]
+ vpmaddubsw (%rcx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x22,0x5d,0x40,0x04,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 8128(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x5a,0x7f]
+ vpmaddubsw 8128(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 8192(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x9a,0x00,0x20,0x00,0x00]
+ vpmaddubsw 8192(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw -8192(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x5a,0x80]
+ vpmaddubsw -8192(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw -8256(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x9a,0xc0,0xdf,0xff,0xff]
+ vpmaddubsw -8256(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x01,0x4d,0x40,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0x4d,0x42,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26 {%k2}
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x4d,0xc2,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26 {%k2} {z}
+
+// CHECK: vpmaddwd (%rcx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x11]
+ vpmaddwd (%rcx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x21,0x4d,0x40,0xf5,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 8128(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x52,0x7f]
+ vpmaddwd 8128(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 8192(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x92,0x00,0x20,0x00,0x00]
+ vpmaddwd 8192(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd -8192(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x52,0x80]
+ vpmaddwd -8192(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd -8256(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x92,0xc0,0xdf,0xff,0xff]
+ vpmaddwd -8256(%rdx), %zmm22, %zmm26
+
Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s?rev=242761&r1=242760&r2=242761&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s Tue Jul 21 02:11:28 2015
@@ -6799,3 +6799,291 @@
// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0xa2,0xe0,0xef,0xff,0xff]
vpmulhrsw -4128(%rdx), %ymm20, %ymm28
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x22,0x55,0x00,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28
+
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x22,0x55,0x06,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6}
+
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x55,0x86,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6} {z}
+
+// CHECK: vpmaddubsw (%rcx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x21]
+ vpmaddubsw (%rcx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x22,0x55,0x00,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 2032(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x62,0x7f]
+ vpmaddubsw 2032(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0xa2,0x00,0x08,0x00,0x00]
+ vpmaddubsw 2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw -2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x62,0x80]
+ vpmaddubsw -2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw -2064(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmaddubsw -2064(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x02,0x2d,0x20,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5}
+// CHECK: encoding: [0x62,0x02,0x2d,0x25,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5}
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x2d,0xa5,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5} {z}
+
+// CHECK: vpmaddubsw (%rcx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x31]
+ vpmaddubsw (%rcx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x22,0x2d,0x20,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 4064(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x72,0x7f]
+ vpmaddubsw 4064(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 4096(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0xb2,0x00,0x10,0x00,0x00]
+ vpmaddubsw 4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw -4096(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x72,0x80]
+ vpmaddubsw -4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw -4128(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0xb2,0xe0,0xef,0xff,0xff]
+ vpmaddubsw -4128(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17
+// CHECK: encoding: [0x62,0x81,0x3d,0x00,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0x81,0x3d,0x01,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17 {%k1}
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x3d,0x81,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17 {%k1} {z}
+
+// CHECK: vpmaddwd (%rcx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x09]
+ vpmaddwd (%rcx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xf5,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 2032(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x4a,0x7f]
+ vpmaddwd 2032(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x8a,0x00,0x08,0x00,0x00]
+ vpmaddwd 2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd -2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x4a,0x80]
+ vpmaddwd -2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd -2064(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmaddwd -2064(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x21,0x45,0x20,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24 {%k4}
+// CHECK: encoding: [0x62,0x21,0x45,0x24,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24 {%k4}
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x45,0xa4,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24 {%k4} {z}
+
+// CHECK: vpmaddwd (%rcx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x01]
+ vpmaddwd (%rcx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x21,0x45,0x20,0xf5,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 4064(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x42,0x7f]
+ vpmaddwd 4064(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x82,0x00,0x10,0x00,0x00]
+ vpmaddwd 4096(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd -4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x42,0x80]
+ vpmaddwd -4096(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd -4128(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x82,0xe0,0xef,0xff,0xff]
+ vpmaddwd -4128(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19
+// CHECK: encoding: [0x62,0x82,0x45,0x00,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2}
+// CHECK: encoding: [0x62,0x82,0x45,0x02,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2}
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x45,0x82,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2} {z}
+
+// CHECK: vpmaddubsw (%rcx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x19]
+ vpmaddubsw (%rcx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 4660(%rax,%r14,8), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x04,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddubsw 4660(%rax,%r14,8), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 2032(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x5a,0x7f]
+ vpmaddubsw 2032(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 2048(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x9a,0x00,0x08,0x00,0x00]
+ vpmaddubsw 2048(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw -2048(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x5a,0x80]
+ vpmaddubsw -2048(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw -2064(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmaddubsw -2064(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x65,0x27,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7}
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x65,0xa7,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7} {z}
+
+// CHECK: vpmaddubsw (%rcx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x09]
+ vpmaddubsw (%rcx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4660(%rax,%r14,8), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0x04,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddubsw 4660(%rax,%r14,8), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4064(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x4a,0x7f]
+ vpmaddubsw 4064(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x8a,0x00,0x10,0x00,0x00]
+ vpmaddubsw 4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw -4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x4a,0x80]
+ vpmaddubsw -4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw -4128(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x8a,0xe0,0xef,0xff,0xff]
+ vpmaddubsw -4128(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x4d,0x03,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23 {%k3}
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x4d,0x83,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23 {%k3} {z}
+
+// CHECK: vpmaddwd (%rcx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x39]
+ vpmaddwd (%rcx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 4660(%rax,%r14,8), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xf5,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddwd 4660(%rax,%r14,8), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 2032(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x7a,0x7f]
+ vpmaddwd 2032(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 2048(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0xba,0x00,0x08,0x00,0x00]
+ vpmaddwd 2048(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd -2048(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x7a,0x80]
+ vpmaddwd -2048(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd -2064(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0xba,0xf0,0xf7,0xff,0xff]
+ vpmaddwd -2064(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x5d,0x22,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19 {%k2}
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x5d,0xa2,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19 {%k2} {z}
+
+// CHECK: vpmaddwd (%rcx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x19]
+ vpmaddwd (%rcx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4660(%rax,%r14,8), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xf5,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddwd 4660(%rax,%r14,8), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4064(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x5a,0x7f]
+ vpmaddwd 4064(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x9a,0x00,0x10,0x00,0x00]
+ vpmaddwd 4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd -4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x5a,0x80]
+ vpmaddwd -4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd -4128(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x9a,0xe0,0xef,0xff,0xff]
+ vpmaddwd -4128(%rdx), %ymm20, %ymm19
+
More information about the llvm-commits
mailing list