[llvm] r333857 - [X86] Remove and autoupgrade masked avx512vnni intrinsics using the unmasked intrinsics and select instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 3 16:24:17 PDT 2018
Author: ctopper
Date: Sun Jun 3 16:24:17 2018
New Revision: 333857
URL: http://llvm.org/viewvc/llvm-project?rev=333857&view=rev
Log:
[X86] Remove and autoupgrade masked avx512vnni intrinsics using the unmasked intrinsics and select instructions.
Added:
llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll
- copied, changed from r333856, llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll
- copied, changed from r333856, llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=333857&r1=333856&r2=333857&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Jun 3 16:24:17 2018
@@ -2411,83 +2411,6 @@ let TargetPrefix = "x86" in { // All in
// VNNI
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_vpdpbusd_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusd_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpbusd_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusd_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpbusd_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusd_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpdpbusds_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusds_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpbusds_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusds_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpbusds_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpbusds_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpdpwssd_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssd_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpwssd_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssd_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpwssd_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssd_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpdpwssds_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssds_128 : // FIXME: Remove
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpwssds_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssds_256 : // FIXME: Remove
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_vpdpwssds_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_maskz_vpdpwssds_512 : // FIXME: Remove
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
-
-
def int_x86_avx512_vpdpbusd_128 :
GCCBuiltin<"__builtin_ia32_vpdpbusd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=333857&r1=333856&r2=333857&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sun Jun 3 16:24:17 2018
@@ -270,6 +270,14 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -2663,6 +2671,66 @@ void llvm::UpgradeIntrinsicCall(CallInst
: Builder.CreateBitCast(CI->getArgOperand(1),
CI->getType());
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
+ Name.startswith("avx512.maskz.vpdpbusd.") ||
+ Name.startswith("avx512.mask.vpdpbusds.") ||
+ Name.startswith("avx512.maskz.vpdpbusds."))) {
+ bool ZeroMask = Name[11] == 'z';
+ bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
+ unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusd_128;
+ else if (VecWidth == 256 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusd_256;
+ else if (VecWidth == 512 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusd_512;
+ else if (VecWidth == 128 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusds_128;
+ else if (VecWidth == 256 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusds_256;
+ else if (VecWidth == 512 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpbusds_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2) };
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
+ : CI->getArgOperand(0);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
+ Name.startswith("avx512.maskz.vpdpwssd.") ||
+ Name.startswith("avx512.mask.vpdpwssds.") ||
+ Name.startswith("avx512.maskz.vpdpwssds."))) {
+ bool ZeroMask = Name[11] == 'z';
+ bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
+ unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssd_128;
+ else if (VecWidth == 256 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssd_256;
+ else if (VecWidth == 512 && !IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssd_512;
+ else if (VecWidth == 128 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssds_128;
+ else if (VecWidth == 256 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssds_256;
+ else if (VecWidth == 512 && IsSaturating)
+ IID = Intrinsic::x86_avx512_vpdpwssds_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2) };
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
+ : CI->getArgOperand(0);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=333857&r1=333856&r2=333857&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Jun 3 16:24:17 2018
@@ -1048,19 +1048,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_128, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_256, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_512, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_128, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_256, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_512, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_128, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_256, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_512, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_128, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
-
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
@@ -1191,19 +1178,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
X86ISD::FMADDSUB_RND),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_128, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_256, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_512, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_128, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_256, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_512, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_128, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_256, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_512, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_128, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
-
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
Copied: llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll (from r333856, llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll?p2=llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll&p1=llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll&r1=333856&r2=333857&rev=333857&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll Sun Jun 3 16:24:17 2018
@@ -8,15 +8,15 @@ declare <8 x i32> @llvm.x86.avx512.maskz
define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xe2]
-; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xe2]
+; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -46,15 +46,15 @@ declare <4 x i32> @llvm.x86.avx512.maskz
define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xe2]
-; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xe2]
+; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -84,15 +84,15 @@ declare <8 x i32> @llvm.x86.avx512.maskz
define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xe2]
-; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xe2]
+; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -122,15 +122,15 @@ declare <4 x i32> @llvm.x86.avx512.maskz
define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xe2]
-; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xe2]
+; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -160,15 +160,15 @@ declare <8 x i32> @llvm.x86.avx512.maskz
define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xe2]
-; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xe2]
+; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -198,15 +198,15 @@ declare <4 x i32> @llvm.x86.avx512.maskz
define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xe2]
-; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xe2]
+; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -237,15 +237,15 @@ declare <8 x i32> @llvm.x86.avx512.maskz
define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xe2]
-; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xe2]
+; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -275,15 +275,15 @@ declare <4 x i32> @llvm.x86.avx512.maskz
define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xe2]
-; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xe2]
+; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll?rev=333857&r1=333856&r2=333857&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll Sun Jun 3 16:24:17 2018
@@ -2,21 +2,20 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xe2]
-; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xe2]
+; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -32,29 +31,32 @@ define <8 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32>, <8 x i32>* %x2p
- %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
- %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res2, %res3
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %5 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+ %res3 = add <8 x i32> %3, %4
+ %res4 = add <8 x i32> %7, %res3
ret <8 x i32> %res4
}
-declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
-declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xe2]
-; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xe2]
+; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -70,29 +72,34 @@ define <4 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32>, <4 x i32>* %x2p
- %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
- %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
- %res3 = add <4 x i32> %res, %res1
- %res4 = add <4 x i32> %res2, %res3
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %5 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract1, <4 x i32> %5, <4 x i32> zeroinitializer
+ %res3 = add <4 x i32> %3, %4
+ %res4 = add <4 x i32> %7, %res3
ret <4 x i32> %res4
}
-declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xe2]
-; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xe2]
+; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -108,29 +115,32 @@ define <8 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32>, <8 x i32>* %x2p
- %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
- %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res2, %res3
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %5 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+ %res3 = add <8 x i32> %3, %4
+ %res4 = add <8 x i32> %7, %res3
ret <8 x i32> %res4
}
-declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
-declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xe2]
-; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xe2]
+; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -146,29 +156,34 @@ define <4 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32>, <4 x i32>* %x2p
- %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
- %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
- %res3 = add <4 x i32> %res, %res1
- %res4 = add <4 x i32> %res2, %res3
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %5 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract1, <4 x i32> %5, <4 x i32> zeroinitializer
+ %res3 = add <4 x i32> %3, %4
+ %res4 = add <4 x i32> %7, %res3
ret <4 x i32> %res4
}
-declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>)
define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xe2]
-; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xe2]
+; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -184,29 +199,32 @@ define <8 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32>, <8 x i32>* %x2p
- %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
- %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res2, %res3
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %5 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+ %res3 = add <8 x i32> %3, %4
+ %res4 = add <8 x i32> %7, %res3
ret <8 x i32> %res4
}
-declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
-declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xe2]
-; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xe2]
+; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -222,30 +240,34 @@ define <4 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32>, <4 x i32>* %x2p
- %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
- %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
- %res3 = add <4 x i32> %res, %res1
- %res4 = add <4 x i32> %res2, %res3
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %5 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract1, <4 x i32> %5, <4 x i32> zeroinitializer
+ %res3 = add <4 x i32> %3, %4
+ %res4 = add <4 x i32> %7, %res3
ret <4 x i32> %res4
}
-
-declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>)
define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18]
; X86-NEXT: vmovaps %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0]
-; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xe2]
-; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2]
-; X86-NEXT: vpaddd %ymm4, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm4 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xe2]
+; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2]
+; X86-NEXT: vpaddd %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -261,29 +283,32 @@ define <8 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32>, <8 x i32>* %x2p
- %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
- %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res2, %res3
+ %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
+ %4 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %5 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+ %res3 = add <8 x i32> %3, %4
+ %res4 = add <8 x i32> %7, %res3
ret <8 x i32> %res4
}
-declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
-declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>)
define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18]
; X86-NEXT: vmovaps %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0]
-; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xe2]
-; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2]
-; X86-NEXT: vpaddd %xmm4, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm4 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xe2]
+; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2]
+; X86-NEXT: vpaddd %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -299,11 +324,17 @@ define <4 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32>, <4 x i32>* %x2p
- %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
- %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
- %res3 = add <4 x i32> %res, %res1
- %res4 = add <4 x i32> %res2, %res3
+ %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
+ %4 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %5 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
+ %6 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %7 = select <4 x i1> %extract1, <4 x i32> %5, <4 x i32> zeroinitializer
+ %res3 = add <4 x i32> %3, %4
+ %res4 = add <4 x i32> %7, %res3
ret <4 x i32> %res4
}
Copied: llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll (from r333856, llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll?p2=llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll&p1=llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll&r1=333856&r2=333857&rev=333857&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll Sun Jun 3 16:24:17 2018
@@ -13,9 +13,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xe2]
-; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xe2]
+; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -50,9 +50,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xe2]
-; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xe2]
+; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -87,9 +87,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xe2]
-; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xe2]
+; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -124,9 +124,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xe2]
-; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xe2]
+; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
Modified: llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll?rev=333857&r1=333856&r2=333857&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll Sun Jun 3 16:24:17 2018
@@ -2,8 +2,7 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
@@ -13,9 +12,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xe2]
-; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xe2]
+; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -31,16 +30,19 @@ define <16 x i32>@test_int_x86_avx512_ma
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
- %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
- %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res2, %res3
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %5 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %6 = bitcast i16 %x3 to <16 x i1>
+ %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
+ %res3 = add <16 x i32> %3, %4
+ %res4 = add <16 x i32> %7, %res3
ret <16 x i32> %res4
}
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>)
define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
@@ -50,9 +52,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xe2]
-; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xe2]
+; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -68,16 +70,19 @@ define <16 x i32>@test_int_x86_avx512_ma
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
- %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
- %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res2, %res3
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %5 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %6 = bitcast i16 %x3 to <16 x i1>
+ %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
+ %res3 = add <16 x i32> %3, %4
+ %res4 = add <16 x i32> %7, %res3
ret <16 x i32> %res4
}
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>)
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
@@ -87,9 +92,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xe2]
-; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xe2]
+; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -105,16 +110,19 @@ define <16 x i32>@test_int_x86_avx512_ma
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
- %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
- %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res2, %res3
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %5 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %6 = bitcast i16 %x3 to <16 x i1>
+ %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
+ %res3 = add <16 x i32> %3, %4
+ %res4 = add <16 x i32> %7, %res3
ret <16 x i32> %res4
}
-declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>)
define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
@@ -124,9 +132,9 @@ define <16 x i32>@test_int_x86_avx512_ma
; X86-NEXT: vmovaps %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18]
; X86-NEXT: vmovaps %zmm0, %zmm4 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0]
-; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xe2]
-; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2]
-; X86-NEXT: vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
+; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xe2]
+; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2]
+; X86-NEXT: vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -142,11 +150,15 @@ define <16 x i32>@test_int_x86_avx512_ma
; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <16 x i32>, <16 x i32>* %x2p
- %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
- %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res2, %res3
+ %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %5 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
+ %6 = bitcast i16 %x3 to <16 x i1>
+ %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
+ %res3 = add <16 x i32> %3, %4
+ %res4 = add <16 x i32> %7, %res3
ret <16 x i32> %res4
}
More information about the llvm-commits
mailing list