[llvm] r352343 - [X86] Add new variadic avx512 compress/expand intrinsics that use vXi1 types for the mask argument.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 27 23:03:03 PST 2019
Author: ctopper
Date: Sun Jan 27 23:03:03 2019
New Revision: 352343
URL: http://llvm.org/viewvc/llvm-project?rev=352343&view=rev
Log:
[X86] Add new variadic avx512 compress/expand intrinsics that use vXi1 types for the mask argument.
Remove and autoupgrade the old intrinsics
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Jan 27 23:03:03 2019
@@ -4029,156 +4029,16 @@ let TargetPrefix = "x86" in {
// Compress, Expand
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_compress_ps_512 :
- GCCBuiltin<"__builtin_ia32_compresssf512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_512 :
- GCCBuiltin<"__builtin_ia32_compressdf512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_ps_256 :
- GCCBuiltin<"__builtin_ia32_compresssf256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_256 :
- GCCBuiltin<"__builtin_ia32_compressdf256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_ps_128 :
- GCCBuiltin<"__builtin_ia32_compresssf128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_128 :
- GCCBuiltin<"__builtin_ia32_compressdf128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_compress_d_512 :
- GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_512 :
- GCCBuiltin<"__builtin_ia32_compressdi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_d_256 :
- GCCBuiltin<"__builtin_ia32_compresssi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_256 :
- GCCBuiltin<"__builtin_ia32_compressdi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_d_128 :
- GCCBuiltin<"__builtin_ia32_compresssi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_128 :
- GCCBuiltin<"__builtin_ia32_compressdi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_compress_b_512 :
- GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_512 :
- GCCBuiltin<"__builtin_ia32_compresshi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_b_256 :
- GCCBuiltin<"__builtin_ia32_compressqi256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_256 :
- GCCBuiltin<"__builtin_ia32_compresshi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_b_128 :
- GCCBuiltin<"__builtin_ia32_compressqi128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_128 :
- GCCBuiltin<"__builtin_ia32_compresshi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
-// expand
- def int_x86_avx512_mask_expand_ps_512 :
- GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_512 :
- GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_ps_256 :
- GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_256 :
- GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_ps_128 :
- GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_128 :
- GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_expand_d_512 :
- GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_512 :
- GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_d_256 :
- GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_256 :
- GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_d_128 :
- GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_128 :
- GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_expand_b_512 :
- GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_512 :
- GCCBuiltin<"__builtin_ia32_expandhi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_b_256 :
- GCCBuiltin<"__builtin_ia32_expandqi256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_256 :
- GCCBuiltin<"__builtin_ia32_expandhi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_b_128 :
- GCCBuiltin<"__builtin_ia32_expandqi128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_128 :
- GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_compress :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_expand :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
}
// truncate
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sun Jan 27 23:03:03 2019
@@ -342,6 +342,16 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
+ Name.startswith("avx512.mask.expand.b") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.w") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.d") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.q") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.p") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.b") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.w") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.d") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.q") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.p") || // Added in 9.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
@@ -2055,6 +2065,19 @@ void llvm::UpgradeIntrinsicCall(CallInst
Intrinsic::masked_compressstore,
ResultTy);
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
+ } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
+ Name.startswith("avx512.mask.expand."))) {
+ Type *ResultTy = CI->getType();
+
+ Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
+ ResultTy->getVectorNumElements());
+
+ bool IsCompress = Name[12] == 'c';
+ Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
+ : Intrinsic::x86_avx512_mask_expand;
+ Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
+ Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
+ MaskVec });
} else if (IsX86 && Name.startswith("xop.vpcom")) {
bool IsSigned;
if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jan 27 23:03:03 2019
@@ -22037,12 +22037,9 @@ SDValue X86TargetLowering::LowerINTRINSI
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
- if (isAllOnesConstant(Mask)) // return data as is
+ if (ISD::isBuildVectorAllOnes(Mask.getNode())) // return data as is
return Op.getOperand(1);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
-
// Avoid false dependency.
if (PassThru.isUndef())
PassThru = DAG.getConstant(0, dl, VT);
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Jan 27 23:03:03 2019
@@ -465,41 +465,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG,
+ X86_INTRINSIC_DATA(avx512_mask_compress, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
@@ -607,41 +573,7 @@ static const IntrinsicData IntrinsicsWi
X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FDIVS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_512, COMPRESS_EXPAND_IN_REG,
+ X86_INTRINSIC_DATA(avx512_mask_expand, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Sun Jan 27 23:03:03 2019
@@ -9782,3 +9782,363 @@ define <16 x float> @test_int_x86_avx512
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
+
+define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_pd_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_pd_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_pd_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_pd_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_compress_pd_512(<8 x double> %data) {
+; CHECK-LABEL: test_compress_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_compress_ps_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_ps_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
+; X86-LABEL: test_maskz_compress_ps_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_ps_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_compress_ps_512(<16 x float> %data) {
+; CHECK-LABEL: test_compress_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
+
+define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_q_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_q_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_q_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_q_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
+; CHECK-LABEL: test_compress_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
+
+define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_compress_d_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_d_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
+; X86-LABEL: test_maskz_compress_d_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_d_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
+; CHECK-LABEL: test_compress_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
+
+define <8 x double> @test_expand_pd_512(<8 x double> %data) {
+; CHECK-LABEL: test_expand_pd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_pd_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_pd_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_pd_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_pd_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+define <16 x float> @test_expand_ps_512(<16 x float> %data) {
+; CHECK-LABEL: test_expand_ps_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_expand_ps_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_ps_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
+; X86-LABEL: test_maskz_expand_ps_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_ps_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
+
+define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
+; CHECK-LABEL: test_expand_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_q_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_q_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_q_512:
+; X86: ## %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_q_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
+
+define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
+; CHECK-LABEL: test_expand_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_expand_d_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_d_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
+; X86-LABEL: test_maskz_expand_d_512:
+; X86: ## %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
+; X86-NEXT: retl ## encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_d_512:
+; X64: ## %bb.0:
+; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
+; X64-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Jan 27 23:03:03 2019
@@ -9,8 +9,9 @@ define <8 x double> @test_mask_compress_
; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
+ ret <8 x double> %2
}
define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
@@ -19,20 +20,19 @@ define <8 x double> @test_maskz_compress
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
- ret <8 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
+ ret <8 x double> %2
}
define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
- ret <8 x double> %res
+ %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x double> %1
}
-declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
-
define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_ps_512:
; CHECK: ## %bb.0:
@@ -40,8 +40,9 @@ define <16 x float> @test_mask_compress_
; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
+ ret <16 x float> %2
}
define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
@@ -50,20 +51,19 @@ define <16 x float> @test_maskz_compress
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
- ret <16 x float> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
+ ret <16 x float> %2
}
define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x float> %1
}
-declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
-
define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_q_512:
; CHECK: ## %bb.0:
@@ -71,8 +71,9 @@ define <8 x i64> @test_mask_compress_q_5
; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
+ ret <8 x i64> %2
}
define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
@@ -81,20 +82,19 @@ define <8 x i64> @test_maskz_compress_q_
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
- ret <8 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
+ ret <8 x i64> %2
}
define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
- ret <8 x i64> %res
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i64> %1
}
-declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
-
define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_d_512:
; CHECK: ## %bb.0:
@@ -102,8 +102,9 @@ define <16 x i32> @test_mask_compress_d_
; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
+ ret <16 x i32> %2
}
define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
@@ -112,26 +113,25 @@ define <16 x i32> @test_maskz_compress_d
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
- ret <16 x i32> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
+ ret <16 x i32> %2
}
define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i32> %1
}
-declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
-
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
- ret <8 x double> %res
+ %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x double> %1
}
define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
@@ -141,8 +141,9 @@ define <8 x double> @test_mask_expand_pd
; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
+ ret <8 x double> %2
}
define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
@@ -151,18 +152,17 @@ define <8 x double> @test_maskz_expand_p
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
- ret <8 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
+ ret <8 x double> %2
}
-declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
-
define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
- ret <16 x float> %res
+ %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x float> %1
}
define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
@@ -172,8 +172,9 @@ define <16 x float> @test_mask_expand_ps
; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
+ ret <16 x float> %2
}
define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
@@ -182,18 +183,17 @@ define <16 x float> @test_maskz_expand_p
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
- ret <16 x float> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
+ ret <16 x float> %2
}
-declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
-
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
- ret <8 x i64> %res
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i64> %1
}
define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
@@ -203,8 +203,9 @@ define <8 x i64> @test_mask_expand_q_512
; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
+ ret <8 x i64> %2
}
define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
@@ -213,18 +214,17 @@ define <8 x i64> @test_maskz_expand_q_51
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
- ret <8 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
+ ret <8 x i64> %2
}
-declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
-
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i32> %1
}
define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
@@ -234,8 +234,9 @@ define <16 x i32> @test_mask_expand_d_51
; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
+ ret <16 x i32> %2
}
define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
@@ -244,12 +245,11 @@ define <16 x i32> @test_maskz_expand_d_5
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
- ret <16 x i32> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
+ ret <16 x i32> %2
}
-declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
-
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0:
@@ -5546,3 +5546,12 @@ entry:
%3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e
ret <16 x float> %3
}
+
+declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)
+declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>)
+declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
+declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
+declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>)
+declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>)
+declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
+declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll Sun Jan 27 23:03:03 2019
@@ -55,6 +55,50 @@ define <32 x i16> @test_expand_load_w_51
ret <32 x i16> %res
}
+define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
+; CHECK-LABEL: test_expand_w_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
+; X86-LABEL: test_mask_expand_w_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_w_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
+; X86-LABEL: test_maskz_expand_w_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_w_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
+
define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86: # %bb.0:
@@ -108,6 +152,50 @@ define <64 x i8> @test_expand_load_b_512
ret <64 x i8> %res
}
+define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
+; CHECK-LABEL: test_expand_b_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
+; X86-LABEL: test_mask_expand_b_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_b_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
+; X86-LABEL: test_maskz_expand_b_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_b_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
+
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86: # %bb.0:
@@ -148,6 +236,50 @@ define void @test_compress_store_w_512(i
ret void
}
+define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
+; X86-LABEL: test_mask_compress_w_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_w_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
+; X86-LABEL: test_maskz_compress_w_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_w_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
+; CHECK-LABEL: test_compress_w_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
+
define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
@@ -188,6 +320,50 @@ define void @test_compress_store_b_512(i
ret void
}
+define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
+; X86-LABEL: test_mask_compress_b_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_b_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
+; X86-LABEL: test_maskz_compress_b_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_b_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
+; CHECK-LABEL: test_compress_b_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
+
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll Sun Jan 27 23:03:03 2019
@@ -62,8 +62,8 @@ define <32 x i16> @test_expand_w_512(<32
; CHECK-LABEL: test_expand_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <32 x i16> %1
}
define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
@@ -80,8 +80,9 @@ define <32 x i16> @test_mask_expand_w_51
; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
+ ret <32 x i16> %2
}
define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
@@ -96,12 +97,11 @@ define <32 x i16> @test_maskz_expand_w_5
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
+ ret <32 x i16> %2
}
-declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
-
define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86: # %bb.0:
@@ -159,16 +159,14 @@ define <64 x i8> @test_expand_b_512(<64
; CHECK-LABEL: test_expand_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
- ret <64 x i8> %res
+ %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <64 x i8> %1
}
define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
@@ -179,16 +177,15 @@ define <64 x i8> @test_mask_expand_b_512
; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %res
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
+ ret <64 x i8> %2
}
define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -197,12 +194,11 @@ define <64 x i8> @test_maskz_expand_b_51
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
+ ret <64 x i8> %2
}
-declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
-
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86: # %bb.0:
@@ -224,6 +220,26 @@ define void @test_mask_compress_store_w_
ret void
}
+define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
+; X86-LABEL: test_compress_store_w_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
+; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_compress_store_w_512:
+; X64: # %bb.0:
+; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
+; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = bitcast i8* %addr to i16*
+ call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret void
+}
+
define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86: # %bb.0:
@@ -238,8 +254,9 @@ define <32 x i16> @test_mask_compress_w_
; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
+ ret <32 x i16> %2
}
define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
@@ -254,66 +271,62 @@ define <32 x i16> @test_maskz_compress_w
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
- ret <32 x i16> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
+ ret <32 x i16> %2
}
define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res
+ %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <32 x i16> %1
}
-declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
-
-define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
-; X86-LABEL: test_compress_store_w_512:
+define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
-; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
-; X64-LABEL: test_compress_store_w_512:
+; X64-LABEL: test_mask_compress_store_b_512:
; X64: # %bb.0:
-; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
-; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
+; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
+; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = bitcast i8* %addr to i16*
- call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ %1 = bitcast i64 %mask to <64 x i1>
+ call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
ret void
}
-define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
-; X86-LABEL: test_mask_compress_store_b_512:
+define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
+; X86-LABEL: test_compress_store_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
-; X64-LABEL: test_mask_compress_store_b_512:
+; X64-LABEL: test_compress_store_b_512:
; X64: # %bb.0:
-; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
+; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = bitcast i64 %mask to <64 x i1>
- call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
+ call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
ret void
}
define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
@@ -324,16 +337,15 @@ define <64 x i8> @test_mask_compress_b_5
; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %res
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
+ ret <64 x i8> %2
}
define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -342,37 +354,17 @@ define <64 x i8> @test_maskz_compress_b_
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
- ret <64 x i8> %res
+ %1 = bitcast i64 %mask to <64 x i1>
+ %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
+ ret <64 x i8> %2
}
define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
- ret <64 x i8> %res
-}
-
-declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
-
-define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
-; X86-LABEL: test_compress_store_b_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
-; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
-; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_compress_store_b_512:
-; X64: # %bb.0:
-; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
-; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
-; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; X64-NEXT: retq # encoding: [0xc3]
- call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret void
+ %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <64 x i8> %1
}
define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
@@ -761,3 +753,7 @@ declare <32 x i16> @llvm.masked.expandlo
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
+declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
+declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
+declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
+declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll Sun Jan 27 23:03:03 2019
@@ -57,6 +57,52 @@ define <8 x i16> @test_expand_load_w_128
ret <8 x i16> %res
}
+define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
+; CHECK-LABEL: test_expand_w_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_w_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_w_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_w_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_w_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
+
define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
@@ -110,6 +156,50 @@ define <16 x i8> @test_expand_load_b_128
ret <16 x i8> %res
}
+define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
+; CHECK-LABEL: test_expand_b_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_expand_b_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_b_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
+; X86-LABEL: test_maskz_expand_b_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_b_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
+
define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86: # %bb.0:
@@ -147,6 +237,52 @@ define void @test_compress_store_w_128(i
ret void
}
+define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_w_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_w_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_w_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_w_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
+; CHECK-LABEL: test_compress_w_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
+
define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86: # %bb.0:
@@ -183,6 +319,50 @@ define void @test_compress_store_b_128(i
ret void
}
+define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_compress_b_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_b_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
+; X86-LABEL: test_maskz_compress_b_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_b_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
+; CHECK-LABEL: test_compress_b_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
+
define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86: # %bb.0:
@@ -236,6 +416,50 @@ define <16 x i16> @test_expand_load_w_25
ret <16 x i16> %res
}
+define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
+; CHECK-LABEL: test_expand_w_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_expand_w_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_w_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
+; X86-LABEL: test_maskz_expand_w_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_w_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
+
define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86: # %bb.0:
@@ -289,6 +513,50 @@ define <32 x i8> @test_expand_load_b_256
ret <32 x i8> %res
}
+define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
+; CHECK-LABEL: test_expand_b_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
+; X86-LABEL: test_mask_expand_b_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_b_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
+; X86-LABEL: test_maskz_expand_b_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_b_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
+
define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86: # %bb.0:
@@ -329,6 +597,50 @@ define void @test_compress_store_w_256(i
ret void
}
+define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
+; X86-LABEL: test_mask_compress_w_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_w_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
+; X86-LABEL: test_maskz_compress_w_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_w_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
+; CHECK-LABEL: test_compress_w_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
+
define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86: # %bb.0:
@@ -369,6 +681,50 @@ define void @test_compress_store_b_256(i
ret void
}
+define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
+; X86-LABEL: test_mask_compress_b_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_b_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
+; X86-LABEL: test_maskz_compress_b_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_b_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
+; CHECK-LABEL: test_compress_b_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
+
define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll Sun Jan 27 23:03:03 2019
@@ -64,8 +64,8 @@ define <8 x i16> @test_expand_w_128(<8 x
; CHECK-LABEL: test_expand_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
- ret <8 x i16> %res
+ %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i16> %1
}
define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
@@ -83,8 +83,9 @@ define <8 x i16> @test_mask_expand_w_128
; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
+ ret <8 x i16> %2
}
define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
@@ -100,12 +101,11 @@ define <8 x i16> @test_maskz_expand_w_12
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
- ret <8 x i16> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
+ ret <8 x i16> %2
}
-declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
-
define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
@@ -163,8 +163,8 @@ define <16 x i8> @test_expand_b_128(<16
; CHECK-LABEL: test_expand_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
- ret <16 x i8> %res
+ %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i8> %1
}
define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
@@ -181,8 +181,9 @@ define <16 x i8> @test_mask_expand_b_128
; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
+ ret <16 x i8> %2
}
define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
@@ -197,12 +198,11 @@ define <16 x i8> @test_maskz_expand_b_12
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
- ret <16 x i8> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
+ ret <16 x i8> %2
}
-declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
-
define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86: # %bb.0:
@@ -256,8 +256,9 @@ define <8 x i16> @test_mask_compress_w_1
; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
+ ret <8 x i16> %2
}
define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
@@ -273,20 +274,19 @@ define <8 x i16> @test_maskz_compress_w_
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
- ret <8 x i16> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
+ ret <8 x i16> %2
}
define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
- ret <8 x i16> %res
+ %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i16> %1
}
-declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
-
define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86: # %bb.0:
@@ -336,8 +336,9 @@ define <16 x i8> @test_mask_compress_b_1
; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
+ ret <16 x i8> %2
}
define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
@@ -352,20 +353,19 @@ define <16 x i8> @test_maskz_compress_b_
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
- ret <16 x i8> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
+ ret <16 x i8> %2
}
define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
- ret <16 x i8> %res
+ %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i8> %1
}
-declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
-
define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86: # %bb.0:
@@ -426,8 +426,8 @@ define <16 x i16> @test_expand_w_256(<16
; CHECK-LABEL: test_expand_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
- ret <16 x i16> %res
+ %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i16> %1
}
define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
@@ -444,8 +444,9 @@ define <16 x i16> @test_mask_expand_w_25
; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
+ ret <16 x i16> %2
}
define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
@@ -460,12 +461,11 @@ define <16 x i16> @test_maskz_expand_w_2
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
- ret <16 x i16> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
+ ret <16 x i16> %2
}
-declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
-
define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86: # %bb.0:
@@ -523,8 +523,8 @@ define <32 x i8> @test_expand_b_256(<32
; CHECK-LABEL: test_expand_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
- ret <32 x i8> %res
+ %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <32 x i8> %1
}
define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
@@ -541,8 +541,9 @@ define <32 x i8> @test_mask_expand_b_256
; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
+ ret <32 x i8> %2
}
define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
@@ -557,12 +558,11 @@ define <32 x i8> @test_maskz_expand_b_25
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
- ret <32 x i8> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
+ ret <32 x i8> %2
}
-declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
-
define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86: # %bb.0:
@@ -618,8 +618,9 @@ define <16 x i16> @test_mask_compress_w_
; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
+ ret <16 x i16> %2
}
define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
@@ -634,20 +635,19 @@ define <16 x i16> @test_maskz_compress_w
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
- ret <16 x i16> %res
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
+ ret <16 x i16> %2
}
define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
- ret <16 x i16> %res
+ %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i16> %1
}
-declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
-
define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86: # %bb.0:
@@ -701,8 +701,9 @@ define <32 x i8> @test_mask_compress_b_2
; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
+ ret <32 x i8> %2
}
define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
@@ -717,20 +718,19 @@ define <32 x i8> @test_maskz_compress_b_
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
- ret <32 x i8> %res
+ %1 = bitcast i32 %mask to <32 x i1>
+ %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
+ ret <32 x i8> %2
}
define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
- ret <32 x i8> %res
+ %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <32 x i8> %1
}
-declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
-
define <4 x i32> @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86: # %bb.0:
@@ -1551,3 +1551,11 @@ declare <16 x i16> @llvm.masked.expandlo
declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
+declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
+declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
+declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
+declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
+declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
+declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
+declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
+declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Sun Jan 27 23:03:03 2019
@@ -14578,10 +14578,746 @@ define <4 x i32>@test_int_x86_avx512_mas
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
- %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
- %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
- %res3 = add <4 x i32> %res0, %res1
- %res4 = add <4 x i32> %res3, %res2
- ret <4 x i32> %res4
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
}
+
+define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_compress_pd_128(<2 x double> %data) {
+; CHECK-LABEL: test_compress_pd_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
+
+define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_compress_ps_128(<4 x float> %data) {
+; CHECK-LABEL: test_compress_ps_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+
+define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
+; CHECK-LABEL: test_compress_q_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
+
+define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
+; CHECK-LABEL: test_compress_d_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+
+define <2 x double> @test_expand_pd_128(<2 x double> %data) {
+; CHECK-LABEL: test_expand_pd_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_pd_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_pd_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
+
+define <4 x float> @test_expand_ps_128(<4 x float> %data) {
+; CHECK-LABEL: test_expand_ps_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_ps_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_ps_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+
+define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
+; CHECK-LABEL: test_expand_q_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_q_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_q_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
+
+define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
+; CHECK-LABEL: test_expand_d_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_d_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_d_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+
+define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_pd_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_pd_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_pd_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_pd_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_compress_pd_256(<4 x double> %data) {
+; CHECK-LABEL: test_compress_pd_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
+ ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
+
+define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_ps_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_ps_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_ps_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_ps_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_compress_ps_256(<8 x float> %data) {
+; CHECK-LABEL: test_compress_ps_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
+
+define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_q_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_q_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_q_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_q_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
+; CHECK-LABEL: test_compress_q_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
+
+define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_compress_d_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_compress_d_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
+; X86-LABEL: test_maskz_compress_d_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_compress_d_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
+; CHECK-LABEL: test_compress_d_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
+
+define <4 x double> @test_expand_pd_256(<4 x double> %data) {
+; CHECK-LABEL: test_expand_pd_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_pd_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_pd_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_pd_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_pd_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
+
+define <8 x float> @test_expand_ps_256(<8 x float> %data) {
+; CHECK-LABEL: test_expand_ps_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_ps_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_ps_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_ps_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_ps_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
+
+define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
+; CHECK-LABEL: test_expand_q_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_q_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_q_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_q_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_q_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
+
+define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
+; CHECK-LABEL: test_expand_d_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
+; X86-LABEL: test_mask_expand_d_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_expand_d_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
+; X86-LABEL: test_maskz_expand_d_256:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_maskz_expand_d_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=352343&r1=352342&r2=352343&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Sun Jan 27 23:03:03 2019
@@ -17,8 +17,10 @@ define <2 x double> @test_mask_compress_
; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
- ret <2 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
+ ret <2 x double> %2
}
define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
@@ -34,20 +36,20 @@ define <2 x double> @test_maskz_compress
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
- ret <2 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
+ ret <2 x double> %2
}
define <2 x double> @test_compress_pd_128(<2 x double> %data) {
; CHECK-LABEL: test_compress_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
+ ret <2 x double> %1
}
-declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
-
define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_128:
; X86: # %bb.0:
@@ -63,8 +65,10 @@ define <4 x float> @test_mask_compress_p
; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
- ret <4 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
+ ret <4 x float> %2
}
define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
@@ -80,20 +84,20 @@ define <4 x float> @test_maskz_compress_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
- ret <4 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
+ ret <4 x float> %2
}
define <4 x float> @test_compress_ps_128(<4 x float> %data) {
; CHECK-LABEL: test_compress_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x float> %1
}
-declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
-
define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_128:
; X86: # %bb.0:
@@ -109,8 +113,10 @@ define <2 x i64> @test_mask_compress_q_1
; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
- ret <2 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
+ ret <2 x i64> %2
}
define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
@@ -126,20 +132,20 @@ define <2 x i64> @test_maskz_compress_q_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
- ret <2 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
+ ret <2 x i64> %2
}
define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
; CHECK-LABEL: test_compress_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
- ret <2 x i64> %res
+ %1 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
+ ret <2 x i64> %1
}
-declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
-
define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_128:
; X86: # %bb.0:
@@ -155,8 +161,10 @@ define <4 x i32> @test_mask_compress_d_1
; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
- ret <4 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
+ ret <4 x i32> %2
}
define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
@@ -172,26 +180,26 @@ define <4 x i32> @test_maskz_compress_d_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
- ret <4 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i32> %2
}
define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
; CHECK-LABEL: test_compress_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
- ret <4 x i32> %res
+ %1 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i32> %1
}
-declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
-
define <2 x double> @test_expand_pd_128(<2 x double> %data) {
; CHECK-LABEL: test_expand_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
- ret <2 x double> %res
+ %1 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>)
+ ret <2 x double> %1
}
define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
@@ -209,8 +217,10 @@ define <2 x double> @test_mask_expand_pd
; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
- ret <2 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract)
+ ret <2 x double> %2
}
define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
@@ -226,18 +236,18 @@ define <2 x double> @test_maskz_expand_p
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
- ret <2 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract)
+ ret <2 x double> %2
}
-declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
-
define <4 x float> @test_expand_ps_128(<4 x float> %data) {
; CHECK-LABEL: test_expand_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
- ret <4 x float> %res
+ %1 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x float> %1
}
define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
@@ -255,8 +265,10 @@ define <4 x float> @test_mask_expand_ps_
; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
- ret <4 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract)
+ ret <4 x float> %2
}
define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
@@ -272,18 +284,18 @@ define <4 x float> @test_maskz_expand_ps
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
- ret <4 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract)
+ ret <4 x float> %2
}
-declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
-
define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
; CHECK-LABEL: test_expand_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
- ret <2 x i64> %res
+ %1 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>)
+ ret <2 x i64> %1
}
define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
@@ -301,8 +313,10 @@ define <2 x i64> @test_mask_expand_q_128
; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
- ret <2 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract)
+ ret <2 x i64> %2
}
define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
@@ -318,18 +332,18 @@ define <2 x i64> @test_maskz_expand_q_12
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
- ret <2 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1>
+ %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract)
+ ret <2 x i64> %2
}
-declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
-
define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
; CHECK-LABEL: test_expand_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
- ret <4 x i32> %res
+ %1 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i32> %1
}
define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
@@ -347,8 +361,10 @@ define <4 x i32> @test_mask_expand_d_128
; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
- ret <4 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract)
+ ret <4 x i32> %2
}
define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
@@ -364,12 +380,12 @@ define <4 x i32> @test_maskz_expand_d_12
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
- ret <4 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i32> %2
}
-declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
-
define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_256:
; X86: # %bb.0:
@@ -385,8 +401,10 @@ define <4 x double> @test_mask_compress_
; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
+ ret <4 x double> %2
}
define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
@@ -402,20 +420,20 @@ define <4 x double> @test_maskz_compress
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
+ ret <4 x double> %2
}
define <4 x double> @test_compress_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_compress_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x double> %1
}
-declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-
define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_256:
; X86: # %bb.0:
@@ -431,8 +449,9 @@ define <8 x float> @test_mask_compress_p
; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
@@ -448,20 +467,19 @@ define <8 x float> @test_maskz_compress_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_compress_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_compress_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x float> %1
}
-declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
-
define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_256:
; X86: # %bb.0:
@@ -477,8 +495,10 @@ define <4 x i64> @test_mask_compress_q_2
; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
@@ -494,20 +514,20 @@ define <4 x i64> @test_maskz_compress_q_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_compress_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
- ret <4 x i64> %res
+ %1 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i64> %1
}
-declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
-
define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_256:
; X86: # %bb.0:
@@ -523,8 +543,9 @@ define <8 x i32> @test_mask_compress_d_2
; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
@@ -540,26 +561,25 @@ define <8 x i32> @test_maskz_compress_d_
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_compress_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
- ret <8 x i32> %res
+ %1 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i32> %1
}
-declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
-
define <4 x double> @test_expand_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_expand_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x double> %1
}
define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
@@ -577,8 +597,10 @@ define <4 x double> @test_mask_expand_pd
; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
+ ret <4 x double> %2
}
define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
@@ -594,18 +616,18 @@ define <4 x double> @test_maskz_expand_p
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-
define <8 x float> @test_expand_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_expand_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x float> %1
}
define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
@@ -623,8 +645,9 @@ define <8 x float> @test_mask_expand_ps_
; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
@@ -640,18 +663,17 @@ define <8 x float> @test_maskz_expand_ps
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
-
define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_expand_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
- ret <4 x i64> %res
+ %1 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i64> %1
}
define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
@@ -669,8 +691,10 @@ define <4 x i64> @test_mask_expand_q_256
; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
@@ -686,18 +710,18 @@ define <4 x i64> @test_maskz_expand_q_25
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i64> %2
}
-declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
-
define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_expand_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
- ret <8 x i32> %res
+ %1 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i32> %1
}
define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
@@ -715,8 +739,9 @@ define <8 x i32> @test_mask_expand_d_256
; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
@@ -732,12 +757,11 @@ define <8 x i32> @test_maskz_expand_d_25
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
+ ret <8 x i32> %2
}
-declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
-
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256:
; CHECK: # %bb.0:
@@ -6915,3 +6939,19 @@ declare <8 x float> @llvm.fma.v8f32(<8 x
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double>, <2 x double>, <2 x i1>)
+declare <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float>, <4 x float>, <4 x i1>)
+declare <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
+declare <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
+declare <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double>, <2 x double>, <2 x i1>)
+declare <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float>, <4 x float>, <4 x i1>)
+declare <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
+declare <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
+declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>)
+declare <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float>, <8 x float>, <8 x i1>)
+declare <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
+declare <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
+declare <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double>, <4 x double>, <4 x i1>)
+declare <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float>, <8 x float>, <8 x i1>)
+declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
+declare <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
More information about the llvm-commits
mailing list