[llvm] r332824 - [X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Sun May 20 16:34:04 PDT 2018
Author: ctopper
Date: Sun May 20 16:34:04 2018
New Revision: 332824
URL: http://llvm.org/viewvc/llvm-project?rev=332824&view=rev
Log:
[X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.
Someday maybe we'll use selects for all intrinsics.
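
For reference, the upgrade pattern (visible in the test diffs below): a masked
permvar call becomes an unmasked call plus a bitcast of the mask and a plain IR
select. A minimal sketch for the 512-bit dword case, with placeholder value
names:

  ; before
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %passthru, i16 %mask)

  ; after
  %perm = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x i32> %perm, <16 x i32> %passthru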
Added:
llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun May 20 16:34:04 2018
@@ -3259,42 +3259,42 @@ let TargetPrefix = "x86" in { // All in
}
// Permute
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256_mask">,
+ def int_x86_avx512_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512_mask">,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
- llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256_mask">,
+ llvm_v8i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512_mask">,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128_mask">,
+ llvm_v8i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256_mask">,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512_mask">,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128_mask">,
+ llvm_v32i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256_mask">,
+ llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
+ llvm_v64i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
- llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
+ llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ llvm_v16i32_ty], [IntrNoMem]>;
}
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
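
Each rewritten definition above now corresponds to a two-operand IR declaration
with no mask or passthru operand, e.g. for the df.512 entry:

  declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)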
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sun May 20 16:34:04 2018
@@ -182,8 +182,8 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
- Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
- Name == "avx512.mask.permvar.si.256" || // Added in 7.0
+ Name.startswith("avx512.mask.permvar.") || // Added in 7.0
+ Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
@@ -1207,10 +1207,38 @@ static bool upgradeAVX512MaskToSelect(St
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
- } else if (Name == "permvar.sf.256") {
- IID = Intrinsic::x86_avx2_permps;
- } else if (Name == "permvar.si.256") {
- IID = Intrinsic::x86_avx2_permd;
+ } else if (Name.startswith("permvar.")) {
+ bool IsFloat = CI.getType()->isFPOrFPVectorTy();
+ if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx2_permps;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx2_permd;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_df_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_di_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_sf_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_si_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_df_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_permvar_di_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_permvar_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_permvar_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
} else
return false;
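
The dispatch above recovers VecWidth and EltWidth from the call's type, plus
whether the elements are floating point. As a sketch (placeholder value names),
an old call such as

  %r = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a, <8 x i16> %idx, <8 x i16> %pt, i8 %m)

has VecWidth == 128 and EltWidth == 16, so it is upgraded to
llvm.x86.avx512.permvar.hi.128 followed by the usual bitcast+select masking
sequence, as seen in the avx512bwvl test changes below.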
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun May 20 16:34:04 2018
@@ -20508,15 +20508,12 @@ SDValue X86TargetLowering::LowerINTRINSI
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_2OP_MASK : {
+ case VPERM_2OP : {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
- SDValue PassThru = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
// Swap Src1 and Src2 in the node creation
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1),
- Mask, PassThru, Subtarget, DAG);
+ return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:{
@@ -20914,13 +20911,6 @@ SDValue X86TargetLowering::LowerINTRINSI
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- case Intrinsic::x86_avx2_permd:
- case Intrinsic::x86_avx2_permps:
- // Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/instruction.
- return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(1));
-
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
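
On the operand order: the intrinsics take (data, index), while X86ISD::VPERMV
and the instructions take the index operand first, hence the Src2/Src1 swap in
the VPERM_2OP case above. The removed avx2_permd/avx2_permps custom lowering
did the same swap; those two intrinsics now go through the shared VPERM_2OP
table entry added below. For example (register assignment as in the updated
CHECK lines):

  %r = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %data, <16 x i32> %idx)
  ; lowers to: vpermd %zmm0, %zmm1, %zmm0   ; data in %zmm0, index in %zmm1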
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun May 20 16:34:04 2018
@@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
IFMA_OP_MASK, IFMA_OP_MASKZ,
- VPERM_2OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -406,6 +406,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
@@ -797,30 +799,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_df_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_df_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_di_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_di_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_128, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_hi_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_128, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_256, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
- X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
@@ -1420,10 +1398,20 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP,
- X86ISD::VPMADDUBSW, 0),
- X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP,
- X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_di_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_128, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_hi_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_128, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_256, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_qi_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_sf_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_permvar_si_512, VPERM_2OP, X86ISD::VPERMV, 0),
+ X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun May 20 16:34:04 2018
@@ -2785,30 +2785,22 @@ Instruction *InstCombiner::visitCallInst
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
+ case Intrinsic::x86_avx512_permvar_df_256:
+ case Intrinsic::x86_avx512_permvar_df_512:
+ case Intrinsic::x86_avx512_permvar_di_256:
+ case Intrinsic::x86_avx512_permvar_di_512:
+ case Intrinsic::x86_avx512_permvar_hi_128:
+ case Intrinsic::x86_avx512_permvar_hi_256:
+ case Intrinsic::x86_avx512_permvar_hi_512:
+ case Intrinsic::x86_avx512_permvar_qi_128:
+ case Intrinsic::x86_avx512_permvar_qi_256:
+ case Intrinsic::x86_avx512_permvar_qi_512:
+ case Intrinsic::x86_avx512_permvar_sf_512:
+ case Intrinsic::x86_avx512_permvar_si_512:
if (Value *V = simplifyX86vpermv(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
- case Intrinsic::x86_avx512_mask_permvar_df_256:
- case Intrinsic::x86_avx512_mask_permvar_df_512:
- case Intrinsic::x86_avx512_mask_permvar_di_256:
- case Intrinsic::x86_avx512_mask_permvar_di_512:
- case Intrinsic::x86_avx512_mask_permvar_hi_128:
- case Intrinsic::x86_avx512_mask_permvar_hi_256:
- case Intrinsic::x86_avx512_mask_permvar_hi_512:
- case Intrinsic::x86_avx512_mask_permvar_qi_128:
- case Intrinsic::x86_avx512_mask_permvar_qi_256:
- case Intrinsic::x86_avx512_mask_permvar_qi_512:
- case Intrinsic::x86_avx512_mask_permvar_sf_512:
- case Intrinsic::x86_avx512_mask_permvar_si_512:
- if (Value *V = simplifyX86vpermv(*II, Builder)) {
- // We simplified the permuting, now create a select for the masking.
- V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
- Builder);
- return replaceInstUsesWith(*II, V);
- }
- break;
-
case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256:
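
With the masked forms gone, simplifyX86vpermv only has to deal with the permute
itself; the masking is now an ordinary IR select that the generic select
combines already understand. A sketch of the constant-index fold, with made-up
indices:

  %r = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %v, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
  ; becomes
  %r = shufflevector <8 x i64> %v, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>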
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -4151,3 +4151,83 @@ define <8 x double> @test_x86_vbroadcast
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
+
+declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res3, %res2
+ ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun May 20 16:34:04 2018
@@ -3448,83 +3448,99 @@ define <8 x i64>@test_int_x86_avx512_mas
ret <8 x i64> %res4
}
-declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
- %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res3, %res2
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
+ %4 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
+ %7 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ %res3 = fadd <8 x double> %3, %6
+ %res4 = fadd <8 x double> %res3, %7
ret <8 x double> %res4
}
-declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
- %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
- %res3 = add <8 x i64> %res, %res1
- %res4 = add <8 x i64> %res3, %res2
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
+ %4 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
+ %7 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ %res3 = add <8 x i64> %3, %6
+ %res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}
-declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res3, %res2
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
+ %4 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
+ %7 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ %res3 = fadd <16 x float> %3, %6
+ %res4 = fadd <16 x float> %res3, %7
ret <16 x float> %res4
}
-declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
- %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res3, %res2
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ %4 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
+ %7 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ %res3 = add <16 x i32> %3, %6
+ %res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -2757,3 +2757,33 @@ define <16 x i32>@test_int_x86_avx512_ma
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
+
+declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Sun May 20 16:34:04 2018
@@ -1573,33 +1573,37 @@ define <32 x i16>@test_int_x86_avx512_ma
ret <32 x i16> %res4
}
-declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
-; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
-; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
-; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2
-; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
- %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
- %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
- %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
- %res3 = add <32 x i16> %res, %res1
- %res4 = add <32 x i16> %res3, %res2
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
+ %2 = bitcast i32 %x3 to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
+ %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
+ %5 = bitcast i32 %x3 to <32 x i1>
+ %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
+ %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
+ %res3 = add <32 x i16> %3, %6
+ %res4 = add <32 x i16> %res3, %7
ret <32 x i16> %res4
}
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -4010,3 +4010,43 @@ define <8 x i32>@test_int_x86_avx512_mas
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
+
+declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
+; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
+; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
+; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
+; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Sun May 20 16:34:04 2018
@@ -2445,43 +2445,51 @@ define <8 x i16>@test_int_x86_avx512_mas
ret <8 x i16> %res4
}
-declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
- %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
- %res3 = add <8 x i16> %res, %res1
- %res4 = add <8 x i16> %res3, %res2
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
+ %4 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
+ %7 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
+ %res3 = add <8 x i16> %3, %6
+ %res4 = add <8 x i16> %res3, %7
ret <8 x i16> %res4
}
-declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
- %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
- %res3 = add <16 x i16> %res, %res1
- %res4 = add <16 x i16> %res3, %res2
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
+ %4 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
+ %7 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
+ %res3 = add <16 x i16> %3, %6
+ %res4 = add <16 x i16> %res3, %7
ret <16 x i16> %res4
}
Added: llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll?rev=332824&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s
+declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
+ %res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res3, %res2
+ ret <64 x i8> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi-intrinsics.ll Sun May 20 16:34:04 2018
@@ -1,23 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s
-declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1}
-; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
- %res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
- %res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
- %res3 = add <64 x i8> %res, %res1
- %res4 = add <64 x i8> %res3, %res2
- ret <64 x i8> %res4
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
+ %2 = bitcast i64 %x3 to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
+ %4 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
+ %5 = bitcast i64 %x3 to <64 x i1>
+ %6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
+ %7 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
+ %res3 = add <64 x i8> %3, %6
+ %res4 = add <64 x i8> %res3, %7
+ ret <64 x i8> %res4
}
declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
Added: llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll?rev=332824&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s
+
+declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
+; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
+; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
+; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
+; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
+; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
+; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmivl-intrinsics.ll Sun May 20 16:34:04 2018
@@ -1,43 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s
-declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
- %res3 = add <16 x i8> %res, %res1
- %res4 = add <16 x i8> %res3, %res2
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
+ %4 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
+ %7 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
+ %res3 = add <16 x i8> %3, %6
+ %res4 = add <16 x i8> %res3, %7
ret <16 x i8> %res4
}
-declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
-; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xd8]
-; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xc0]
-; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
+; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
+; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
- %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
- %res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
- %res3 = add <32 x i8> %res, %res1
- %res4 = add <32 x i8> %res3, %res2
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
+ %2 = bitcast i32 %x3 to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
+ %4 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
+ %5 = bitcast i32 %x3 to <32 x i1>
+ %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
+ %7 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
+ %res3 = add <32 x i8> %3, %6
+ %res4 = add <32 x i8> %res3, %7
ret <32 x i8> %res4
}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Sun May 20 16:34:04 2018
@@ -6777,3 +6777,43 @@ define <8 x i32>@test_int_x86_avx512_mas
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
+
+declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
+; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
+; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res3, %res2
+ ret <4 x double> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
+; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
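Note that the -upgrade.ll files keep calling the old mask.* names on purpose, so the AutoUpgrade path that rewrites them into the select form stays covered. Side by side, the signature change simply drops the passthru and mask operands:

  declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)  ; legacy, auto-upgraded
  declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)                         ; new mask-free form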
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Sun May 20 16:34:04 2018
@@ -3035,43 +3035,55 @@ define <4 x i64>@test_int_x86_avx512_mas
ret <4 x i64> %res4
}
-declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
- %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
- %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
- %res3 = fadd <4 x double> %res, %res1
- %res4 = fadd <4 x double> %res3, %res2
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> %x2
+ %4 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
+ %7 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
+ %res3 = fadd <4 x double> %3, %6
+ %res4 = fadd <4 x double> %res3, %7
ret <4 x double> %res4
}
-declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
- %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
- %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
- %res3 = add <4 x i64> %res, %res1
- %res4 = add <4 x i64> %res3, %res2
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
+ %4 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
+ %7 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
+ %res3 = add <4 x i64> %3, %6
+ %res4 = add <4 x i64> %res3, %7
ret <4 x i64> %res4
}
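The 4-element cases above add one wrinkle: an i8 mask still carries eight bits, so the IR bitcasts it to <8 x i1> and shufflevectors out the low four lanes before the select. A minimal sketch (%p and %passthru are placeholders):

  %m8 = bitcast i8 %mask to <8 x i1>
  %m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; low 4 mask bits
  %r  = select <4 x i1> %m4, <4 x double> %p, <4 x double> %passthru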
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll Sun May 20 16:34:04 2018
@@ -714,21 +714,21 @@ define <8 x double> @stack_fold_permpdva
;CHECK-LABEL: stack_fold_permpdvar
;CHECK: vpermpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a1, <8 x i64> %a0, <8 x double> undef, i8 -1)
+ %2 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a1, <8 x i64> %a0)
; fadd forces execution domain
- %3 = fadd <8 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
+ %3 = fadd <8 x double> %2, zeroinitializer
ret <8 x double> %3
}
-declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) nounwind readonly
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) nounwind readonly
define <16 x float> @stack_fold_permps(<16 x i32> %a0, <16 x float> %a1) {
;CHECK-LABEL: stack_fold_permps
;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a1, <16 x i32> %a0, <16 x float> undef, i16 -1)
+ %2 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a1, <16 x i32> %a0)
ret <16 x float> %2
}
-declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) nounwind readonly
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) nounwind readonly
define <8 x double> @stack_fold_permilpd_zmm(<8 x double> %a0) {
;CHECK-LABEL: stack_fold_permilpd_zmm
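In the unmasked stack-folding tests the old calls passed an undef passthru and an all-ones mask, which the new two-operand form expresses directly. The fadd against zeroinitializer is equivalent to the explicit <double 0x0, ...> vector it replaces, and since x + 0.0 is not foldable under default FP semantics (x may be -0.0), it still pins the permute to the floating-point execution domain, e.g.:

  %p = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a1, <8 x i64> %a0)
  %r = fadd <8 x double> %p, zeroinitializer  ; fadd forces execution domain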
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512vl.ll Sun May 20 16:34:04 2018
@@ -721,12 +721,12 @@ define <4 x double> @stack_fold_permpdva
;CHECK-LABEL: stack_fold_permpdvar
;CHECK: vpermpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a1, <4 x i64> %a0, <4 x double> undef, i8 -1)
+ %2 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a1, <4 x i64> %a0)
; fadd forces execution domain
- %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ %3 = fadd <4 x double> %2, zeroinitializer
ret <4 x double> %3
}
-declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) nounwind readonly
+declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) nounwind readonly
define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_permps
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sun May 20 16:34:04 2018
@@ -599,16 +599,16 @@ define <64 x i8> @stack_fold_permbvar(<6
;CHECK-LABEL: stack_fold_permbvar
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
+ %2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
ret <64 x i8> %2
}
-declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readonly
+declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>) nounwind readonly
define <64 x i8> @stack_fold_permbvar_mask(<64 x i8>* %passthru, <64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
;CHECK-LABEL: stack_fold_permbvar_mask
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
+ %2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
%3 = bitcast i64 %mask to <64 x i1>
; load needed to keep the operation from being scheduled above the asm block
%4 = load <64 x i8>, <64 x i8>* %passthru
@@ -620,7 +620,7 @@ define <64 x i8> @stack_fold_permbvar_ma
;CHECK-LABEL: stack_fold_permbvar_maskz
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
+ %2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
%3 = bitcast i64 %mask to <64 x i1>
%4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
ret <64 x i8> %4
@@ -630,12 +630,12 @@ define <16 x i32> @stack_fold_permd(<16
;CHECK-LABEL: stack_fold_permd
;CHECK: vpermd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a1, <16 x i32> %a0, <16 x i32> undef, i16 -1)
+ %2 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a1, <16 x i32> %a0)
; add forces execution domain
%3 = add <16 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <16 x i32> %3
}
-declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readonly
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) nounwind readonly
define <64 x i8> @stack_fold_vpermi2b(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
;CHECK-LABEL: stack_fold_vpermi2b
@@ -711,18 +711,18 @@ define <8 x i64> @stack_fold_permqvar(<8
;CHECK-LABEL: stack_fold_permqvar
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0, <8 x i64> undef, i8 -1)
+ %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0)
; add forces execution domain
%3 = add <8 x i64> %2, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
ret <8 x i64> %3
}
-declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readonly
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) nounwind readonly
define <8 x i64> @stack_fold_permqvar_mask(<8 x i64>* %passthru, <8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
;CHECK-LABEL: stack_fold_permqvar_mask
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0, <8 x i64> undef, i8 -1)
+ %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0)
%3 = bitcast i8 %mask to <8 x i1>
; load needed to keep the operation from being scheduled above the asm block
%4 = load <8 x i64>, <8 x i64>* %passthru
@@ -772,16 +772,16 @@ define <32 x i16> @stack_fold_permwvar(<
;CHECK-LABEL: stack_fold_permwvar
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
+ %2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
ret <32 x i16> %2
}
-declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readonly
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) nounwind readonly
define <32 x i16> @stack_fold_permwvar_mask(<32 x i16>* %passthru, <32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
;CHECK-LABEL: stack_fold_permwvar_mask
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
+ %2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
%3 = bitcast i32 %mask to <32 x i1>
; load needed to keep the operation from being scheduled above the asm block
%4 = load <32 x i16>, <32 x i16>* %passthru
@@ -793,7 +793,7 @@ define <32 x i16> @stack_fold_permwvar_m
;CHECK-LABEL: stack_fold_permwvar_maskz
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
+ %2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
ret <32 x i16> %4
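The masked stack-folding variants build the same select by hand; the passthru load sits between the asm block and the select so the permute cannot be scheduled above the asm. Sketch with placeholder names:

  %p  = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
  %m  = bitcast i32 %mask to <32 x i1>
  %pt = load <32 x i16>, <32 x i16>* %passthru  ; keeps the permute below the asm block
  %r  = select <32 x i1> %m, <32 x i16> %p, <32 x i16> %pt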
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sun May 20 16:34:04 2018
@@ -568,12 +568,12 @@ define <32 x i8> @stack_fold_permbvar(<3
;CHECK-LABEL: stack_fold_permbvar
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a1, <32 x i8> %a0, <32 x i8> undef, i32 -1)
+ %2 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a1, <32 x i8> %a0)
; add forces execution domain
%3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <32 x i8> %3
}
-declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) nounwind readonly
+declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>) nounwind readonly
define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_permd
@@ -672,12 +672,12 @@ define <4 x i64> @stack_fold_permqvar(<4
;CHECK-LABEL: stack_fold_permqvar
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a1, <4 x i64> %a0, <4 x i64> undef, i8 -1)
+ %2 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a1, <4 x i64> %a0)
; add forces execution domain
%3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %3
}
-declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readonly
+declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) nounwind readonly
define <16 x i8> @stack_fold_vpermt2b(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
;CHECK-LABEL: stack_fold_vpermt2b
@@ -755,12 +755,12 @@ define <16 x i16> @stack_fold_permwvar(<
;CHECK-LABEL: stack_fold_permwvar
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a1, <16 x i16> %a0, <16 x i16> undef, i16 -1)
+ %2 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a1, <16 x i16> %a0)
; add forces execution domain
%3 = add <16 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <16 x i16> %3
}
-declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) nounwind readonly
+declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>) nounwind readonly
define <4 x i32> @stack_fold_vplzcntd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_vplzcntd
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sun May 20 16:34:04 2018
@@ -6,10 +6,10 @@ declare <64 x i8> @llvm.x86.avx512.mask.
declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
-declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
-declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
@@ -33,16 +33,16 @@ define <8 x double> @combine_permvar_8f6
; X64-LABEL: combine_permvar_8f64_identity:
; X64: # %bb.0:
; X64-NEXT: retq
- %res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 -1)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 -1)
- ret <8 x double> %res1
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %1, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
+ ret <8 x double> %2
}
define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
; X32-LABEL: combine_permvar_8f64_identity_mask:
; X32: # %bb.0:
+; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
; X32-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
@@ -51,16 +51,20 @@ define <8 x double> @combine_permvar_8f6
;
; X64-LABEL: combine_permvar_8f64_identity_mask:
; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; X64-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
; X64-NEXT: vmovapd %zmm1, %zmm0
; X64-NEXT: retq
- %res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m)
- ret <8 x double> %res1
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %m to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x1
+ %4 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %3, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
+ %5 = bitcast i8 %m to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %3
+ ret <8 x double> %6
}
define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
@@ -71,16 +75,16 @@ define <8 x i64> @combine_permvar_8i64_i
; X64-LABEL: combine_permvar_8i64_identity:
; X64: # %bb.0:
; X64-NEXT: retq
- %res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 -1)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 -1)
- ret <8 x i64> %res1
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
+ ret <8 x i64> %2
}
define <8 x i64> @combine_permvar_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
; X32-LABEL: combine_permvar_8i64_identity_mask:
; X32: # %bb.0:
+; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
; X32-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
; X32-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1}
@@ -89,16 +93,20 @@ define <8 x i64> @combine_permvar_8i64_i
;
; X64-LABEL: combine_permvar_8i64_identity_mask:
; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; X64-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1}
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
; X64-NEXT: retq
- %res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 %m)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 %m)
- ret <8 x i64> %res1
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %m to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
+ %4 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %3, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
+ %5 = bitcast i8 %m to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %3
+ ret <8 x i64> %6
}
define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
@@ -619,7 +627,7 @@ define <32 x i16> @combine_permvar_as_vp
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw %xmm0, %zmm0
; X64-NEXT: retq
- %1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1)
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> zeroinitializer)
ret <32 x i16> %1
}
@@ -633,7 +641,7 @@ define <16 x i32> @combine_permvar_as_vp
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %zmm0
; X64-NEXT: retq
- %1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer)
ret <16 x i32> %1
}
@@ -647,7 +655,7 @@ define <8 x i64> @combine_permvar_as_vpb
; X64: # %bb.0:
; X64-NEXT: vbroadcastsd %xmm0, %zmm0
; X64-NEXT: retq
- %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer)
ret <8 x i64> %1
}
@@ -661,7 +669,7 @@ define <8 x i64> @combine_permvar_8i64_a
; X64: # %bb.0:
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; X64-NEXT: retq
- %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 -1)
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
ret <8 x i64> %1
}
define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
@@ -679,8 +687,10 @@ define <8 x i64> @combine_permvar_8i64_a
; X64-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
; X64-NEXT: retq
- %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 %m)
- ret <8 x i64> %1
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
+ %2 = bitcast i8 %m to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
+ ret <8 x i64> %3
}
define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
@@ -693,7 +703,7 @@ define <8 x double> @combine_permvar_8f6
; X64: # %bb.0:
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
; X64-NEXT: retq
- %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 -1)
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
ret <8 x double> %1
}
define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
@@ -711,8 +721,10 @@ define <8 x double> @combine_permvar_8f6
; X64-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
; X64-NEXT: vmovapd %zmm1, %zmm0
; X64-NEXT: retq
- %1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
- ret <8 x double> %1
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
+ %2 = bitcast i8 %m to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x1
+ ret <8 x double> %3
}
define <16 x float> @combine_vpermilvar_16f32_230146759A8BCFDE(<16 x float> %x0) {
@@ -798,8 +810,8 @@ define <32 x i16> @combine_permvar_as_ps
; X64: # %bb.0:
; X64-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15,17,16,19,18,20,21,22,23,25,24,27,26,28,29,30,31]
; X64-NEXT: retq
- %res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res0
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>)
+ ret <32 x i16> %1
}
define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) {
@@ -812,8 +824,8 @@ define <32 x i16> @combine_pshufb_as_psh
; X64: # %bb.0:
; X64-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14,16,17,18,19,21,20,23,22,24,25,26,27,29,28,31,30]
; X64-NEXT: retq
- %res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res0
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>)
+ ret <32 x i16> %1
}
define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
@@ -826,9 +838,9 @@ define <32 x i16> @combine_vpermi2var_32
; X64: # %bb.0:
; X64-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29,34,35,32,33,38,39,36,37,42,43,40,41,46,47,44,45,50,51,48,49,54,55,52,53,58,59,56,57,62,63,60,61]
; X64-NEXT: retq
- %res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
- %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %res0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %res1
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>)
+ %2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %1, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>)
+ ret <32 x i16> %2
}
define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
@@ -1093,8 +1105,8 @@ define <8 x double> @combine_vpermi2var_
; X64-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
; X64-NEXT: retq
%res0 = shufflevector <8 x double> %x0, <8 x double> zeroinitializer, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 3, i64 2, i64 1, i64 7, i64 0, i64 6, i64 5, i64 4>, <8 x double> %res0, i8 -1)
- ret <8 x double> %res1
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 3, i64 2, i64 1, i64 7, i64 0, i64 6, i64 5, i64 4>)
+ ret <8 x double> %1
}
define <16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x float> %x0) {
@@ -1128,6 +1140,6 @@ define <8 x i64> @combine_vpermvar_inser
; X64-NEXT: vpbroadcastq %xmm0, %zmm0
; X64-NEXT: retq
%1 = insertelement <8 x i64> undef, i64 %a0, i32 0
- %2 = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
+ %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer)
ret <8 x i64> %2
}
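The shuffle combiner looks through these calls whenever the index operand is constant, and back-to-back permutes compose; for example, a lane reversal applied twice folds to the identity (illustrative sketch, not one of the tests above):

  %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
  %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
  ; %2 is just %x0, so codegen emits only the ret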
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll Sun May 20 16:34:04 2018
@@ -2,7 +2,6 @@
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X64
-declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll Sun May 20 16:34:04 2018
@@ -2,17 +2,14 @@
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X64
-declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
-declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
-declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
declare <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
Modified: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll?rev=332824&r1=332823&r2=332824&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll Sun May 20 16:34:04 2018
@@ -1306,14 +1306,14 @@ define double @test_mask3_vfnmsub_sd_1(<
ret double %3
}
-declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
; CHECK-LABEL: @identity_test_permvar_si_256(
; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, i8 -1)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x i32> %1
}
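For the 256-bit dword and float element sizes there is no new avx512 intrinsic at all: the unmasked operation is exactly the long-standing AVX2 one, so these tests call @llvm.x86.avx2.permd / @llvm.x86.avx2.permps directly and express the masking with a select, e.g. (placeholder names):

  %p = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %idx)
  %m = bitcast i8 %mask to <8 x i1>
  %r = select <8 x i1> %m, <8 x i32> %p, <8 x i32> %passthru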
define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
@@ -1322,8 +1322,10 @@ define <8 x i32> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
}
define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
@@ -1331,8 +1333,8 @@ define <8 x i32> @zero_test_permvar_si_2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> undef, i8 -1)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
}
define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
@@ -1342,8 +1344,10 @@ define <8 x i32> @zero_test_permvar_si_2
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
}
define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
@@ -1351,8 +1355,8 @@ define <8 x i32> @shuffle_test_permvar_s
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %1
}
define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
@@ -1362,8 +1366,10 @@ define <8 x i32> @shuffle_test_permvar_s
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
}
define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
@@ -1371,8 +1377,8 @@ define <8 x i32> @undef_test_permvar_si_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %1
}
define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
@@ -1382,18 +1388,20 @@ define <8 x i32> @undef_test_permvar_si_
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
- %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %a
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
}
-declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
; CHECK-LABEL: @identity_test_permvar_sf_256(
; CHECK-NEXT: ret <8 x float> [[A0:%.*]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> undef, i8 -1)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %1
}
define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
@@ -1402,8 +1410,10 @@ define <8 x float> @identity_test_permva
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x float> [[TMP2]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
}
define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
@@ -1411,8 +1421,8 @@ define <8 x float> @zero_test_permvar_sf
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> undef, i8 -1)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ ret <8 x float> %1
}
define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
@@ -1422,8 +1432,10 @@ define <8 x float> @zero_test_permvar_sf
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
}
define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
@@ -1431,8 +1443,8 @@ define <8 x float> @shuffle_test_permvar
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %1
}
define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
@@ -1442,8 +1454,10 @@ define <8 x float> @shuffle_test_permvar
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
}
define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
@@ -1451,8 +1465,8 @@ define <8 x float> @undef_test_permvar_s
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[TMP1]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %1
}
define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
@@ -1462,18 +1476,20 @@ define <8 x float> @undef_test_permvar_s
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
- %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %a
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
}
-declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
; CHECK-LABEL: @identity_test_permvar_di_256(
; CHECK-NEXT: ret <4 x i64> [[A0:%.*]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> undef, i8 -1)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ ret <4 x i64> %1
}
define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
@@ -1483,8 +1499,11 @@ define <4 x i64> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
}
define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
@@ -1492,8 +1511,8 @@ define <4 x i64> @zero_test_permvar_di_2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> undef, i8 -1)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
}
define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
@@ -1504,8 +1523,11 @@ define <4 x i64> @zero_test_permvar_di_2
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
}
define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
@@ -1513,8 +1535,8 @@ define <4 x i64> @shuffle_test_permvar_d
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ ret <4 x i64> %1
}
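The CHECK lines above show the InstCombine fold these tests exercise: once the index operand is a constant vector, the permute is a fixed shuffle, so it simplifies to a plain shufflevector with the same lane order (the i64 index elements reappear as the i32 shuffle mask). In sketch form:

  ; before the fold
  %p = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
  ; after the fold
  %p = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>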
define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
@@ -1525,8 +1547,11 @@ define <4 x i64> @shuffle_test_permvar_d
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
}
define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
@@ -1534,8 +1559,8 @@ define <4 x i64> @undef_test_permvar_di_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ ret <4 x i64> %1
}
define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
@@ -1546,18 +1571,21 @@ define <4 x i64> @undef_test_permvar_di_
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
;
- %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %a
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
}
-declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
; CHECK-LABEL: @identity_test_permvar_df_256(
; CHECK-NEXT: ret <4 x double> [[A0:%.*]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> undef, i8 -1)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ ret <4 x double> %1
}
define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
@@ -1567,8 +1595,11 @@ define <4 x double> @identity_test_permv
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x double> [[TMP2]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
}
define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
@@ -1576,8 +1607,8 @@ define <4 x double> @zero_test_permvar_d
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x double> [[TMP1]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> undef, i8 -1)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ ret <4 x double> %1
}
define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
@@ -1588,8 +1619,11 @@ define <4 x double> @zero_test_permvar_d
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x double> [[TMP3]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
}
define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
@@ -1597,8 +1631,8 @@ define <4 x double> @shuffle_test_permva
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x double> [[TMP1]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ ret <4 x double> %1
}
define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
@@ -1609,8 +1643,11 @@ define <4 x double> @shuffle_test_permva
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x double> [[TMP3]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
}
define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
@@ -1618,8 +1655,8 @@ define <4 x double> @undef_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x double> [[TMP1]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ ret <4 x double> %1
}
define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
@@ -1630,18 +1667,21 @@ define <4 x double> @undef_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <4 x double> [[TMP3]]
;
- %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %a
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
}
-declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
; CHECK-LABEL: @identity_test_permvar_si_512(
; CHECK-NEXT: ret <16 x i32> [[A0:%.*]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> undef, i16 -1)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ ret <16 x i32> %1
}
define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
@@ -1650,8 +1690,10 @@ define <16 x i32> @identity_test_permvar
; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i32> [[TMP2]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
}
define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
@@ -1659,8 +1701,8 @@ define <16 x i32> @zero_test_permvar_si_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
}
define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
@@ -1670,8 +1712,10 @@ define <16 x i32> @zero_test_permvar_si_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i32> [[TMP3]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
}
define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
@@ -1679,8 +1723,8 @@ define <16 x i32> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x i32> %1
}
define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
@@ -1690,8 +1734,10 @@ define <16 x i32> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i32> [[TMP3]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
}
define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
@@ -1699,8 +1745,8 @@ define <16 x i32> @undef_test_permvar_si
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x i32> %1
}
define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
@@ -1710,18 +1756,20 @@ define <16 x i32> @undef_test_permvar_si
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i32> [[TMP3]]
;
- %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
- ret <16 x i32> %a
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
}
-declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
; CHECK-LABEL: @identity_test_permvar_sf_512(
; CHECK-NEXT: ret <16 x float> [[A0:%.*]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> undef, i16 -1)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ ret <16 x float> %1
}
define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
@@ -1730,8 +1778,10 @@ define <16 x float> @identity_test_permv
; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x float> [[TMP2]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
}
define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
@@ -1739,8 +1789,8 @@ define <16 x float> @zero_test_permvar_s
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x float> [[TMP1]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> undef, i16 -1)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ ret <16 x float> %1
}
define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
@@ -1750,8 +1800,10 @@ define <16 x float> @zero_test_permvar_s
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x float> [[TMP3]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
}
define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
@@ -1759,8 +1811,8 @@ define <16 x float> @shuffle_test_permva
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x float> [[TMP1]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %1
}
define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
@@ -1770,8 +1822,10 @@ define <16 x float> @shuffle_test_permva
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x float> [[TMP3]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
}
define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
@@ -1779,8 +1833,8 @@ define <16 x float> @undef_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x float> [[TMP1]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %1
}
define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
@@ -1790,18 +1844,20 @@ define <16 x float> @undef_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x float> [[TMP3]]
;
- %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
- ret <16 x float> %a
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
}
-declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
; CHECK-LABEL: @identity_test_permvar_di_512(
; CHECK-NEXT: ret <8 x i64> [[A0:%.*]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> undef, i8 -1)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ ret <8 x i64> %1
}
define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
@@ -1810,8 +1866,10 @@ define <8 x i64> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i64> [[TMP2]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
}
define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
@@ -1819,8 +1877,8 @@ define <8 x i64> @zero_test_permvar_di_5
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
}
define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
@@ -1830,8 +1888,10 @@ define <8 x i64> @zero_test_permvar_di_5
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
}
define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
@@ -1839,8 +1899,8 @@ define <8 x i64> @shuffle_test_permvar_d
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x i64> %1
}
define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
@@ -1850,8 +1910,10 @@ define <8 x i64> @shuffle_test_permvar_d
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
}
define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
@@ -1859,8 +1921,8 @@ define <8 x i64> @undef_test_permvar_di_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x i64> %1
}
define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
@@ -1870,18 +1932,20 @@ define <8 x i64> @undef_test_permvar_di_
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i64> [[TMP3]]
;
- %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
- ret <8 x i64> %a
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
}
-declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
; CHECK-LABEL: @identity_test_permvar_df_512(
; CHECK-NEXT: ret <8 x double> [[A0:%.*]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> undef, i8 -1)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ ret <8 x double> %1
}
define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
@@ -1890,8 +1954,10 @@ define <8 x double> @identity_test_permv
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x double> [[TMP2]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
}
define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
@@ -1899,8 +1965,8 @@ define <8 x double> @zero_test_permvar_d
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x double> [[TMP1]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> undef, i8 -1)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ ret <8 x double> %1
}
define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
@@ -1910,8 +1976,10 @@ define <8 x double> @zero_test_permvar_d
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x double> [[TMP3]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
}
define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
@@ -1919,8 +1987,8 @@ define <8 x double> @shuffle_test_permva
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x double> [[TMP1]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x double> %1
}
define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
@@ -1930,8 +1998,10 @@ define <8 x double> @shuffle_test_permva
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x double> [[TMP3]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
}
define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
@@ -1939,8 +2009,8 @@ define <8 x double> @undef_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x double> [[TMP1]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x double> %1
}
define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
@@ -1950,18 +2020,20 @@ define <8 x double> @undef_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x double> [[TMP3]]
;
- %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
- ret <8 x double> %a
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
}
-declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
; CHECK-LABEL: @identity_test_permvar_hi_128(
; CHECK-NEXT: ret <8 x i16> [[A0:%.*]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> undef, i8 -1)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %1
}
define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
@@ -1970,8 +2042,10 @@ define <8 x i16> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
}
define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
@@ -1979,8 +2053,8 @@ define <8 x i16> @zero_test_permvar_hi_1
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> undef, i8 -1)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
}
define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
@@ -1990,8 +2064,10 @@ define <8 x i16> @zero_test_permvar_hi_1
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
}
define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
@@ -1999,8 +2075,8 @@ define <8 x i16> @shuffle_test_permvar_h
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <8 x i16> %1
}
define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
@@ -2010,8 +2086,10 @@ define <8 x i16> @shuffle_test_permvar_h
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
}
define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
@@ -2019,8 +2097,8 @@ define <8 x i16> @undef_test_permvar_hi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <8 x i16> %1
}
define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
@@ -2030,18 +2108,20 @@ define <8 x i16> @undef_test_permvar_hi_
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
- %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
- ret <8 x i16> %a
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
}
-declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
; CHECK-LABEL: @identity_test_permvar_hi_256(
; CHECK-NEXT: ret <16 x i16> [[A0:%.*]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> undef, i16 -1)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ ret <16 x i16> %1
}
define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
@@ -2050,8 +2130,10 @@ define <16 x i16> @identity_test_permvar
; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i16> [[TMP2]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
}
define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
@@ -2059,8 +2141,8 @@ define <16 x i16> @zero_test_permvar_hi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> undef, i16 -1)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
}
define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
@@ -2070,8 +2152,10 @@ define <16 x i16> @zero_test_permvar_hi_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
}
define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
@@ -2079,8 +2163,8 @@ define <16 x i16> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <16 x i16> %1
}
define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
@@ -2090,8 +2174,10 @@ define <16 x i16> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
}
define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
@@ -2099,8 +2185,8 @@ define <16 x i16> @undef_test_permvar_hi
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <16 x i16> %1
}
define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
@@ -2110,18 +2196,20 @@ define <16 x i16> @undef_test_permvar_hi
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i16> [[TMP3]]
;
- %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
- ret <16 x i16> %a
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
}
-declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
; CHECK-LABEL: @identity_test_permvar_hi_512(
; CHECK-NEXT: ret <32 x i16> [[A0:%.*]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
+ ret <32 x i16> %1
}
define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
@@ -2130,8 +2218,10 @@ define <32 x i16> @identity_test_permvar
; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i16> [[TMP2]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
}
define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
@@ -2139,8 +2229,8 @@ define <32 x i16> @zero_test_permvar_hi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
}
define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
@@ -2150,8 +2240,10 @@ define <32 x i16> @zero_test_permvar_hi_
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
}
define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
@@ -2159,8 +2251,8 @@ define <32 x i16> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
}
define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
@@ -2170,8 +2262,10 @@ define <32 x i16> @shuffle_test_permvar_
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
}
define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
@@ -2179,8 +2273,8 @@ define <32 x i16> @undef_test_permvar_hi
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
}
define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
@@ -2190,18 +2284,20 @@ define <32 x i16> @undef_test_permvar_hi
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i16> [[TMP3]]
;
- %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
- ret <32 x i16> %a
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
}
-declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
; CHECK-LABEL: @identity_test_permvar_qi_128(
; CHECK-NEXT: ret <16 x i8> [[A0:%.*]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> undef, i16 -1)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
}
define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
@@ -2210,8 +2306,10 @@ define <16 x i8> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i8> [[TMP2]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
}
define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
@@ -2219,8 +2317,8 @@ define <16 x i8> @zero_test_permvar_qi_1
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> undef, i16 -1)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
}
define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
@@ -2230,8 +2328,10 @@ define <16 x i8> @zero_test_permvar_qi_1
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
}
define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
@@ -2239,8 +2339,8 @@ define <16 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %1
}
define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
@@ -2250,8 +2350,10 @@ define <16 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
}
define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
@@ -2259,8 +2361,8 @@ define <16 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[TMP1]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %1
}
define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
@@ -2270,18 +2372,20 @@ define <16 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
;
- %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
- ret <16 x i8> %a
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
}
-declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
; CHECK-LABEL: @identity_test_permvar_qi_256(
; CHECK-NEXT: ret <32 x i8> [[A0:%.*]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> undef, i32 -1)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ ret <32 x i8> %1
}
define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
@@ -2290,8 +2394,10 @@ define <32 x i8> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i8> [[TMP2]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
}
define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
@@ -2299,8 +2405,8 @@ define <32 x i8> @zero_test_permvar_qi_2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> undef, i32 -1)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
}
define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
@@ -2310,8 +2416,10 @@ define <32 x i8> @zero_test_permvar_qi_2
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
}
define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
@@ -2319,8 +2427,8 @@ define <32 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %1
}
define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
@@ -2330,8 +2438,10 @@ define <32 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
}
define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
@@ -2339,8 +2449,8 @@ define <32 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %1
}
define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
@@ -2350,18 +2460,20 @@ define <32 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <32 x i8> [[TMP3]]
;
- %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
- ret <32 x i8> %a
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
}
-declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
; CHECK-LABEL: @identity_test_permvar_qi_512(
; CHECK-NEXT: ret <64 x i8> [[A0:%.*]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> undef, i64 -1)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
+ ret <64 x i8> %1
}
define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
@@ -2370,8 +2482,10 @@ define <64 x i8> @identity_test_permvar_
; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <64 x i8> [[TMP2]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
}
define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
@@ -2379,8 +2493,8 @@ define <64 x i8> @zero_test_permvar_qi_5
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
; CHECK-NEXT: ret <64 x i8> [[TMP1]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> undef, i64 -1)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ ret <64 x i8> %1
}
define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
@@ -2390,8 +2504,10 @@ define <64 x i8> @zero_test_permvar_qi_5
; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
}
define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
@@ -2399,8 +2515,8 @@ define <64 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <64 x i8> [[TMP1]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <64 x i8> %1
}
define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
@@ -2410,8 +2526,10 @@ define <64 x i8> @shuffle_test_permvar_q
; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
}
define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
@@ -2419,8 +2537,8 @@ define <64 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <64 x i8> [[TMP1]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <64 x i8> %1
}
define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
@@ -2430,8 +2548,10 @@ define <64 x i8> @undef_test_permvar_qi_
; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
- %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
- ret <64 x i8> %a
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
}
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
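
For reference, every hunk above applies the same mechanical rewrite: the old masked permvar intrinsic's passthru and mask operands are dropped, and the masking is instead expressed in IR as a bitcast of the integer mask to a vector of i1 followed by a select between the permute result and the passthru. The following is a minimal standalone sketch of that pattern (the function name @masked_permvar_example and its arguments are hypothetical, not part of the patch):

; New unmasked form of the 128-bit byte permute.
declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)

define <16 x i8> @masked_permvar_example(<16 x i8> %a0, <16 x i8> %idx, <16 x i8> %passthru, i16 %mask) {
  ; Unmasked permute of %a0 by the per-byte indices in %idx.
  %perm = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> %idx)
  ; Reinterpret the 16-bit mask as one i1 per byte lane.
  %m = bitcast i16 %mask to <16 x i1>
  ; Lanes whose mask bit is set take the permuted value; the rest keep %passthru.
  %res = select <16 x i1> %m, <16 x i8> %perm, <16 x i8> %passthru
  ret <16 x i8> %res
}

Exposing the select in IR (rather than hiding it inside the intrinsic) is what lets InstCombine fold the permute into a shufflevector while still preserving the masking, as the CHECK lines in the masked tests above demonstrate.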