[llvm] r332890 - [X86] Remove masking from vpternlog intrinsics. Use a select in IR instead.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon May 21 13:58:10 PDT 2018
Author: ctopper
Date: Mon May 21 13:58:09 2018
New Revision: 332890
URL: http://llvm.org/viewvc/llvm-project?rev=332890&view=rev
Log:
[X86] Remove masking from vpternlog intrinsics. Use a select in IR instead.
This removes 6 intrinsics since we no longer need separate mask and maskz intrinsics.
Differential Revision: https://reviews.llvm.org/D47124
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon May 21 13:58:09 2018
@@ -6066,77 +6066,41 @@ let TargetPrefix = "x86" in {
// Bitwise ternary logic
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_pternlog_d_128 :
- GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
+ def int_x86_avx512_pternlog_d_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogd128">,
Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
- def int_x86_avx512_maskz_pternlog_d_128 :
- GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_pternlog_d_256 :
- GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_maskz_pternlog_d_256 :
- GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
+ def int_x86_avx512_pternlog_d_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogd256">,
Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_pternlog_d_512 :
- GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
- def int_x86_avx512_maskz_pternlog_d_512 :
- GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
+ def int_x86_avx512_pternlog_d_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogd512">,
Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
+ [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pternlog_q_128 :
- GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
+ def int_x86_avx512_pternlog_q_128 :
+ GCCBuiltin<"__builtin_ia32_pternlogq128">,
Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
- def int_x86_avx512_maskz_pternlog_q_128 :
- GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_pternlog_q_256 :
- GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_maskz_pternlog_q_256 :
- GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
+ def int_x86_avx512_pternlog_q_256 :
+ GCCBuiltin<"__builtin_ia32_pternlogq256">,
Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_pternlog_q_512 :
- GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
- def int_x86_avx512_maskz_pternlog_q_512 :
- GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
+ def int_x86_avx512_pternlog_q_512 :
+ GCCBuiltin<"__builtin_ia32_pternlogq512">,
Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
// Misc.
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon May 21 13:58:09 2018
@@ -257,6 +257,8 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
+ Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
+ Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -2530,6 +2532,34 @@ void llvm::UpgradeIntrinsicCall(CallInst
Value *Ops[] = { Arg0, Arg1, Arg2 };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Ops);
+ } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
+ Name.startswith("avx512.maskz.pternlog."))) {
+ bool ZeroMask = Name[11] == 'z';
+ unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+ unsigned EltWidth = CI->getType()->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3) };
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
+ : CI->getArgOperand(0);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon May 21 13:58:09 2018
@@ -20631,23 +20631,6 @@ SDValue X86TargetLowering::LowerINTRINSI
Src2, Src3, Src1),
Mask, PassThru, Subtarget, DAG);
}
- case TERLOG_OP_MASK:
- case TERLOG_OP_MASKZ: {
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
- SDValue Mask = Op.getOperand(5);
- MVT VT = Op.getSimpleValueType();
- SDValue PassThru = Src1;
- // Set PassThru element.
- if (IntrData->Type == TERLOG_OP_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
-
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Src3, Src4),
- Mask, PassThru, Subtarget, DAG);
- }
case CVTPD2PS:
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon May 21 13:58:09 2018
@@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
- TERLOG_OP_MASK, TERLOG_OP_MASKZ, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
+ FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
ROUNDP, ROUNDS
};
@@ -949,18 +949,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
- X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -1281,18 +1269,6 @@ static const IntrinsicData IntrinsicsWi
X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
- X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
- X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, ISD::FMA,
@@ -1456,6 +1432,12 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_pternlog_q_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Mon May 21 13:58:09 2018
@@ -2354,5 +2354,129 @@ entry:
declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+define <8 x i64> @test_mm512_ternarylogic_epi32(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %1 = bitcast <8 x i64> %__B to <16 x i32>
+ %2 = bitcast <8 x i64> %__C to <16 x i32>
+ %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+ %4 = bitcast <16 x i32> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) #1
+
+define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_mask_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %1 = bitcast <8 x i64> %__B to <16 x i32>
+ %2 = bitcast <8 x i64> %__C to <16 x i32>
+ %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+ %4 = bitcast i16 %__U to <16 x i1>
+ %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
+ %6 = bitcast <16 x i32> %5 to <8 x i64>
+ ret <8 x i64> %6
+}
+
+define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_maskz_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <8 x i64> %__A to <16 x i32>
+ %1 = bitcast <8 x i64> %__B to <16 x i32>
+ %2 = bitcast <8 x i64> %__C to <16 x i32>
+ %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+ %4 = bitcast i16 %__U to <16 x i1>
+ %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
+ %6 = bitcast <16 x i32> %5 to <8 x i64>
+ ret <8 x i64> %6
+}
+
+define <8 x i64> @test_mm512_ternarylogic_epi64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+ ret <8 x i64> %0
+}
+
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32) #1
+
+define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_mask_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_maskz_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_maskz_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+ ret <8 x i64> %2
+}
+
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Mon May 21 13:58:09 2018
@@ -4231,3 +4231,71 @@ define <16 x i32>@test_int_x86_avx512_ma
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon May 21 13:58:09 2018
@@ -3206,71 +3206,75 @@ define <4 x float>@test_int_x86_avx512_m
ret <4 x float> %res2
}
-declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
- %res2 = add <16 x i32> %res, %res1
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %2 = bitcast i16 %x4 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %res2 = add <16 x i32> %3, %4
ret <16 x i32> %res2
}
-declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
-
define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
- %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
- %res2 = add <16 x i32> %res, %res1
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %2 = bitcast i16 %x4 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %res2 = add <16 x i32> %3, %4
ret <16 x i32> %res2
}
-declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
- %res2 = add <8 x i64> %res, %res1
+ %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
+ %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %res2 = add <8 x i64> %3, %4
ret <8 x i64> %res2
}
-declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
-
define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
- %res2 = add <8 x i64> %res, %res1
+ %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %res2 = add <8 x i64> %3, %4
ret <8 x i64> %res2
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll Mon May 21 13:58:09 2018
@@ -3,16 +3,15 @@
; These test cases demonstrate cases where vpternlog could benefit from being commuted.
-declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
-declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
define <16 x i32> @vpternlog_v16i32_012(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
@@ -20,8 +19,8 @@ define <16 x i32> @vpternlog_v16i32_102(
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
@@ -29,8 +28,8 @@ define <16 x i32> @vpternlog_v16i32_210(
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
@@ -39,8 +38,8 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
@@ -49,8 +48,8 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
@@ -59,8 +58,8 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
@@ -69,8 +68,8 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
@@ -79,8 +78,8 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
@@ -89,8 +88,8 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
@@ -99,8 +98,8 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
@@ -109,8 +108,8 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
@@ -119,8 +118,8 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_021_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
@@ -129,8 +128,8 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_021_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
@@ -139,8 +138,8 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_021_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
@@ -149,8 +148,8 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -159,8 +158,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -170,8 +171,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -181,8 +184,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -192,9 +197,9 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
ret <16 x i32> %res2
}
@@ -205,23 +210,25 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $58, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %res2
}
define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -231,9 +238,9 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
ret <16 x i32> %res2
}
@@ -245,9 +252,9 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %res2
}
@@ -258,8 +265,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -270,9 +279,9 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %res2
}
@@ -283,8 +292,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -295,9 +306,9 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
ret <16 x i32> %res2
}
@@ -308,21 +319,25 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -333,8 +348,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -345,8 +362,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -357,34 +376,40 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -394,8 +419,10 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -405,8 +432,10 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -415,8 +444,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -425,8 +456,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -435,8 +468,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -446,8 +481,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -457,8 +494,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -468,8 +507,10 @@ define <16 x i32> @vpternlog_v16i32_012_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -479,8 +520,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -490,8 +533,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -501,8 +546,10 @@ define <16 x i32> @vpternlog_v16i32_102_
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -512,8 +559,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -523,8 +572,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -534,8 +585,10 @@ define <16 x i32> @vpternlog_v16i32_210_
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -545,8 +598,10 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x0 = load <16 x i32>, <16 x i32>* %x0ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -556,8 +611,10 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x1 = load <16 x i32>, <16 x i32>* %x1ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
@@ -567,8 +624,10 @@ define <16 x i32> @vpternlog_v16i32_021_
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2ptr
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
@@ -579,8 +638,8 @@ define <16 x i32> @vpternlog_v16i32_012_
%x0_scalar = load i32, i32* %ptr_x0
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
@@ -591,8 +650,8 @@ define <16 x i32> @vpternlog_v16i32_012_
%x1_scalar = load i32, i32* %ptr_x1
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
@@ -603,8 +662,8 @@ define <16 x i32> @vpternlog_v16i32_012_
%x2_scalar = load i32, i32* %ptr_x2
%vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
@@ -615,8 +674,8 @@ define <16 x i32> @vpternlog_v16i32_102_
%x0_scalar = load i32, i32* %ptr_x0
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
@@ -627,8 +686,8 @@ define <16 x i32> @vpternlog_v16i32_102_
%x1_scalar = load i32, i32* %ptr_x1
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
@@ -639,8 +698,8 @@ define <16 x i32> @vpternlog_v16i32_102_
%x2_scalar = load i32, i32* %ptr_x2
%vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
@@ -651,8 +710,8 @@ define <16 x i32> @vpternlog_v16i32_210_
%x0_scalar = load i32, i32* %ptr_x0
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
@@ -663,8 +722,8 @@ define <16 x i32> @vpternlog_v16i32_210_
%x1_scalar = load i32, i32* %ptr_x1
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_210_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
@@ -675,23 +734,25 @@ define <16 x i32> @vpternlog_v16i32_210_
%x2_scalar = load i32, i32* %ptr_x2
%vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ ret <16 x i32> %1
}
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -703,8 +764,10 @@ define <16 x i32> @vpternlog_v16i32_012_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -716,8 +779,10 @@ define <16 x i32> @vpternlog_v16i32_012_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -729,23 +794,27 @@ define <16 x i32> @vpternlog_v16i32_102_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -758,8 +827,10 @@ define <16 x i32> @vpternlog_v16i32_102_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -772,8 +843,10 @@ define <16 x i32> @vpternlog_v16i32_210_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -786,38 +859,44 @@ define <16 x i32> @vpternlog_v16i32_210_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
+; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -829,8 +908,10 @@ define <16 x i32> @vpternlog_v16i32_021_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -842,8 +923,10 @@ define <16 x i32> @vpternlog_v16i32_021_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -855,8 +938,10 @@ define <16 x i32> @vpternlog_v16i32_012_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -868,8 +953,10 @@ define <16 x i32> @vpternlog_v16i32_012_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -881,8 +968,10 @@ define <16 x i32> @vpternlog_v16i32_012_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -894,8 +983,10 @@ define <16 x i32> @vpternlog_v16i32_102_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -907,8 +998,10 @@ define <16 x i32> @vpternlog_v16i32_102_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -920,8 +1013,10 @@ define <16 x i32> @vpternlog_v16i32_102_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -933,8 +1028,10 @@ define <16 x i32> @vpternlog_v16i32_210_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -946,8 +1043,10 @@ define <16 x i32> @vpternlog_v16i32_210_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_210_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -959,8 +1058,10 @@ define <16 x i32> @vpternlog_v16i32_210_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -972,8 +1073,10 @@ define <16 x i32> @vpternlog_v16i32_021_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
@@ -985,8 +1088,10 @@ define <16 x i32> @vpternlog_v16i32_021_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_021_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
@@ -998,8 +1103,10 @@ define <16 x i32> @vpternlog_v16i32_021_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
- ret <16 x i32> %res
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
}
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask1(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
@@ -1011,9 +1118,9 @@ define <16 x i32> @vpternlog_v16i32_012_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
ret <16 x i32> %res2
}
@@ -1027,9 +1134,9 @@ define <16 x i32> @vpternlog_v16i32_012_
%x0scalar = load i32, i32* %x0ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
%x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %res2
}
@@ -1043,9 +1150,9 @@ define <16 x i32> @vpternlog_v16i32_012_
%x1scalar = load i32, i32* %x1ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
%x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %res2
}
@@ -1059,8 +1166,8 @@ define <16 x i32> @vpternlog_v16i32_012_
%x2scalar = load i32, i32* %x2ptr
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
%x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
%mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
ret <16 x i32> %res2
}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Mon May 21 13:58:09 2018
@@ -3535,6 +3535,264 @@ entry:
ret <2 x i64> %2
}
+define <2 x i64> @test_mm_ternarylogic_epi32(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__A to <4 x i32>
+ %1 = bitcast <2 x i64> %__B to <4 x i32>
+ %2 = bitcast <2 x i64> %__C to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+ %4 = bitcast <4 x i32> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32) #2
+
+define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_mask_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__A to <4 x i32>
+ %1 = bitcast <2 x i64> %__B to <4 x i32>
+ %2 = bitcast <2 x i64> %__C to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+ %4 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> %0
+ %6 = bitcast <4 x i32> %5 to <2 x i64>
+ ret <2 x i64> %6
+}
+
+define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_maskz_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__A to <4 x i32>
+ %1 = bitcast <2 x i64> %__B to <4 x i32>
+ %2 = bitcast <2 x i64> %__C to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+ %4 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> zeroinitializer
+ %6 = bitcast <4 x i32> %5 to <2 x i64>
+ ret <2 x i64> %6
+}
+
+define <4 x i64> @test_mm256_ternarylogic_epi32(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %1 = bitcast <4 x i64> %__B to <8 x i32>
+ %2 = bitcast <4 x i64> %__C to <8 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+ %4 = bitcast <8 x i32> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32) #2
+
+define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_mask_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %1 = bitcast <4 x i64> %__B to <8 x i32>
+ %2 = bitcast <4 x i64> %__C to <8 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+ %4 = bitcast i8 %__U to <8 x i1>
+ %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
+ %6 = bitcast <8 x i32> %5 to <4 x i64>
+ ret <4 x i64> %6
+}
+
+define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_maskz_ternarylogic_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_ternarylogic_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__A to <8 x i32>
+ %1 = bitcast <4 x i64> %__B to <8 x i32>
+ %2 = bitcast <4 x i64> %__C to <8 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+ %4 = bitcast i8 %__U to <8 x i1>
+ %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
+ %6 = bitcast <8 x i32> %5 to <4 x i64>
+ ret <4 x i64> %6
+}
+
+define <2 x i64> @test_mm_ternarylogic_epi64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+ ret <2 x i64> %0
+}
+
+declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32) #2
+
+define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_mask_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__A
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_maskz_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_ternarylogic_epi64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+ ret <4 x i64> %0
+}
+
+declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32) #2
+
+define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_mask_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__A
+ ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_maskz_ternarylogic_epi64:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_ternarylogic_epi64:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
+ ret <4 x i64> %2
+}
+
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon May 21 13:58:09 2018
@@ -6817,3 +6817,139 @@ define <4 x i64>@test_int_x86_avx512_mas
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
+
+define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
+
+define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
+
+define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
+
+define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon May 21 13:58:09 2018
@@ -2222,20 +2222,23 @@ define <8 x float>@test_int_x86_avx512_m
ret <8 x float> %res2
}
-declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
+declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
- %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
- %res2 = add <4 x i32> %res, %res1
+ %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
+ %4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
+ %res2 = add <4 x i32> %3, %4
ret <4 x i32> %res2
}
@@ -2244,32 +2247,37 @@ declare <4 x i32> @llvm.x86.avx512.maskz
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
- %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
- %res2 = add <4 x i32> %res, %res1
+ %1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
+ %4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
+ %res2 = add <4 x i32> %3, %4
ret <4 x i32> %res2
}
-declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
+declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
- %res2 = add <8 x i32> %res, %res1
+ %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
+ %4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
+ %res2 = add <8 x i32> %3, %4
ret <8 x i32> %res2
}
@@ -2278,83 +2286,93 @@ declare <8 x i32> @llvm.x86.avx512.maskz
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
- %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
- %res2 = add <8 x i32> %res, %res1
+ %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
+ %4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
+ %res2 = add <8 x i32> %3, %4
ret <8 x i32> %res2
}
-declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
+declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
- %res2 = add <2 x i64> %res, %res1
+ %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
+ %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
+ %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
+ %res2 = add <2 x i64> %3, %4
ret <2 x i64> %res2
}
-declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
-
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
- %res2 = add <2 x i64> %res, %res1
+ %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
+ %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
+ %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
+ %res2 = add <2 x i64> %3, %4
ret <2 x i64> %res2
}
-declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
+declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
- %res2 = add <4 x i64> %res, %res1
+ %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
+ %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
+ %res2 = add <4 x i64> %3, %4
ret <4 x i64> %res2
}
-declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
-
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
- %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
- %res2 = add <4 x i64> %res, %res1
+ %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
+ %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
+ %res2 = add <4 x i64> %3, %4
ret <4 x i64> %res2
}
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=332890&r1=332889&r2=332890&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Mon May 21 13:58:09 2018
@@ -1798,20 +1798,20 @@ define <16 x i32> @stack_fold_ternlogd(<
;CHECK-LABEL: stack_fold_ternlogd
;CHECK: vpternlogd $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
- ret <16 x i32> %res
+ %2 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ ret <16 x i32> %2
}
-declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
define <8 x i64> @stack_fold_ternlogq(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
;CHECK-LABEL: stack_fold_ternlogq
;CHECK: vpternlogq $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
- ret <8 x i64> %res
+ %2 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ ret <8 x i64> %2
}
-declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
define <64 x i8> @stack_fold_punpckhbw_zmm(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_punpckhbw_zmm
More information about the llvm-commits
mailing list