[llvm] r290864 - [AVX-512] Remove vinsert intrinsics and autoupgrade to native shufflevectors. There are some codegen problems here that I'll try to fix in future commits.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 2 21:45:58 PST 2017
Author: ctopper
Date: Mon Jan 2 23:45:57 2017
New Revision: 290864
URL: http://llvm.org/viewvc/llvm-project?rev=290864&view=rev
Log:
[AVX-512] Remove vinsert intrinsics and autoupgrade to native shufflevectors. There are some codegen problems here that I'll try to fix in future commits.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon Jan 2 23:45:57 2017
@@ -2063,81 +2063,6 @@ let TargetPrefix = "x86" in { // All in
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
-// Vector extract and insert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_insertf32x4_256 :
- GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_insertf32x4_512 :
- GCCBuiltin<"__builtin_ia32_insertf32x4_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_insertf32x8_512 :
- GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_insertf64x2_256 :
- GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_insertf64x2_512 :
- GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_insertf64x4_512 :
- GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti32x4_256 :
- GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti32x4_512 :
- GCCBuiltin<"__builtin_ia32_inserti32x4_mask">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti32x8_512 :
- GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti64x2_256 :
- GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti64x2_512 :
- GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_inserti64x4_512 :
- GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-}
-
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon Jan 2 23:45:57 2017
@@ -342,6 +342,7 @@ static bool UpgradeIntrinsicFunction1(Fu
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
+ Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
@@ -1151,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
- Name == "avx2.vinserti128")) {
+ Name == "avx2.vinserti128" ||
+ Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- VectorType *VecTy = cast<VectorType>(CI->getType());
- unsigned NumElts = VecTy->getNumElements();
+ unsigned DstNumElts = CI->getType()->getVectorNumElements();
+ unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
+ unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
- Imm = Imm & 1;
+ Imm = Imm % Scale;
- // Extend the second operand into a vector that is twice as big.
+ // Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
- SmallVector<uint32_t, 8> Idxs(NumElts);
- for (unsigned i = 0; i != NumElts; ++i)
+ SmallVector<uint32_t, 8> Idxs(DstNumElts);
+ for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
+ for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
+ Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
@@ -1179,15 +1184,18 @@ void llvm::UpgradeIntrinsicCall(CallInst
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
- // The low half of the result is either the low half of the 1st operand
- // or the low half of the 2nd operand (the inserted vector).
- for (unsigned i = 0; i != NumElts / 2; ++i)
- Idxs[i] = Imm ? i : (i + NumElts);
- // The high half of the result is either the low half of the 2nd operand
- // (the inserted vector) or the high half of the 1st operand.
- for (unsigned i = NumElts / 2; i != NumElts; ++i)
- Idxs[i] = Imm ? (i + NumElts / 2) : i;
+ // First fill with identify mask.
+ for (unsigned i = 0; i != DstNumElts; ++i)
+ Idxs[i] = i;
+ // Then replace the elements where we need to insert.
+ for (unsigned i = 0; i != SrcNumElts; ++i)
+ Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
+
+ // If the intrinsic has a mask operand, handle that.
+ if (CI->getNumArgOperands() == 5)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 2 23:45:57 2017
@@ -18660,8 +18660,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
- case INTR_TYPE_3OP_MASK:
- case INSERT_SUBVEC: {
+ case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18669,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
- else if (IntrData->Type == INSERT_SUBVEC) {
- // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
- assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
- unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
- Imm *= Src2.getSimpleValueType().getVectorNumElements();
- Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
- }
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon Jan 2 23:45:57 2017
@@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- EXPAND_FROM_MEM, INSERT_SUBVEC,
+ EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWi
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Mon Jan 2 23:45:57 2017
@@ -2970,3 +2970,89 @@ define <4 x double> @test_vextractf64x4(
}
declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
+
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
+; CHECK-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vblendmps %zmm3, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vmovaps %zmm3, %zmm1 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
+; CHECK-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpblendmd %zmm3, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: vmovdqa32 %zmm3, %zmm1 {%k1} {z}
+; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,2,3]
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon Jan 2 23:45:57 2017
@@ -3937,86 +3937,6 @@ define <16 x float>@test_int_x86_avx512_
ret <16 x float> %res2
}
-declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
-
-define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res2, %res3
- ret <16 x float> %res4
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)
-
-define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
- %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res2, %res3
- ret <16 x i32> %res4
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
-
-define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
- %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res2, %res3
- ret <8 x double> %res4
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
-
-define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
- %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
- %res3 = add <8 x i64> %res, %res1
- %res4 = add <8 x i64> %res2, %res3
- ret <8 x i64> %res4
-}
-
declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll Mon Jan 2 23:45:57 2017
@@ -52,3 +52,91 @@ define <8 x float>@test_int_x86_avx512_m
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}
+
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm3 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
+; CHECK-NEXT: vmovapd %zmm0, %zmm4
+; CHECK-NEXT: vpermt2pd %zmm1, %zmm3, %zmm4
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vblendmpd %zmm4, %zmm2, %zmm2 {%k1}
+; CHECK-NEXT: vpermt2pd %zmm1, %zmm3, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm4, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm3 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vpermt2q %zmm1, %zmm3, %zmm4
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vpblendmq %zmm4, %zmm2, %zmm2 {%k1}
+; CHECK-NEXT: vpermt2q %zmm1, %zmm3, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddq %zmm4, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Mon Jan 2 23:45:57 2017
@@ -325,86 +325,6 @@ define <2 x double>@test_int_x86_avx512_
ret <2 x double> %res2
}
-declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)
-
-define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res2, %res3
- ret <16 x float> %res4
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)
-
-define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
- %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res3, %res2
- ret <8 x double> %res4
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)
-
-define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
- %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
- %res3 = add <16 x i32> %res, %res1
- %res4 = add <16 x i32> %res3, %res2
- ret <16 x i32> %res4
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)
-
-define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
- %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
- %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
- %res3 = add <8 x i64> %res, %res1
- %res4 = add <8 x i64> %res2, %res3
- ret <8 x i64> %res4
-}
-
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll Mon Jan 2 23:45:57 2017
@@ -1579,3 +1579,43 @@ define <2 x double>@test_int_x86_avx512_
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}
+
+declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xd9,0x01]
+; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
+; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xcb]
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xd9,0x01]
+; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb]
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll Mon Jan 2 23:45:57 2017
@@ -549,46 +549,6 @@ define <8 x float>@test_int_x86_avx512_m
ret <8 x float> %res2
}
-declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
-
-define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xd9,0x01]
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xc1,0x01]
-; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
-; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
- %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
- %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
- %res3 = fadd <4 x double> %res, %res1
- %res4 = fadd <4 x double> %res2, %res3
- ret <4 x double> %res4
-}
-
-declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
-
-define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xd9,0x01]
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xc1,0x01]
-; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
-; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
- %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
- %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
- %res3 = add <4 x i64> %res, %res1
- %res4 = add <4 x i64> %res3, %res2
- ret <4 x i64> %res4
-}
-
declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon Jan 2 23:45:57 2017
@@ -4792,3 +4792,44 @@ define <4 x float>@test_int_x86_avx512_m
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
+
+declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xc1,0x01]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x65,0xc8]
+; CHECK-NEXT: vmovaps %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xd0]
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xc1,0x01]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x64,0xc8]
+; CHECK-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xd0]
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+
+ %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=290864&r1=290863&r2=290864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon Jan 2 23:45:57 2017
@@ -3689,47 +3689,6 @@ define <8 x float>@test_int_x86_avx512_m
ret <8 x float> %res2
}
-declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
-
-define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xd9,0x01]
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xc1,0x01]
-; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
-; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
- %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
- %res3 = fadd <8 x float> %res, %res1
- %res4 = fadd <8 x float> %res2, %res3
- ret <8 x float> %res4
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
-; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xd9,0x01]
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xc1,0x01]
-; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
-; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
-; CHECK-NEXT: retq ## encoding: [0xc3]
-
- %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
- %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
- %res3 = add <8 x i32> %res, %res1
- %res4 = add <8 x i32> %res2, %res3
- ret <8 x i32> %res4
-}
-
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
More information about the llvm-commits
mailing list