[llvm] r265259 - AVX-512: Load and Extended Load for i1 vectors
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 3 01:41:14 PDT 2016
Author: delena
Date: Sun Apr 3 03:41:12 2016
New Revision: 265259
URL: http://llvm.org/viewvc/llvm-project?rev=265259&view=rev
Log:
AVX-512: Load and Extended Load for i1 vectors
Implemented load + {sign|zero}_extend for i1 vectors.
Fixed failures in i1 vector loads.
Covered loading of v2i1, v4i1, v8i1, v16i1, v32i1 and v64i1 vectors for KNL and SKX.
Differential Revision: http://reviews.llvm.org/D18737
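For reference, the pattern this enables is an i1 vector load feeding a sign- or zero-extend, as in the sketch below (hypothetical function name; the 16-element sext case is checked verbatim by the new load_16i1 test in avx512-mask-op.ll further down):

define <16 x i32> @sext_16i1(<16 x i1>* %p) {
  %m = load <16 x i1>, <16 x i1>* %p
  %s = sext <16 x i1> %m to <16 x i32>
  ret <16 x i32> %s
}

On KNL this now selects to a kmovw of the 16 mask bits followed by a zero-masked vpbroadcastd, as the new test checks.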
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Apr 3 03:41:12 2016
@@ -1384,8 +1384,17 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
- setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
+ setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
+ setOperationAction(ISD::LOAD, MVT::v8i1, Legal);
+ for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
+ MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
+ MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
+ }
setOperationAction(ISD::FADD, MVT::v16f32, Legal);
setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
@@ -1661,6 +1670,8 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+ setOperationAction(ISD::LOAD, MVT::v32i1, Legal);
+ setOperationAction(ISD::LOAD, MVT::v64i1, Legal);
setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
@@ -1757,6 +1768,8 @@ X86TargetLowering::X86TargetLowering(con
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+ setOperationAction(ISD::LOAD, MVT::v2i1, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i1, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
@@ -16093,6 +16106,98 @@ static SDValue LowerSIGN_EXTEND(SDValue
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+static SDValue LowerExtended1BitVectorLoad(SDValue Op,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+
+ LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
+ SDLoc dl(Ld);
+ EVT MemVT = Ld->getMemoryVT();
+ assert(MemVT.isVector() && MemVT.getScalarType() == MVT::i1 &&
+ "Expected i1 vector load");
+ unsigned ExtOpcode = Ld->getExtensionType() == ISD::ZEXTLOAD ?
+ ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ MVT VT = Op.getValueType().getSimpleVT();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
+ NumElts == 16) {
+ // Load and extend - everything is legal
+ if (NumElts < 8) {
+ SDValue Load = DAG.getLoad(MVT::v8i1, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getMemOperand());
+ // Replace chain users with the new chain.
+ assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
+ SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+ SDValue Load = DAG.getLoad(MemVT, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getMemOperand());
+ // Replace chain users with the new chain.
+ assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+
+ // Finally, do a normal sign- or zero-extend to the desired register.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Load);
+ }
+
+ if (NumElts <= 8) {
+ // At most 8 mask elements; assume only AVX-512F is available.
+ unsigned NumBitsToLoad = NumElts < 8 ? 8 : NumElts;
+ MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
+ SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getMemOperand());
+ // Replace chain users with the new chain.
+ assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
+ SDValue BitVec = DAG.getBitcast(MaskVT, Load);
+
+ if (NumElts == 8)
+ return DAG.getNode(ExtOpcode, dl, VT, BitVec);
+
+ // For v4i1 and v2i1, extend through an 8-element vector and extract the subvector.
+
+ MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
+ SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ assert(VT == MVT::v32i8 && "Unexpected extload type");
+
+ SmallVector<SDValue, 2> Chains;
+
+ SDValue BasePtr = Ld->getBasePtr();
+ SDValue LoadLo = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
+ Ld->getBasePtr(),
+ Ld->getMemOperand());
+ Chains.push_back(LoadLo.getValue(1));
+
+ SDValue BasePtrHi =
+ DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(2, dl, BasePtr.getValueType()));
+
+ SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
+ BasePtrHi,
+ Ld->getMemOperand());
+ Chains.push_back(LoadHi.getValue(1));
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+
+ SDValue Lo = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadLo);
+ SDValue Hi = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v32i8, Lo, Hi);
+}
+
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
@@ -16113,6 +16218,9 @@ static SDValue LowerExtendedLoad(SDValue
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
+ if (MemVT.getScalarType() == MVT::i1)
+ return LowerExtended1BitVectorLoad(Op, Subtarget, DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
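To summarize the new lowering (a reading aid, not part of the patch): when the extending load is fully legal (VLX+BWI+DQI, or a 16-element mask), the mask is loaded straight into a k-register and extended, with sub-8-element types going through v8i1 plus an EXTRACT_SUBVECTOR; with only AVX-512F and at most 8 elements, the mask bits are loaded as a scalar integer, bitcast to a v8i1 mask, and extended; the remaining case, v32i1 extended to v32i8, is split into two v16i1 loads (the high half at byte offset 2) whose extensions are concatenated. A minimal IR sketch that should take the split path on an AVX-512F-only target (hypothetical function; the committed tests cover the v32i16 and v64i8 variants instead):

define <32 x i8> @sext_32i1_to_32i8(<32 x i1>* %p) {
  %m = load <32 x i1>, <32 x i1>* %p
  %s = sext <32 x i1> %m to <32 x i8>
  ret <32 x i8> %s
}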
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Apr 3 03:41:12 2016
@@ -2091,6 +2091,11 @@ let Predicates = [HasDQI] in {
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
def : Pat<(store VK1:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
+
+ def : Pat<(v2i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
+ def : Pat<(v4i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store VK1:$src, addr:$dst),
@@ -2110,18 +2115,19 @@ let Predicates = [HasAVX512, NoDQI] in {
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit))>;
- def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
- (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
- def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
- (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
+ def : Pat<(v8i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK8)>;
+ def : Pat<(v2i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK2)>;
+ def : Pat<(v4i1 (load addr:$src)),
+ (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK4)>;
}
+
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(i1 (load addr:$src)),
- (COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0),
- (MOV8rm addr:$src), sub_8bit)),
- (i16 1)), VK1)>;
+ (COPY_TO_REGCLASS (AND16ri (MOVZX16rm8 addr:$src), (i16 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
@@ -2130,8 +2136,6 @@ let Predicates = [HasBWI] in {
(KMOVDmk addr:$dst, VK32:$src)>;
def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
(KMOVDkm addr:$src)>;
-}
-let Predicates = [HasBWI] in {
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
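On the pattern side: with DQI, plain v2i1/v4i1 loads now select KMOVBkm into the proper mask register class; without DQI, v8i1/v4i1/v2i1 loads fall back to a zero-extending byte load (MOVZX16rm8) copied into a VK register, and the scalar i1 load pattern is simplified to use MOVZX16rm8 as well. A hypothetical round-trip that the DQI patterns (together with the existing KMOVBmk store patterns above) should now cover with kmovb:

define void @copy_2i1(<2 x i1>* %src, <2 x i1>* %dst) {
  %m = load <2 x i1>, <2 x i1>* %src
  store <2 x i1> %m, <2 x i1>* %dst
  ret void
}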
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sun Apr 3 03:41:12 2016
@@ -200,7 +200,7 @@ define i16 @test15(i1 *%addr) {
}
;CHECK-LABEL: test16
-;CHECK: movb (%rdi), %al
+;CHECK: movzbw (%rdi), %ax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
@@ -214,7 +214,7 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
-;KNL: movb (%rdi), %al
+;KNL: movzbw (%rdi), %ax
;KNL: andw $1, %ax
;KNL: kshiftlw $4
;KNL: korw
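For context, test16 here has roughly the following shape (a reconstruction from the hunk header and CHECK lines, not the verbatim test body); the i1 load feeding the mask insert is what now becomes movzbw + kmovw under the updated i1 load pattern:

define i16 @test16(i1* %addr, i16 %a) {
  %x = load i1, i1* %addr
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1> %x1 to i16
  ret i16 %x2
}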
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sun Apr 3 03:41:12 2016
@@ -53,9 +53,11 @@ define void @mask16_mem(i16* %ptr) {
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## BB#0:
-; KNL-NEXT: kmovw (%rdi), %k0
+; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: knotw %k0, %k0
-; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
@@ -417,444 +419,6 @@ define <16 x i1> @test15(i32 %x, i32 %y)
}
define <64 x i8> @test16(i64 %x) {
-; KNL-LABEL: test16:
-; KNL: ## BB#0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: Ltmp0:
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: Ltmp1:
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: Ltmp2:
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: pushq %r15
-; KNL-NEXT: pushq %r14
-; KNL-NEXT: pushq %r13
-; KNL-NEXT: pushq %r12
-; KNL-NEXT: pushq %rbx
-; KNL-NEXT: andq $-32, %rsp
-; KNL-NEXT: subq $128, %rsp
-; KNL-NEXT: Ltmp3:
-; KNL-NEXT: .cfi_offset %rbx, -56
-; KNL-NEXT: Ltmp4:
-; KNL-NEXT: .cfi_offset %r12, -48
-; KNL-NEXT: Ltmp5:
-; KNL-NEXT: .cfi_offset %r13, -40
-; KNL-NEXT: Ltmp6:
-; KNL-NEXT: .cfi_offset %r14, -32
-; KNL-NEXT: Ltmp7:
-; KNL-NEXT: .cfi_offset %r15, -24
-; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: shrq $32, %rax
-; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl $271, %eax ## imm = 0x10F
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: movl %edi, %ecx
-; KNL-NEXT: andl $1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: movl $257, %ecx ## imm = 0x101
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $258, %ecx ## imm = 0x102
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $259, %ecx ## imm = 0x103
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $260, %ecx ## imm = 0x104
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $261, %ecx ## imm = 0x105
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $262, %ecx ## imm = 0x106
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $263, %ecx ## imm = 0x107
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $264, %ecx ## imm = 0x108
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $265, %ecx ## imm = 0x109
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $266, %ecx ## imm = 0x10A
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $267, %ecx ## imm = 0x10B
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $268, %ecx ## imm = 0x10C
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $269, %ecx ## imm = 0x10D
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; KNL-NEXT: movl $270, %ecx ## imm = 0x10E
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
-; KNL-NEXT: movl $1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
-; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
-; KNL-NEXT: movq %r15, %rdx
-; KNL-NEXT: shrq $17, %rdx
-; KNL-NEXT: andb $1, %dl
-; KNL-NEXT: je LBB22_2
-; KNL-NEXT: ## BB#1:
-; KNL-NEXT: movb $-1, %dl
-; KNL-NEXT: LBB22_2:
-; KNL-NEXT: movq %r15, %r11
-; KNL-NEXT: shrq $16, %r11
-; KNL-NEXT: andb $1, %r11b
-; KNL-NEXT: je LBB22_4
-; KNL-NEXT: ## BB#3:
-; KNL-NEXT: movb $-1, %r11b
-; KNL-NEXT: LBB22_4:
-; KNL-NEXT: movq %r15, %r10
-; KNL-NEXT: shrq $18, %r10
-; KNL-NEXT: andb $1, %r10b
-; KNL-NEXT: je LBB22_6
-; KNL-NEXT: ## BB#5:
-; KNL-NEXT: movb $-1, %r10b
-; KNL-NEXT: LBB22_6:
-; KNL-NEXT: movq %r15, %r9
-; KNL-NEXT: shrq $19, %r9
-; KNL-NEXT: andb $1, %r9b
-; KNL-NEXT: je LBB22_8
-; KNL-NEXT: ## BB#7:
-; KNL-NEXT: movb $-1, %r9b
-; KNL-NEXT: LBB22_8:
-; KNL-NEXT: movq %r15, %rbx
-; KNL-NEXT: shrq $20, %rbx
-; KNL-NEXT: andb $1, %bl
-; KNL-NEXT: je LBB22_10
-; KNL-NEXT: ## BB#9:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB22_10:
-; KNL-NEXT: movq %r15, %r12
-; KNL-NEXT: shrq $21, %r12
-; KNL-NEXT: andb $1, %r12b
-; KNL-NEXT: je LBB22_12
-; KNL-NEXT: ## BB#11:
-; KNL-NEXT: movb $-1, %r12b
-; KNL-NEXT: LBB22_12:
-; KNL-NEXT: movq %r15, %r14
-; KNL-NEXT: shrq $22, %r14
-; KNL-NEXT: andb $1, %r14b
-; KNL-NEXT: je LBB22_14
-; KNL-NEXT: ## BB#13:
-; KNL-NEXT: movb $-1, %r14b
-; KNL-NEXT: LBB22_14:
-; KNL-NEXT: movq %r15, %r8
-; KNL-NEXT: shrq $23, %r8
-; KNL-NEXT: andb $1, %r8b
-; KNL-NEXT: je LBB22_16
-; KNL-NEXT: ## BB#15:
-; KNL-NEXT: movb $-1, %r8b
-; KNL-NEXT: LBB22_16:
-; KNL-NEXT: movq %r15, %r13
-; KNL-NEXT: shrq $24, %r13
-; KNL-NEXT: andb $1, %r13b
-; KNL-NEXT: je LBB22_18
-; KNL-NEXT: ## BB#17:
-; KNL-NEXT: movb $-1, %r13b
-; KNL-NEXT: LBB22_18:
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $25, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_20
-; KNL-NEXT: ## BB#19:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_20:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $26, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_22
-; KNL-NEXT: ## BB#21:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_22:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movl $272, %esi ## imm = 0x110
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $27, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_24
-; KNL-NEXT: ## BB#23:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_24:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movl $273, %eax ## imm = 0x111
-; KNL-NEXT: bextrl %esi, %edi, %esi
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $28, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB22_26
-; KNL-NEXT: ## BB#25:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB22_26:
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vmovd %esi, %xmm2
-; KNL-NEXT: movl $274, %esi ## imm = 0x112
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $29, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB22_28
-; KNL-NEXT: ## BB#27:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB22_28:
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; KNL-NEXT: bextrl %esi, %edi, %eax
-; KNL-NEXT: movzbl %r11b, %esi
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $30, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB22_30
-; KNL-NEXT: ## BB#29:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB22_30:
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; KNL-NEXT: movl $275, %eax ## imm = 0x113
-; KNL-NEXT: bextrl %eax, %edi, %r11d
-; KNL-NEXT: movzbl %dl, %edx
-; KNL-NEXT: vmovd %esi, %xmm3
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $31, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_32
-; KNL-NEXT: ## BB#31:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_32:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; KNL-NEXT: movl $276, %eax ## imm = 0x114
-; KNL-NEXT: bextrl %eax, %edi, %esi
-; KNL-NEXT: movl $277, %r11d ## imm = 0x115
-; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r10b, %r10d
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_34
-; KNL-NEXT: ## BB#33:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_34:
-; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r11d, %edi, %edx
-; KNL-NEXT: movl $278, %r11d ## imm = 0x116
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r9b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shlq $63, %rcx
-; KNL-NEXT: sarq $63, %rcx
-; KNL-NEXT: vmovd %ecx, %xmm4
-; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $2, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_36
-; KNL-NEXT: ## BB#35:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_36:
-; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r11d, %edi, %edx
-; KNL-NEXT: movl $279, %r9d ## imm = 0x117
-; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %bl, %ebx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $3, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_38
-; KNL-NEXT: ## BB#37:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_38:
-; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r9d, %edi, %edx
-; KNL-NEXT: movl $280, %esi ## imm = 0x118
-; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r12b, %ebx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $4, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_40
-; KNL-NEXT: ## BB#39:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_40:
-; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %esi, %edi, %ecx
-; KNL-NEXT: movl $281, %edx ## imm = 0x119
-; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r14b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $5, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_42
-; KNL-NEXT: ## BB#41:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_42:
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $282, %edx ## imm = 0x11A
-; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r8b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %bl
-; KNL-NEXT: shrb $6, %bl
-; KNL-NEXT: andb $1, %bl
-; KNL-NEXT: je LBB22_44
-; KNL-NEXT: ## BB#43:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB22_44:
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %eax
-; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
-; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r13b, %esi
-; KNL-NEXT: movzbl %bl, %edx
-; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %bl
-; KNL-NEXT: shrb $7, %bl
-; KNL-NEXT: je LBB22_46
-; KNL-NEXT: ## BB#45:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB22_46:
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: movl $284, %edx ## imm = 0x11C
-; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
-; KNL-NEXT: movzbl %al, %esi
-; KNL-NEXT: movzbl %bl, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $8, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_48
-; KNL-NEXT: ## BB#47:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_48:
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $285, %edx ## imm = 0x11D
-; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
-; KNL-NEXT: movzbl %sil, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $9, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_50
-; KNL-NEXT: ## BB#49:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_50:
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $286, %edx ## imm = 0x11E
-; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
-; KNL-NEXT: movzbl %sil, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $10, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_52
-; KNL-NEXT: ## BB#51:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_52:
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %edx
-; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $11, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_54
-; KNL-NEXT: ## BB#53:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_54:
-; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; KNL-NEXT: shrl $31, %edi
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $12, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_56
-; KNL-NEXT: ## BB#55:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_56:
-; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $13, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_58
-; KNL-NEXT: ## BB#57:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_58:
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $14, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB22_60
-; KNL-NEXT: ## BB#59:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB22_60:
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
-; KNL-NEXT: shrq $15, %r15
-; KNL-NEXT: andb $1, %r15b
-; KNL-NEXT: je LBB22_62
-; KNL-NEXT: ## BB#61:
-; KNL-NEXT: movb $-1, %r15b
-; KNL-NEXT: LBB22_62:
-; KNL-NEXT: movzbl %r15b, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: leaq -40(%rbp), %rsp
-; KNL-NEXT: popq %rbx
-; KNL-NEXT: popq %r12
-; KNL-NEXT: popq %r13
-; KNL-NEXT: popq %r14
-; KNL-NEXT: popq %r15
-; KNL-NEXT: popq %rbp
-; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
@@ -872,446 +436,6 @@ define <64 x i8> @test16(i64 %x) {
}
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
-; KNL-LABEL: test17:
-; KNL: ## BB#0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: Ltmp8:
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: Ltmp9:
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: Ltmp10:
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: pushq %r15
-; KNL-NEXT: pushq %r14
-; KNL-NEXT: pushq %r13
-; KNL-NEXT: pushq %r12
-; KNL-NEXT: pushq %rbx
-; KNL-NEXT: andq $-32, %rsp
-; KNL-NEXT: subq $128, %rsp
-; KNL-NEXT: Ltmp11:
-; KNL-NEXT: .cfi_offset %rbx, -56
-; KNL-NEXT: Ltmp12:
-; KNL-NEXT: .cfi_offset %r12, -48
-; KNL-NEXT: Ltmp13:
-; KNL-NEXT: .cfi_offset %r13, -40
-; KNL-NEXT: Ltmp14:
-; KNL-NEXT: .cfi_offset %r14, -32
-; KNL-NEXT: Ltmp15:
-; KNL-NEXT: .cfi_offset %r15, -24
-; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: shrq $32, %rax
-; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
-; KNL-NEXT: movl %edi, %eax
-; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: vmovd %eax, %xmm0
-; KNL-NEXT: movl $257, %eax ## imm = 0x101
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $258, %eax ## imm = 0x102
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $259, %eax ## imm = 0x103
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $260, %eax ## imm = 0x104
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $261, %eax ## imm = 0x105
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $262, %eax ## imm = 0x106
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $263, %eax ## imm = 0x107
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $264, %eax ## imm = 0x108
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $265, %eax ## imm = 0x109
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $266, %eax ## imm = 0x10A
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $267, %eax ## imm = 0x10B
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $268, %eax ## imm = 0x10C
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $269, %eax ## imm = 0x10D
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $270, %eax ## imm = 0x10E
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: movl $271, %eax ## imm = 0x10F
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
-; KNL-NEXT: cmpl %edx, %esi
-; KNL-NEXT: setg %al
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
-; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
-; KNL-NEXT: movq %r15, %rdx
-; KNL-NEXT: shrq $17, %rdx
-; KNL-NEXT: andb $1, %dl
-; KNL-NEXT: je LBB23_2
-; KNL-NEXT: ## BB#1:
-; KNL-NEXT: movb $-1, %dl
-; KNL-NEXT: LBB23_2:
-; KNL-NEXT: movq %r15, %r11
-; KNL-NEXT: shrq $16, %r11
-; KNL-NEXT: andb $1, %r11b
-; KNL-NEXT: je LBB23_4
-; KNL-NEXT: ## BB#3:
-; KNL-NEXT: movb $-1, %r11b
-; KNL-NEXT: LBB23_4:
-; KNL-NEXT: movq %r15, %r10
-; KNL-NEXT: shrq $18, %r10
-; KNL-NEXT: andb $1, %r10b
-; KNL-NEXT: je LBB23_6
-; KNL-NEXT: ## BB#5:
-; KNL-NEXT: movb $-1, %r10b
-; KNL-NEXT: LBB23_6:
-; KNL-NEXT: movq %r15, %r9
-; KNL-NEXT: shrq $19, %r9
-; KNL-NEXT: andb $1, %r9b
-; KNL-NEXT: je LBB23_8
-; KNL-NEXT: ## BB#7:
-; KNL-NEXT: movb $-1, %r9b
-; KNL-NEXT: LBB23_8:
-; KNL-NEXT: movq %r15, %rbx
-; KNL-NEXT: shrq $20, %rbx
-; KNL-NEXT: andb $1, %bl
-; KNL-NEXT: je LBB23_10
-; KNL-NEXT: ## BB#9:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB23_10:
-; KNL-NEXT: movq %r15, %r12
-; KNL-NEXT: shrq $21, %r12
-; KNL-NEXT: andb $1, %r12b
-; KNL-NEXT: je LBB23_12
-; KNL-NEXT: ## BB#11:
-; KNL-NEXT: movb $-1, %r12b
-; KNL-NEXT: LBB23_12:
-; KNL-NEXT: movq %r15, %r14
-; KNL-NEXT: shrq $22, %r14
-; KNL-NEXT: andb $1, %r14b
-; KNL-NEXT: je LBB23_14
-; KNL-NEXT: ## BB#13:
-; KNL-NEXT: movb $-1, %r14b
-; KNL-NEXT: LBB23_14:
-; KNL-NEXT: movq %r15, %r8
-; KNL-NEXT: shrq $23, %r8
-; KNL-NEXT: andb $1, %r8b
-; KNL-NEXT: je LBB23_16
-; KNL-NEXT: ## BB#15:
-; KNL-NEXT: movb $-1, %r8b
-; KNL-NEXT: LBB23_16:
-; KNL-NEXT: movq %r15, %r13
-; KNL-NEXT: shrq $24, %r13
-; KNL-NEXT: andb $1, %r13b
-; KNL-NEXT: je LBB23_18
-; KNL-NEXT: ## BB#17:
-; KNL-NEXT: movb $-1, %r13b
-; KNL-NEXT: LBB23_18:
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $25, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_20
-; KNL-NEXT: ## BB#19:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_20:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $26, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_22
-; KNL-NEXT: ## BB#21:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_22:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movl $272, %esi ## imm = 0x110
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $27, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_24
-; KNL-NEXT: ## BB#23:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_24:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movl $273, %eax ## imm = 0x111
-; KNL-NEXT: bextrl %esi, %edi, %esi
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $28, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB23_26
-; KNL-NEXT: ## BB#25:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB23_26:
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: bextrl %eax, %edi, %eax
-; KNL-NEXT: vmovd %esi, %xmm2
-; KNL-NEXT: movl $274, %esi ## imm = 0x112
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $29, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB23_28
-; KNL-NEXT: ## BB#27:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB23_28:
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; KNL-NEXT: bextrl %esi, %edi, %eax
-; KNL-NEXT: movzbl %r11b, %esi
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shrq $30, %rcx
-; KNL-NEXT: andb $1, %cl
-; KNL-NEXT: je LBB23_30
-; KNL-NEXT: ## BB#29:
-; KNL-NEXT: movb $-1, %cl
-; KNL-NEXT: LBB23_30:
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; KNL-NEXT: movl $275, %eax ## imm = 0x113
-; KNL-NEXT: bextrl %eax, %edi, %r11d
-; KNL-NEXT: movzbl %dl, %edx
-; KNL-NEXT: vmovd %esi, %xmm3
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $31, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_32
-; KNL-NEXT: ## BB#31:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_32:
-; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; KNL-NEXT: movl $276, %eax ## imm = 0x114
-; KNL-NEXT: bextrl %eax, %edi, %esi
-; KNL-NEXT: movl $277, %r11d ## imm = 0x115
-; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r10b, %r10d
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_34
-; KNL-NEXT: ## BB#33:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_34:
-; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r11d, %edi, %edx
-; KNL-NEXT: movl $278, %r11d ## imm = 0x116
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r9b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: movq %r15, %rcx
-; KNL-NEXT: shlq $63, %rcx
-; KNL-NEXT: sarq $63, %rcx
-; KNL-NEXT: vmovd %ecx, %xmm4
-; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $2, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_36
-; KNL-NEXT: ## BB#35:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_36:
-; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r11d, %edi, %edx
-; KNL-NEXT: movl $279, %r9d ## imm = 0x117
-; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %bl, %ebx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $3, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_38
-; KNL-NEXT: ## BB#37:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_38:
-; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %r9d, %edi, %edx
-; KNL-NEXT: movl $280, %esi ## imm = 0x118
-; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r12b, %ebx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $4, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_40
-; KNL-NEXT: ## BB#39:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_40:
-; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %esi, %edi, %ecx
-; KNL-NEXT: movl $281, %edx ## imm = 0x119
-; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r14b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %al
-; KNL-NEXT: shrb $5, %al
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_42
-; KNL-NEXT: ## BB#41:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_42:
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $282, %edx ## imm = 0x11A
-; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r8b, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %bl
-; KNL-NEXT: shrb $6, %bl
-; KNL-NEXT: andb $1, %bl
-; KNL-NEXT: je LBB23_44
-; KNL-NEXT: ## BB#43:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB23_44:
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %eax
-; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
-; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
-; KNL-NEXT: movzbl %r13b, %esi
-; KNL-NEXT: movzbl %bl, %edx
-; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
-; KNL-NEXT: movb %r15b, %bl
-; KNL-NEXT: shrb $7, %bl
-; KNL-NEXT: je LBB23_46
-; KNL-NEXT: ## BB#45:
-; KNL-NEXT: movb $-1, %bl
-; KNL-NEXT: LBB23_46:
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: bextrl %ecx, %edi, %ecx
-; KNL-NEXT: movl $284, %edx ## imm = 0x11C
-; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
-; KNL-NEXT: movzbl %al, %esi
-; KNL-NEXT: movzbl %bl, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $8, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_48
-; KNL-NEXT: ## BB#47:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_48:
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $285, %edx ## imm = 0x11D
-; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
-; KNL-NEXT: movzbl %sil, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $9, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_50
-; KNL-NEXT: ## BB#49:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_50:
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %ecx
-; KNL-NEXT: movl $286, %edx ## imm = 0x11E
-; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
-; KNL-NEXT: movzbl %sil, %esi
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $10, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_52
-; KNL-NEXT: ## BB#51:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_52:
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
-; KNL-NEXT: bextrl %edx, %edi, %edx
-; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $11, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_54
-; KNL-NEXT: ## BB#53:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_54:
-; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; KNL-NEXT: shrl $31, %edi
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $12, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_56
-; KNL-NEXT: ## BB#55:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_56:
-; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $13, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_58
-; KNL-NEXT: ## BB#57:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_58:
-; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
-; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
-; KNL-NEXT: movzbl %cl, %ecx
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
-; KNL-NEXT: movq %r15, %rax
-; KNL-NEXT: shrq $14, %rax
-; KNL-NEXT: andb $1, %al
-; KNL-NEXT: je LBB23_60
-; KNL-NEXT: ## BB#59:
-; KNL-NEXT: movb $-1, %al
-; KNL-NEXT: LBB23_60:
-; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
-; KNL-NEXT: shrq $15, %r15
-; KNL-NEXT: andb $1, %r15b
-; KNL-NEXT: je LBB23_62
-; KNL-NEXT: ## BB#61:
-; KNL-NEXT: movb $-1, %r15b
-; KNL-NEXT: LBB23_62:
-; KNL-NEXT: movzbl %r15b, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
-; KNL-NEXT: leaq -40(%rbp), %rsp
-; KNL-NEXT: popq %rbx
-; KNL-NEXT: popq %r12
-; KNL-NEXT: popq %r13
-; KNL-NEXT: popq %r14
-; KNL-NEXT: popq %r15
-; KNL-NEXT: popq %rbp
-; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
@@ -1790,3 +914,127 @@ L2:
End:
ret void
}
+
+define <8 x i64> @load_8i1(<8 x i1>* %a) {
+; KNL-LABEL: load_8i1:
+; KNL: ## BB#0:
+; KNL-NEXT: movzbw (%rdi), %ax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_8i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb (%rdi), %k0
+; SKX-NEXT: vpmovm2q %k0, %zmm0
+; SKX-NEXT: retq
+ %b = load <8 x i1>, <8 x i1>* %a
+ %c = sext <8 x i1> %b to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define <16 x i32> @load_16i1(<16 x i1>* %a) {
+; KNL-LABEL: load_16i1:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw (%rdi), %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_16i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovw (%rdi), %k0
+; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: retq
+ %b = load <16 x i1>, <16 x i1>* %a
+ %c = sext <16 x i1> %b to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <2 x i16> @load_2i1(<2 x i1>* %a) {
+; KNL-LABEL: load_2i1:
+; KNL: ## BB#0:
+; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_2i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb (%rdi), %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: retq
+ %b = load <2 x i1>, <2 x i1>* %a
+ %c = sext <2 x i1> %b to <2 x i16>
+ ret <2 x i16> %c
+}
+
+define <4 x i16> @load_4i1(<4 x i1>* %a) {
+; KNL-LABEL: load_4i1:
+; KNL: ## BB#0:
+; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_4i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb (%rdi), %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq
+ %b = load <4 x i1>, <4 x i1>* %a
+ %c = sext <4 x i1> %b to <4 x i16>
+ ret <4 x i16> %c
+}
+
+define <32 x i16> @load_32i1(<32 x i1>* %a) {
+; KNL-LABEL: load_32i1:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw (%rdi), %k1
+; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: kmovw 2(%rdi), %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_32i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovd (%rdi), %k0
+; SKX-NEXT: vpmovm2w %k0, %zmm0
+; SKX-NEXT: retq
+ %b = load <32 x i1>, <32 x i1>* %a
+ %c = sext <32 x i1> %b to <32 x i16>
+ ret <32 x i16> %c
+}
+
+define <64 x i8> @load_64i1(<64 x i1>* %a) {
+; KNL-LABEL: load_64i1:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw (%rdi), %k1
+; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: kmovw 2(%rdi), %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: kmovw 4(%rdi), %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: kmovw 6(%rdi), %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
+; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: load_64i1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovq (%rdi), %k0
+; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: retq
+ %b = load <64 x i1>, <64 x i1>* %a
+ %c = sext <64 x i1> %b to <64 x i8>
+ ret <64 x i8> %c
+}
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Sun Apr 3 03:41:12 2016
@@ -291,7 +291,8 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_32-LABEL: test7:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=265259&r1=265258&r2=265259&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Sun Apr 3 03:41:12 2016
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq| FileCheck %s --check-prefix=VL_BW_DQ
@@ -399,34 +400,17 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0
}
define i64 @shuf64i1_zero(i64 %a) {
-; AVX512F-LABEL: shuf64i1_zero:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .Ltmp0:
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .Ltmp1:
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .Ltmp2:
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: movb $0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %ecx
-; AVX512F-NEXT: movq %rcx, %rax
-; AVX512F-NEXT: shlq $32, %rax
-; AVX512F-NEXT: orq %rcx, %rax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
-; AVX512F-NEXT: retq
-;
; VL_BW_DQ-LABEL: shuf64i1_zero:
; VL_BW_DQ: # BB#0:
-; VL_BW_DQ-NEXT: kxorq %k0, %k0, %k0
+; VL_BW_DQ-NEXT: kmovq %rdi, %k0
+; VL_BW_DQ-NEXT: vpmovm2b %k0, %zmm0
+; VL_BW_DQ-NEXT: vpbroadcastb %xmm0, %zmm0
+; VL_BW_DQ-NEXT: vpsllw $7, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovb2m %zmm0, %k0
; VL_BW_DQ-NEXT: kmovq %k0, %rax
; VL_BW_DQ-NEXT: retq
%b = bitcast i64 %a to <64 x i1>
- %c = shufflevector < 64 x i1> zeroinitializer, <64 x i1> undef, <64 x i32> zeroinitializer
+ %c = shufflevector < 64 x i1> %b, <64 x i1> undef, <64 x i32> zeroinitializer
%d = bitcast <64 x i1> %c to i64
ret i64 %d
}