[llvm] r237793 - AVX-512: fixed algorithm of building vectors of i1 elements
Elena Demikhovsky
elena.demikhovsky at intel.com
Wed May 20 07:32:04 PDT 2015
Author: delena
Date: Wed May 20 09:32:03 2015
New Revision: 237793
URL: http://llvm.org/viewvc/llvm-project?rev=237793&view=rev
Log:
AVX-512: fixed algorithm of building vectors of i1 elements
Fixed extraction and insertion of i1 elements.
A load or zextload of i1 should be followed by "and $1, %reg" to prevent loading garbage.
Added a bunch of new tests.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-select.ll
llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 20 09:32:03 2015
@@ -1471,6 +1471,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1500,6 +1504,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -5188,12 +5194,27 @@ static SDValue buildFromShuffleMostly(SD
return NV;
}
+static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
+ assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+ Op.getScalarValueSizeInBits() == 1 &&
+ "Can not convert non-constant vector");
+ uint64_t Immediate = 0;
+ for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
+ SDValue In = Op.getOperand(idx);
+ if (In.getOpcode() != ISD::UNDEF)
+ Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ }
+ SDLoc dl(Op);
+ MVT VT =
+ MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
+ return DAG.getConstant(Immediate, dl, VT);
+}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
- assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
+ assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
@@ -5209,62 +5230,69 @@ X86TargetLowering::LowerBUILD_VECTORvXi1
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
- bool AllContants = true;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
+ if (Imm.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // Vector has one or more non-const elements
uint64_t Immediate = 0;
- int NonConstIdx = -1;
+ SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
- unsigned NumNonConsts = 0;
- unsigned NumConsts = 0;
+ bool HasConstElts = false;
+ int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
- if (!isa<ConstantSDNode>(In)) {
- AllContants = false;
- NonConstIdx = idx;
- NumNonConsts++;
- } else {
- NumConsts++;
- if (cast<ConstantSDNode>(In)->getZExtValue())
- Immediate |= (1ULL << idx);
+ if (!isa<ConstantSDNode>(In))
+ NonConstIdx.push_back(idx);
+ else {
+ Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ HasConstElts = true;
}
- if (In != Op.getOperand(0))
+ if (SplatIdx == -1)
+ SplatIdx = idx;
+ else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
- if (AllContants) {
- SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
- DAG.getConstant(Immediate, dl, MVT::i16));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
- DAG.getIntPtrConstant(0, dl));
+ // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
+ if (IsSplat)
+ return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
+ DAG.getConstant(1, dl, VT),
+ DAG.getConstant(0, dl, VT));
+
+ // insert elements one by one
+ SDValue DstVec;
+ SDValue Imm;
+ if (Immediate) {
+ MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
+ Imm = DAG.getConstant(Immediate, dl, ImmVT);
+ }
+ else if (HasConstElts)
+ Imm = DAG.getConstant(0, dl, VT);
+ else
+ Imm = DAG.getUNDEF(VT);
+ if (Imm.getValueSizeInBits() == VT.getSizeInBits())
+ DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
+ else {
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
+ DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
+ DAG.getIntPtrConstant(0, dl));
}
- if (NumNonConsts == 1 && NonConstIdx != 0) {
- SDValue DstVec;
- if (NumConsts) {
- SDValue VecAsImm = DAG.getConstant(Immediate, dl,
- MVT::getIntegerVT(VT.getSizeInBits()));
- DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
- }
- else
- DstVec = DAG.getUNDEF(VT);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
- Op.getOperand(NonConstIdx),
- DAG.getIntPtrConstant(NonConstIdx, dl));
- }
- if (!IsSplat && (NonConstIdx != 0))
- llvm_unreachable("Unsupported BUILD_VECTOR operation");
- MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
- SDValue Select;
- if (IsSplat)
- Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
- DAG.getConstant(-1, dl, SelectVT),
- DAG.getConstant(0, dl, SelectVT));
- else
- Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
- DAG.getConstant((Immediate | 1), dl, SelectVT),
- DAG.getConstant(Immediate, dl, SelectVT));
- return DAG.getNode(ISD::BITCAST, dl, VT, Select);
+ for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
+ unsigned InsertIdx = NonConstIdx[i];
+ DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ Op.getOperand(InsertIdx),
+ DAG.getIntPtrConstant(InsertIdx, dl));
+ }
+ return DstVec;
}
/// \brief Return true if \p N implements a horizontal binop and return the
@@ -10670,15 +10698,11 @@ X86TargetLowering::InsertBitToMaskVector
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
+ if (IdxVal)
+ EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- const TargetRegisterClass* rc = getRegClassFor(VecVT);
- unsigned MaxSift = rc->getSize()*8 - 1;
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
- DAG.getConstant(MaxSift, dl, MVT::i8));
- EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
- DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
+ return EltInVec;
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
@@ -13623,6 +13647,29 @@ SDValue X86TargetLowering::LowerSELECT(S
}
}
+ if (VT.isVector() && VT.getScalarType() == MVT::i1) {
+ SDValue Op1Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
+ Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
+ else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
+ Op1Scalar = Op1.getOperand(0);
+ SDValue Op2Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
+ Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
+ else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
+ Op2Scalar = Op2.getOperand(0);
+ if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ Op1Scalar.getValueType(),
+ Cond, Op1Scalar, Op2Scalar);
+ if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
+ SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ }
+
if (VT == MVT::v4i1 || VT == MVT::v2i1) {
SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
@@ -20728,7 +20775,8 @@ static SDValue XFormVExtractWithShuffleI
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
- if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ if (!BCVT.isVector() ||
+ BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
}
@@ -20833,7 +20881,7 @@ static SDValue PerformEXTRACT_VECTOR_ELT
return NewOp;
SDValue InputVector = N->getOperand(0);
-
+ SDLoc dl(InputVector);
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
N->getValueType(0) == MVT::i32 &&
@@ -20858,6 +20906,18 @@ static SDValue PerformEXTRACT_VECTOR_ELT
MMXSrcOp.getOperand(0));
}
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
+ InputVector.getOpcode() == ISD::BITCAST &&
+ dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
+ uint64_t ExtractedElt =
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t InputValue =
+ cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ uint64_t Res = (InputValue >> ExtractedElt) & 1;
+ return DAG.getConstant(Res, dl, MVT::i1);
+ }
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
@@ -20903,7 +20963,6 @@ static SDValue PerformEXTRACT_VECTOR_ELT
// otherwise bounce the vector off the cache.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vals[4];
- SDLoc dl(InputVector);
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
@@ -23606,6 +23665,7 @@ static SDValue PerformSExtCombine(SDNode
const X86Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
@@ -23613,7 +23673,6 @@ static SDValue PerformSExtCombine(SDNode
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
N0.getValueType() == MVT::i8 && VT == MVT::i32) {
- SDLoc dl(N);
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
N0.getOperand(0), N0.getOperand(1));
@@ -23621,8 +23680,15 @@ static SDValue PerformSExtCombine(SDNode
return R.getValue(1);
}
- if (!DCI.isBeforeLegalizeOps())
+ if (!DCI.isBeforeLegalizeOps()) {
+ if (N0.getValueType() == MVT::i1) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue AllOnes =
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
+ return DAG.getNode(ISD::SELECT, dl, VT, N0, AllOnes, Zero);
+ }
return SDValue();
+ }
if (!Subtarget->hasFp256())
return SDValue();
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed May 20 09:32:03 2015
@@ -1855,7 +1855,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(i1 (load addr:$src)),
- (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
+ (COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0),
+ (MOV8rm addr:$src), sub_8bit)),
+ (i16 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
@@ -1920,13 +1922,13 @@ let Predicates = [HasAVX512, NoDQI] in {
// GR from/to 8-bit mask without native support
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
- VK8)>;
+ (KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit)>;
}
+
let Predicates = [HasAVX512] in {
def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK1)>;
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Wed May 20 09:32:03 2015
@@ -1064,11 +1064,12 @@ defm : CMOVmr<X86_COND_O , CMOVNO16rm, C
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
+def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
+def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
def : Pat<(zextloadi64i1 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
+ (SUBREG_TO_REG (i64 0),
+ (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Wed May 20 09:32:03 2015
@@ -137,10 +137,12 @@ define i64 @test12(<16 x i64>%a, <16 x i
}
;CHECK-LABEL: test13
-;CHECK: cmpl
-;CHECK: sbbl
-;CHECK: orl $65532
-;CHECK: ret
+;CHECK: cmpl %esi, %edi
+;CHECK: setb %al
+;CHECK: andl $1, %eax
+;CHECK: kmovw %eax, %k0
+;CHECK: movw $-4
+;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
@@ -167,19 +169,22 @@ define i64 @test14(<8 x i64>%a, <8 x i64
}
;CHECK-LABEL: test15
-;CHECK: kshiftlw
-;CHECK: kmovw
-;CHECK: ret
+;CHECK: movb (%rdi), %al
+;CHECK: andb $1, %al
+;CHECK: movw $-1, %ax
+;CHECK: cmovew
define i16 @test15(i1 *%addr) {
- %x = load i1 , i1 * %addr, align 128
+ %x = load i1 , i1 * %addr, align 1
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
;CHECK-LABEL: test16
-;CHECK: kshiftlw
-;CHECK: kshiftrw
+;CHECK: movb (%rdi), %al
+;CHECK: andw $1, %ax
+;CHECK: kmovw
+;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
@@ -191,11 +196,11 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
-;KNL: kshiftlw
-;KNL: kshiftrw
+;KNL: movb (%rdi), %al
+;KNL: andw $1, %ax
+;KNL: kshiftlw $4
;KNL: korw
-;SKX: kshiftlb
-;SKX: kshiftrb
+;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed May 20 09:32:03 2015
@@ -191,7 +191,7 @@ false:
; SKX-LABEL: test7
; SKX: vpmovw2m
-; SKX: kmovw %eax, %k1
+; SKX: kmovb %eax, %k1
; SKX: korb
define void @test7(<8 x i1> %mask) {
@@ -282,3 +282,114 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x
ret <4 x i1>%c
}
+; KNL-LABEL: test12
+; KNL: movl %edi, %eax
+define i32 @test12(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 0
+ %c = select i1 %b, i32 %x, i32 %y
+ ret i32 %c
+}
+
+; KNL-LABEL: test13
+; KNL: movl %esi, %eax
+define i32 @test13(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 3
+ %c = select i1 %b, i32 %x, i32 %y
+ ret i32 %c
+}
+
+; SKX-LABEL: test14
+; SKX: movb $11, %al
+; SKX: kmovb %eax, %k0
+; SKX: vpmovm2d %k0, %xmm0
+
+define <4 x i1> @test14() {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 2
+ %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
+ ret <4 x i1> %c
+}
+
+; KNL-LABEL: test15
+; KNL: cmovgw
+define <16 x i1> @test15(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = bitcast i16 1 to <16 x i1>
+ %mask = icmp sgt i32 %x, %y
+ %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
+ ret <16 x i1> %c
+}
+
+; SKX-LABEL: test16
+; SKX: kxnorw %k1, %k1, %k1
+; SKX: kshiftrw $15, %k1, %k1
+; SKX: kshiftlq $5, %k1, %k1
+; SKX: korq %k1, %k0, %k0
+; SKX: vpmovm2b %k0, %zmm0
+define <64 x i8> @test16(i64 %x) {
+ %a = bitcast i64 %x to <64 x i1>
+ %b = insertelement <64 x i1>%a, i1 true, i32 5
+ %c = sext <64 x i1>%b to <64 x i8>
+ ret <64 x i8>%c
+}
+
+; SKX-LABEL: test17
+; SKX: setg %al
+; SKX: andl $1, %eax
+; SKX: kmovw %eax, %k1
+; SKX: kshiftlq $5, %k1, %k1
+; SKX: korq %k1, %k0, %k0
+; SKX: vpmovm2b %k0, %zmm0
+define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
+ %a = bitcast i64 %x to <64 x i1>
+ %b = icmp sgt i32 %y, %z
+ %c = insertelement <64 x i1>%a, i1 %b, i32 5
+ %d = sext <64 x i1>%c to <64 x i8>
+ ret <64 x i8>%d
+}
+
+; KNL-LABEL: test18
+define <8 x i1> @test18(i8 %a, i16 %y) {
+ %b = bitcast i8 %a to <8 x i1>
+ %b1 = bitcast i16 %y to <16 x i1>
+ %el1 = extractelement <16 x i1>%b1, i32 8
+ %el2 = extractelement <16 x i1>%b1, i32 9
+ %c = insertelement <8 x i1>%b, i1 %el1, i32 7
+ %d = insertelement <8 x i1>%c, i1 %el2, i32 6
+ ret <8 x i1>%d
+}
+
+; KNL-LABEL: test19
+; KNL: movzbl %dil, %eax
+; KNL: kmovw %eax, %k0
+; KNL: kshiftlw $13, %k0, %k0
+; KNL: kshiftrw $15, %k0, %k0
+; KNL: kmovw %k0, %eax
+; KNL: andl $1, %eax
+; KNL: testb %al, %al
+
+define <8 x i1> @test19(i8 %a) {
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
+ ret <8 x i1> %c
+}
+
+; KNL-LABEL: test20
+; KNL: movzbl %dil, %eax
+; KNL: kmovw %eax, %k0
+; KNL: kshiftlw $13, %k0, %k1
+; KNL: kshiftrw $15, %k1, %k1
+; KNL: kshiftlw $12, %k0, %k0
+; KNL: kshiftrw $15, %k0, %k0
+; KNL: kshiftlw $4, %k0, %k0
+; KNL: kshiftlw $1, %k1, %k2
+; KNL: korw %k0, %k2, %k0
+; KNL: kshiftlw $6, %k1, %k1
+; KNL: korw %k1, %k0, %k1
+define <8 x i1> @test20(i8 %a, i16 %y) {
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
+ ret <8 x i1> %c
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Wed May 20 09:32:03 2015
@@ -50,8 +50,10 @@ define <16 x double> @select04(<16 x dou
}
; CHECK-LABEL: select05
-; CHECK: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK: movzbl %sil, %eax
+; CHECK: kmovw %eax, %k0
+; CHECK: movzbl %dil, %eax
+; CHECK: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select05(i8 %a.0, i8 %m) {
@@ -63,8 +65,10 @@ define i8 @select05(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select06
-; CHECK: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK: movzbl %sil, %eax
+; CHECK: kmovw %eax, %k0
+; CHECK: movzbl %dil, %eax
+; CHECK: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select06(i8 %a.0, i8 %m) {
@@ -76,9 +80,12 @@ define i8 @select06(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select07
-; CHECK-DAG: kmovw %edx, %k0
-; CHECK-DAG: kmovw %edi, %k1
-; CHECK-DAG: kmovw %esi, %k2
+; CHECK-DAG: movzbl %dl, %eax
+; CHECK-DAG: kmovw %eax, %k0
+; CHECK-DAG: movzbl %dil, %eax
+; CHECK-DAG: kmovw %eax, %k1
+; CHECK-DAG: movzbl %sil, %eax
+; CHECK-DAG: kmovw %eax, %k2
; CHECK: kandw %k0, %k1, %k1
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kandw %k0, %k2, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll?rev=237793&r1=237792&r2=237793&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll Wed May 20 09:32:03 2015
@@ -156,10 +156,9 @@ define <16 x i16> @trunc_v16i32_to_v16i1
}
; CHECK-LABEL: trunc_i32_to_i1
-; CHECK: testb
-; CHECK: setne
-; CKECK: orl
-; CHECK: ret
+; CHECK: movw $-4, %ax
+; CHECK: kmovw %eax, %k1
+; CKECK: korw
define i16 @trunc_i32_to_i1(i32 %a) {
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
More information about the llvm-commits
mailing list