[llvm] r263111 - AVX-512: Fixed a bug in i1 vector zero extending. (Skylake-avx512)
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 10 05:44:23 PST 2016
Author: delena
Date: Thu Mar 10 07:44:22 2016
New Revision: 263111
URL: http://llvm.org/viewvc/llvm-project?rev=263111&view=rev
Log:
AVX-512: Fixed a bug in zero-extension of i1 vectors (Skylake-avx512).
(Compilation previously failed during the instruction selection phase.)
Differential Revision: http://reviews.llvm.org/D17924
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=263111&r1=263110&r2=263111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Mar 10 07:44:22 2016
@@ -13948,16 +13948,21 @@ static SDValue LowerZERO_EXTEND_AVX512(
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
assert(InVT.getVectorElementType() == MVT::i1);
- MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
+
+ // Extend VT if the target is 256 or 128bit vector and VLX is not supported.
+ MVT ExtVT = VT;
+ if (!VT.is512BitVector() && !Subtarget.hasVLX())
+ ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+
SDValue One =
DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
- SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
- if (VT.is512BitVector())
- return V;
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
+ SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
+ if (VT == ExtVT)
+ return SelectedVal;
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
@@ -15047,16 +15052,15 @@ static SDValue LowerBoolVSETCC_AVX512(SD
}
}
-static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 &&
- Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ assert(VT.getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
@@ -15194,26 +15198,26 @@ static SDValue LowerVSETCC(SDValue Op, c
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
+ // Operands are boolean (vectors of i1)
MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
- bool MaskResult = (VT.getVectorElementType() == MVT::i1);
- if (Subtarget.hasAVX512()) {
- if (Op1.getSimpleValueType().is512BitVector() ||
- (Subtarget.hasBWI() && Subtarget.hasVLX()) ||
- (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
- return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
-
+ // The result is boolean, but operands are int/float
+ if (VT.getVectorElementType() == MVT::i1) {
// In AVX-512 architecture setcc returns mask with i1 elements,
// But there is no compare instruction for i8 and i16 elements in KNL.
- // We are not talking about 512-bit operands in this case, these
- // types are illegal.
- if (MaskResult &&
- (OpVT.getVectorElementType().getSizeInBits() < 32 &&
- OpVT.getVectorElementType().getSizeInBits() >= 8))
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
+ // In this case use SSE compare
+ bool UseAVX512Inst =
+ (OpVT.is512BitVector() ||
+ OpVT.getVectorElementType().getSizeInBits() >= 32 ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX()));
+
+ if (UseAVX512Inst)
+ return LowerIntVSETCC_AVX512(Op, DAG);
+
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// Lower using XOP integer comparisons.
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=263111&r1=263110&r2=263111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Thu Mar 10 07:44:22 2016
@@ -1879,3 +1879,47 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
+
+define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
+; KNL-LABEL: zext_64xi1_to_64xi8:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
+; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_64xi1_to_64xi8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
+; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %mask = icmp eq <64 x i8> %x, %y
+ %1 = zext <64 x i1> %mask to <64 x i8>
+ ret <64 x i8> %1
+}
+
+define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
+; KNL-LABEL: zext_4xi1_to_4x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4xi1_to_4x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1
+; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %mask = icmp eq <4 x i8> %x, %y
+ %1 = zext <4 x i1> %mask to <4 x i32>
+ ret <4 x i32> %1
+}
More information about the llvm-commits
mailing list