[llvm] r320267 - [X86] Improve lowering of vXi1 insert_subvectors to better utilize (insert_subvector zero, vec, 0) for zeroing upper bits.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 9 14:44:42 PST 2017
Author: ctopper
Date: Sat Dec 9 14:44:42 2017
New Revision: 320267
URL: http://llvm.org/viewvc/llvm-project?rev=320267&view=rev
Log:
[X86] Improve lowering of vXi1 insert_subvectors to better utilize (insert_subvector zero, vec, 0) for zeroing upper bits.
This can be better recognized during isel when the producer already zeroed the upper bits.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320267&r1=320266&r2=320267&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 9 14:44:42 2017
@@ -5013,6 +5013,10 @@ static SDValue insert1BitVector(SDValue
if (!isa<ConstantSDNode>(Idx))
return SDValue();
+ // Inserting undef is a nop. We can just return the original vector.
+ if (SubVec.isUndef())
+ return Vec;
+
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
@@ -5020,19 +5024,21 @@ static SDValue insert1BitVector(SDValue
MVT OpVT = Op.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+
+ // Extend to natively supported kshift.
+ MVT WideOpVT = OpVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
+ WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+
// Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
// if necessary.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
- if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
- // Need to promote to v16i1, do the insert, then extract back.
- Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- getZeroVector(MVT::v16i1, Subtarget, DAG, dl),
- SubVec, Idx);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
- DAG.getIntPtrConstant(0, dl));
- }
-
- return Op;
+ // May need to promote to a legal type.
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ SubVec, Idx);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
MVT SubVecVT = SubVec.getSimpleValueType();
@@ -5042,30 +5048,32 @@ static SDValue insert1BitVector(SDValue
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
- // extend to natively supported kshift
- MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
- MVT WideOpVT = OpVT;
- if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
- WideOpVT = MinVT;
-
- SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Undef = DAG.getUNDEF(WideOpVT);
- SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- Undef, SubVec, ZeroIdx);
- // Extract sub-vector if require.
- auto ExtractSubVec = [&](SDValue V) {
- return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
- OpVT, V, ZeroIdx);
- };
+ if (IdxVal == 0) {
+ // Zero lower bits of the Vec
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
+ ZeroIdx);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ // Merge them together, SubVec should be zero extended.
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ SubVec, ZeroIdx);
+ Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op,
+ ZeroIdx);
+ }
+
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ Undef, SubVec, ZeroIdx);
if (Vec.isUndef()) {
- if (IdxVal != 0) {
- SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
- WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- ShiftBits);
- }
- return ExtractSubVec(WideSubVec);
+ assert(IdxVal != 0 && "Unexpected index");
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
@@ -5073,48 +5081,60 @@ static SDValue insert1BitVector(SDValue
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
- DAG.getConstant(ShiftRight, dl, MVT::i8));
- return ExtractSubVec(Vec);
- }
-
- if (IdxVal == 0) {
- // Zero lower bits of the Vec
- SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
- // Merge them together, SubVec should be zero extended.
- WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
- getZeroVector(WideOpVT, Subtarget, DAG, dl),
- SubVec, ZeroIdx);
- Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
- return ExtractSubVec(Vec);
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftRight, dl, MVT::i8));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
- // Zero upper bits of the Vec
- WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- NumElems = WideOpVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
- return ExtractSubVec(Vec);
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ if (SubVecNumElems * 2 == NumElems) {
+ // Special case, use legal zero extending insert_subvector. This allows
+ // isel to optimize when bits are known zero.
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ getZeroVector(WideOpVT, Subtarget, DAG, dl),
+ Vec, ZeroIdx);
+ } else {
+ // Otherwise use explicit shifts to zero the bits.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ Undef, Vec, ZeroIdx);
+ NumElems = WideOpVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ }
+ Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
- // Subvector should be inserted in the middle - use shuffle
- WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- SmallVector<int, 64> Mask;
- for (unsigned i = 0; i < NumElems; ++i)
- Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
- i : i + NumElems);
- return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+
+ // Inserting into the middle is more complicated.
+
+ NumElems = WideOpVT.getVectorNumElements();
+
+ // Widen the vector if needed.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
+ // Move the current value of the bit to be replaced to the lsbs.
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ // Xor with the new bit.
+ Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
+ // Shift to MSB, filling bottom bits with 0.
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ // Shift to the final position, filling upper bits with 0.
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
+ DAG.getConstant(ShiftRight, dl, MVT::i8));
+ // Xor with original vector leaving the new value.
+ Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
+ // Reduce to original width if needed.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
Modified: llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll?rev=320267&r1=320266&r2=320267&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-skx-insert-subvec.ll Sat Dec 9 14:44:42 2017
@@ -56,14 +56,12 @@ define <8 x i1> @test3(<4 x i1> %a) {
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
+; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $4, %k1, %k1
; CHECK-NEXT: kshiftlb $4, %k0, %k0
-; CHECK-NEXT: kshiftrb $4, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
@@ -74,14 +72,12 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
+; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $2, %k1, %k1
-; CHECK-NEXT: kshiftlb $6, %k0, %k0
-; CHECK-NEXT: kshiftrb $6, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: kshiftlb $2, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: retq
@@ -92,14 +88,12 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
+; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $2, %k1, %k1
-; CHECK-NEXT: kshiftlb $6, %k0, %k0
-; CHECK-NEXT: kshiftrb $6, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: kshiftlb $2, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2b %k0, %xmm0
; CHECK-NEXT: retq
@@ -110,14 +104,12 @@ define <16 x i1> @test6(<2 x i1> %a, <2
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
+; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: vpslld $31, %xmm1, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1
-; CHECK-NEXT: kshiftlb $4, %k1, %k1
; CHECK-NEXT: kshiftlb $4, %k0, %k0
-; CHECK-NEXT: kshiftrb $4, %k0, %k0
-; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: vpmovm2b %k0, %ymm0
; CHECK-NEXT: retq
More information about the llvm-commits mailing list