[llvm] r315587 - [X86][SSE] Pull out repeated INSERT_VECTOR_ELT code from LowerBUILD_VECTOR v16i8/v8i16 insertion. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 12 08:52:02 PDT 2017
Author: rksimon
Date: Thu Oct 12 08:52:01 2017
New Revision: 315587
URL: http://llvm.org/viewvc/llvm-project?rev=315587&view=rev
Log:
[X86][SSE] Pull out repeated INSERT_VECTOR_ELT code from LowerBUILD_VECTOR v16i8/v8i16 insertion. NFCI.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=315587&r1=315586&r2=315587&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Oct 12 08:52:01 2017
@@ -6149,6 +6149,49 @@ static SDValue getShuffleScalarElt(SDNod
return SDValue();
}
+// Use PINSRB/PINSRW/PINSRD to create a build vector: operand i is inserted only when bit i of NonZeros is set.
+static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero, // NumNonZero is not read in this function.
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||
+ ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&
+ "Illegal vector insertion"); // v8i16 requires SSE2; v16i8/v4i32 require SSE4.1.
+
+ SDLoc dl(Op);
+ SDValue V; // Returned default-constructed if no NonZeros bit below NumElts is set.
+ bool First = true; // Cleared when the first non-zero element is reached.
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ bool IsNonZero = (NonZeros & (1 << i)) != 0; // Bit i of the mask selects operand i.
+ if (!IsNonZero)
+ continue;
+
+ // If the build vector contains zeros, or our first insertion is not at
+ // index 0, insert into a zero vector to break any register dependency;
+ // otherwise build element 0 with SCALAR_TO_VECTOR/VZEXT_MOVL.
+ if (First) {
+ First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(VT, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); // Scalar as i32 for the v4i32 nodes below.
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(VT, V); // Back to the requested vector type.
+ continue; // Element 0 is already in place; skip the insert below.
+ }
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i, dl));
+ }
+
+ return V;
+}
+
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
@@ -6157,39 +6200,15 @@ static SDValue LowerBuildVectorv16i8(SDV
if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
+ // SSE4.1 - use PINSRB to insert each byte directly.
+ if (Subtarget.hasSSE41())
+ return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ Subtarget);
+
SDLoc dl(Op);
SDValue V;
bool First = true;
- // SSE4.1 - use PINSRB to insert each byte directly.
- if (Subtarget.hasSSE41()) {
- for (unsigned i = 0; i < 16; ++i) {
- bool IsNonZero = (NonZeros & (1 << i)) != 0;
- if (IsNonZero) {
- // If the build vector contains zeros or our first insertion is not the
- // first index then insert into zero vector to break any register
- // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
- if (First) {
- First = false;
- if (NumZero || 0 != i)
- V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
- else {
- assert(0 == i && "Expected insertion into zero-index");
- V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
- V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
- V = DAG.getBitcast(MVT::v16i8, V);
- continue;
- }
- }
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
- Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
- }
- }
-
- return V;
- }
-
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
@@ -6245,34 +6264,9 @@ static SDValue LowerBuildVectorv8i16(SDV
if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
- SDLoc dl(Op);
- SDValue V;
- bool First = true;
- for (unsigned i = 0; i < 8; ++i) {
- bool IsNonZero = (NonZeros & (1 << i)) != 0;
- if (IsNonZero) {
- // If the build vector contains zeros or our first insertion is not the
- // first index then insert into zero vector to break any register
- // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
- if (First) {
- First = false;
- if (NumZero || 0 != i)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
- else {
- assert(0 == i && "Expected insertion into zero-index");
- V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
- V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
- V = DAG.getBitcast(MVT::v8i16, V);
- continue;
- }
- }
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
- Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
- }
- }
-
- return V;
+ // Use PINSRW to insert each word directly.
+ return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
+ Subtarget);
}
/// Custom lower build_vector of v4i32 or v4f32.
More information about the llvm-commits mailing list