[llvm-commits] [llvm] r58964 - in /llvm/trunk: docs/LangRef.html lib/Bitcode/Reader/BitcodeReader.cpp lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp lib/Transforms/Scalar/InstructionCombining.cpp lib/VMCore/ConstantFold.cpp lib/VMCore/Instructions.cpp lib/VMCore/Verifier.cpp

Wed Nov 12 09:05:32 PST 2008

Hi Mon Ping, what do you think of this for splitting
vector shuffle?  I didn't bother trying to handle
non-power-of-two vector sizes since that is (a) awkward
in this setup, and (b) they are going away soon anyway
thanks to widening.

Ciao,

Duncan.

PS: I've attached the final function, since that may
be easier to read.
PPS: I didn't test it much but it seems to work!

Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================

--- llvm.orig/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	2008-11-12 11:18:51.000000000 +0100
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp	2008-11-12 17:56:08.000000000 +0100
@@ -650,52 +650,110 @@
 
 void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
                                                   SDValue &Hi) {
-  // Build the low part.
+  // The low and high parts of the original input give four input vectors.
+  SDValue Inputs[4];
+  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+  MVT NewVT = Inputs[0].getValueType();
+  unsigned NewElts = NewVT.getVectorNumElements();
+  assert(NewVT == Inputs[1].getValueType() &&
+         "Non power-of-two vectors not supported!");
+
+  // If Lo or Hi uses elements from at most two of the four input vectors, then
+  // express it as a vector shuffle of those two inputs.  Otherwise extract the
+  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
   SDValue Mask = N->getOperand(2);
-  SmallVector<SDValue, 16> Ops;
-  MVT LoVT, HiVT;
-  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-  MVT EltVT = LoVT.getVectorElementType();
-  unsigned LoNumElts = LoVT.getVectorNumElements();
-  unsigned NumElements = Mask.getNumOperands();
-
-  // Insert all of the elements from the input that are needed.  We use
-  // buildvector of extractelement here because the input vectors will have
-  // to be legalized, so this makes the code simpler.
-  for (unsigned i = 0; i != LoNumElts; ++i) {
-    SDValue Arg = Mask.getOperand(i);
-    if (Arg.getOpcode() == ISD::UNDEF) {
-      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
-    } else {
-      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
-      SDValue InVec = N->getOperand(0);
-      if (Idx >= NumElements) {
-        InVec = N->getOperand(1);
-        Idx -= NumElements;
+  MVT IdxVT = Mask.getValueType().getVectorElementType();
+  MVT EltVT = NewVT.getVectorElementType();
+  SmallVector<SDValue, 16> BuildOps;
+  SmallVector<SDValue, 16> MaskOps;
+  for (unsigned High = 0; High < 2; ++High) {
+    SDValue &Output = High ? Hi : Lo;
+
+    // Build a shuffle mask for the output, discovering on the fly which
+    // input vectors to use as shuffle operands (recorded in InputUsed).
+    // At the same time, accumulate the final vector elements in BuildOps,
+    // for use if building a suitable shuffle vector proves too hard.
+    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+    unsigned FirstMaskIdx = High * NewElts;
+    bool useBuildVector = false;
+    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+      SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
+      if (Arg.getOpcode() == ISD::UNDEF) {
+        MaskOps.push_back(Arg);
+        BuildOps.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+        continue;
       }
-      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InVec,
-                                DAG.getIntPtrConstant(Idx)));
+
+      // The mask element.  This indexes into the input.
+      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+      // The input vector this mask element indexes into.
+      unsigned Input = Idx / NewElts;
+
+      if (Input >= array_lengthof(Inputs)) {
+        // The mask element indexes off the end of the input!  This is illegal
+        // but handle it anyway.
+        MaskOps.push_back(DAG.getNode(ISD::UNDEF, IdxVT));
+        BuildOps.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+        continue;
+      }
+
+      // Turn the index into an offset from the start of the input vector.
+      Idx -= Input * NewElts;
+
+      // Extract the vector element by hand, and record it for later use.
+      BuildOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
+                                     Inputs[Input],
+                                     DAG.getIntPtrConstant(Idx)));
+
+      if (useBuildVector)
+        // The following logic is only needed when creating a new shuffle, so
+        // skip it for a small speedup if we won't be building a vector shuffle.
+        continue;
+
+      // Find or create a shuffle vector operand to hold this input.
+      unsigned OpNo;
+      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+        if (InputUsed[OpNo] == Input) {
+          // This input vector is already an operand.
+          break;
+        } else if (InputUsed[OpNo] == -1U) {
+          // Create a new operand for this input vector.
+          InputUsed[OpNo] = Input;
+          break;
+        }
+      }
+
+      if (OpNo < array_lengthof(InputUsed))
+        // Add the mask index for the new shuffle vector.
+        MaskOps.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
+      else
+        // More than two input vectors used!  Give up on trying to create a
+        // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
+        useBuildVector = true;
     }
-  }
-  Lo = DAG.getNode(ISD::BUILD_VECTOR, LoVT, &Ops[0], Ops.size());
-  Ops.clear();
 
-  for (unsigned i = LoNumElts; i != NumElements; ++i) {
-    SDValue Arg = Mask.getOperand(i);
-    if (Arg.getOpcode() == ISD::UNDEF) {
-      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+    if (useBuildVector) {
+      Output = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &BuildOps[0],
+                           BuildOps.size());
+    } else if (InputUsed[0] == -1U) {
+      // No input vectors were used!  The result is undefined.
+      Output = DAG.getNode(ISD::UNDEF, NewVT);
     } else {
-      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
-      SDValue InVec = N->getOperand(0);
-      if (Idx >= NumElements) {
-        InVec = N->getOperand(1);
-        Idx -= NumElements;
-      }
-      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InVec,
-                                DAG.getIntPtrConstant(Idx)));
+      // At least one input vector was used.  Create a new shuffle vector.
+      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR,
+                                    MVT::getVectorVT(IdxVT, MaskOps.size()),
+                                    &MaskOps[0], MaskOps.size());
+      SDValue Op0 = Inputs[InputUsed[0]];
+      // If only one input was used, use an undefined vector for the other.
+      SDValue Op1 = InputUsed[1] == -1U ?
+        DAG.getNode(ISD::UNDEF, NewVT) : Inputs[InputUsed[1]];
+      Output = DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, Op0, Op1, NewMask);
     }
+
+    BuildOps.clear();
+    MaskOps.clear();
   }
-  Hi = DAG.getNode(ISD::BUILD_VECTOR, HiVT, &Ops[0], Ops.size());
 }
 
 void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
-------------- next part --------------
/// SplitVecRes_VECTOR_SHUFFLE - Split the result of the VECTOR_SHUFFLE node N
/// into a low half (returned in Lo) and a high half (returned in Hi).
///
/// Operands 0 and 1 of N are the vectors being shuffled and operand 2 is the
/// shuffle mask.  Both vector operands are split in two, giving four
/// half-sized input vectors.  Each output half is then built as follows:
///   * if its mask elements refer to no input vector at all, it is UNDEF;
///   * if they refer to at most two of the four input vectors, it is a
///     VECTOR_SHUFFLE of those two vectors with a renumbered mask;
///   * otherwise every element is extracted individually with
///     EXTRACT_VECTOR_ELT and the half is assembled with a BUILD_VECTOR.
///
/// The element extraction is only performed once the shuffle attempt has
/// failed, so no unused EXTRACT_VECTOR_ELT nodes are created when a shuffle
/// (or UNDEF) is produced.
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
                                                  SDValue &Hi) {
  // The low and high parts of the original input give four input vectors.
  SDValue Inputs[4];
  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
  MVT NewVT = Inputs[0].getValueType();
  unsigned NewElts = NewVT.getVectorNumElements();
  assert(NewVT == Inputs[1].getValueType() &&
         "Non power-of-two vectors not supported!");

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs.  Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  SDValue Mask = N->getOperand(2);
  MVT IdxVT = Mask.getValueType().getVectorElementType();
  // Scratch operand list, reused for both halves.  It holds mask elements
  // while a shuffle is being attempted, and vector elements if we fall back
  // to a BUILD_VECTOR.
  SmallVector<SDValue, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    SDValue &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with useBuildVector set.
    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool useBuildVector = false;
    Ops.clear();
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
      if (Arg.getOpcode() == ISD::UNDEF) {
        // An undefined mask element stays undefined in the new mask.
        Ops.push_back(Arg);
        continue;
      }

      // The mask element.  This indexes into the input.
      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
      // The input vector this mask element indexes into.
      unsigned Input = Idx / NewElts;

      if (Input >= array_lengthof(Inputs)) {
        // The mask element indexes off the end of the input!  This is illegal
        // but handle it anyway.
        Ops.push_back(DAG.getNode(ISD::UNDEF, IdxVT));
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= array_lengthof(InputUsed)) {
        // More than two input vectors used!  Give up on trying to create a
        // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
        // There is no point in looking at the remaining mask elements.
        useBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
    }

    if (useBuildVector) {
      MVT EltVT = NewVT.getVectorElementType();
      // Discard the partially built shuffle mask; Ops now collects the
      // elements of the output vector itself.
      Ops.clear();

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
        if (Arg.getOpcode() == ISD::UNDEF) {
          Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
          continue;
        }

        // The mask element.  This indexes into the input.
        unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
        // The input vector this mask element indexes into.
        unsigned Input = Idx / NewElts;

        if (Input >= array_lengthof(Inputs)) {
          // The mask element indexes off the end of the input!  This is
          // illegal but handle it anyway.
          Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT,
                                  Inputs[Input],
                                  DAG.getIntPtrConstant(Idx)));
      }

      // Construct the Lo/Hi output using a BUILD_VECTOR.
      Output = DAG.getNode(ISD::BUILD_VECTOR, NewVT, &Ops[0], Ops.size());
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used!  The result is undefined.
      Output = DAG.getNode(ISD::UNDEF, NewVT);
    } else {
      // At least one input vector was used.  Create a new shuffle vector.
      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR,
                                    MVT::getVectorVT(IdxVT, Ops.size()),
                                    &Ops[0], Ops.size());
      SDValue Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      SDValue Op1 = InputUsed[1] == -1U ?
        DAG.getNode(ISD::UNDEF, NewVT) : Inputs[InputUsed[1]];
      Output = DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, Op0, Op1, NewMask);
    }
  }
}