[llvm] r213830 - [AArch64] Fix a bug generating incorrect instruction when building small vector.

Wed Jul 23 19:05:43 PDT 2014

Author: kevinqin
Date: Wed Jul 23 21:05:42 2014
New Revision: 213830

URL: http://llvm.org/viewvc/llvm-project?rev=213830&view=rev
Log:
[AArch64] Fix a bug generating incorrect instruction when building small vector.

This bug was introduced by r211144. The element type of a source operand
may be smaller than the element type of the result, but the previous commit
could only handle the opposite case. This commit handles this scenario as
well and generates optimized code such as ZIP1.

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/neon-perm.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=213830&r1=213829&r2=213830&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Jul 23 21:05:42 2014
@@ -4180,9 +4180,22 @@ SDValue AArch64TargetLowering::Reconstru
   if (SourceVecs.size() > 2)
     return SDValue();
 
-  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+  // Find out the smallest element size among result and two sources, and use
+  // it as element size to build the shuffle_vector.
+  EVT SmallestEltTy = VT.getVectorElementType();
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
+    if (SrcEltTy.bitsLT(SmallestEltTy)) {
+      SmallestEltTy = SrcEltTy;
+    }
+  }
+  unsigned ResMultiplier =
+      VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
   int VEXTOffsets[2] = { 0, 0 };
   int OffsetMultipliers[2] = { 1, 1 };
+  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
 
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
@@ -4190,15 +4203,15 @@ SDValue AArch64TargetLowering::Reconstru
     unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
     SDValue CurSource = SourceVecs[i];
     if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType()) {
-      // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
-      // Then bitcast it to the vector which holds asserted element type,
-      // and record the multiplier of element width between SourceVecs and
-      // Build_vector which is needed to extract the correct lanes later.
-      EVT CastVT =
-          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-                           SourceVecs[i].getValueSizeInBits() /
-                               VT.getVectorElementType().getSizeInBits());
+        ShuffleVT.getVectorElementType()) {
+      // As ShuffleVT holds smallest element size, it may hit here only if
+      // the element type of SourceVecs is bigger than that of ShuffleVT.
+      // Adjust the element size of SourceVecs to match ShuffleVT, and record
+      // the multipliers.
+      EVT CastVT = EVT::getVectorVT(
+          *DAG.getContext(), ShuffleVT.getVectorElementType(),
+          SourceVecs[i].getValueSizeInBits() /
+              ShuffleVT.getVectorElementType().getSizeInBits());
 
       CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
       OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
@@ -4207,7 +4220,7 @@ SDValue AArch64TargetLowering::Reconstru
       MinElts[i] *= OffsetMultipliers[i];
     }
 
-    if (CurSource.getValueType() == VT) {
+    if (CurSource.getValueType() == ShuffleVT) {
       // No VEXT necessary
       ShuffleSrcs[i] = CurSource;
       VEXTOffsets[i] = 0;
@@ -4215,8 +4228,9 @@ SDValue AArch64TargetLowering::Reconstru
     } else if (NumSrcElts < NumElts) {
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
-                                   DAG.getUNDEF(CurSource.getValueType()));
+      ShuffleSrcs[i] =
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
+                      DAG.getUNDEF(CurSource.getValueType()));
       continue;
     }
 
@@ -4233,50 +4247,61 @@ SDValue AArch64TargetLowering::Reconstru
     if (MinElts[i] >= NumElts) {
       // The extraction can just take the second half
       VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(NumElts));
     } else if (MaxElts[i] < NumElts) {
       // The extraction can just take the first half
       VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(0));
     } else {
       // An actual VEXT is needed
       VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(NumElts));
       unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
-      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
-                                   DAG.getConstant(Imm, MVT::i32));
+      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
+                                   VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
     }
   }
 
   SmallVector<int, 8> Mask;
+  unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();
 
-  for (unsigned i = 0; i < NumElts; ++i) {
+  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
     SDValue Entry = Op.getOperand(i);
-    if (Entry.getOpcode() == ISD::UNDEF) {
-      Mask.push_back(-1);
-      continue;
-    }
-
-    SDValue ExtractVec = Entry.getOperand(0);
-    int ExtractElt =
-        cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
-    if (ExtractVec == SourceVecs[0]) {
-      Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
-    } else {
-      Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
-                     VEXTOffsets[1]);
+    int SourceNum = 1;
+    unsigned LanePartNum = 0;
+    int ExtractElt;
+    if (Entry.getOpcode() != ISD::UNDEF) {
+      // Check how many parts of source lane should be inserted.
+      SDValue ExtractVec = Entry.getOperand(0);
+      if (ExtractVec == SourceVecs[0])
+        SourceNum = 0;
+      ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+      unsigned ExtEltSize =
+          ExtractVec.getValueType().getVectorElementType().getSizeInBits();
+      unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
+      LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
+    }
+
+    for (unsigned j = 0; j != ResMultiplier; ++j) {
+      if (j < LanePartNum)
+        Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
+                       NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
+      else
+        Mask.push_back(-1);
     }
   }
 
   // Final check before we try to produce nonsense...
-  if (isShuffleMaskLegal(Mask, VT))
-    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
-                                &Mask[0]);
+  if (isShuffleMaskLegal(Mask, ShuffleVT)) {
+    SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
+                                           ShuffleSrcs[1], &Mask[0]);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+  }
 
   return SDValue();
 }

Modified: llvm/trunk/test/CodeGen/AArch64/neon-perm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-perm.ll?rev=213830&r1=213829&r2=213830&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-perm.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-perm.ll Wed Jul 23 21:05:42 2014
@@ -1387,6 +1387,13 @@ entry:
   ret <8 x i16> %shuffle.i
 }
 
+define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
+; CHECK-LABEL: test_vzip1_v4i8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i8> %lo
+}
+
 define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
 ; CHECK-LABEL: test_same_vzip2_s8:
 ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b