[llvm] r322498 - [Hexagon] Rewrite LowerVECTOR_SHUFFLE for 32-/64-bit vectors

Mon Jan 15 10:33:33 PST 2018

Author: kparzysz
Date: Mon Jan 15 10:33:33 2018
New Revision: 322498

URL: http://llvm.org/viewvc/llvm-project?rev=322498&view=rev
Log:
[Hexagon] Rewrite LowerVECTOR_SHUFFLE for 32-/64-bit vectors

The old implementation was not always correct. The new one recognizes
more shuffles that match specific instructions.

Added:
    llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll
    llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp?rev=322498&r1=322497&r2=322498&view=diff
==============================================================================

--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp Mon Jan 15 10:33:33 2018
@@ -1999,6 +1999,7 @@ HexagonTargetLowering::HexagonTargetLowe
 
   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
+  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
 
@@ -2377,49 +2378,125 @@ HexagonTargetLowering::getPreferredVecto
 SDValue
 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
       const {
-  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  SDLoc dl(Op);
-  EVT VT = Op.getValueType();
-
-  if (V2.isUndef())
-    V2 = V1;
-
-  if (SVN->isSplat()) {
-    int Lane = SVN->getSplatIndex();
-    if (Lane == -1) Lane = 0;
-
-    // Test if V1 is a SCALAR_TO_VECTOR.
-    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
-
-    // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
-    // (and probably will turn into a SCALAR_TO_VECTOR once legalization
-    // reaches it).
-    if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
-        !isa<ConstantSDNode>(V1.getOperand(0))) {
-      bool IsScalarToVector = true;
-      for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) {
-        if (!V1.getOperand(i).isUndef()) {
-          IsScalarToVector = false;
-          break;
-        }
-      }
-      if (IsScalarToVector)
-        return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
+  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> AM = SVN->getMask();
+  assert(AM.size() <= 8 && "Unexpected shuffle mask");
+  unsigned VecLen = AM.size();
+
+  MVT VecTy = ty(Op);
+  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
+
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  // If the input types are not the same as the output type, bail. This is
+  // not an error situation, but it complicates the handling, and the default
+  // expansion (into BUILD_VECTOR) should be adequate.
+  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
+    return SDValue();
+
+  // Normalize the mask so that the first non-negative index comes from
+  // the first operand.
+  SmallVector<int,8> Mask(AM.begin(), AM.end());
+  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
+  if (F == AM.size())
+    return DAG.getUNDEF(VecTy);
+  if (AM[F] >= int(VecLen)) {
+    ShuffleVectorSDNode::commuteMask(Mask);
+    std::swap(Op0, Op1);
+  }
+
+  // Express the shuffle mask in terms of bytes.
+  SmallVector<int,8> ByteMask;
+  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    int M = Mask[i];
+    if (M < 0) {
+      for (unsigned j = 0; j != ElemBytes; ++j)
+        ByteMask.push_back(-1);
+    } else {
+      for (unsigned j = 0; j != ElemBytes; ++j)
+        ByteMask.push_back(M*ElemBytes + j);
     }
-    return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
-                       DAG.getConstant(Lane, dl, MVT::i32));
   }
+  assert(ByteMask.size() <= 8);
 
-  // FIXME: We need to support more general vector shuffles.  See
-  // below the comment from the ARM backend that deals in the general
-  // case with the vector shuffles.  For now, let expand handle these.
-  return SDValue();
+  // All non-undef (non-negative) indexes are well within [0..127], so they
+  // fit in a single byte. Build two 64-bit words:
+  // - MaskIdx where each byte is the corresponding index (for non-negative
+  //   indexes), and 0xFF for negative indexes, and
+  // - MaskUnd that has 0xFF for each negative index.
+  uint64_t MaskIdx = 0;
+  uint64_t MaskUnd = 0;
+  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
+    unsigned S = 8*i;
+    uint64_t M = ByteMask[i] & 0xFF;
+    if (M == 0xFF)
+      MaskUnd |= M << S;
+    MaskIdx |= M << S;
+  }
+
+  const SDLoc &dl(Op);
+
+  if (ByteMask.size() == 4) {
+    // Identity.
+    if (MaskIdx == (0x03020100 | MaskUnd))
+      return Op0;
+    // Byte swap.
+    if (MaskIdx == (0x00010203 | MaskUnd)) {
+      SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
+      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
+      return DAG.getBitcast(VecTy, T1);
+    }
+
+    // Byte packs.
+    SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
+                                   typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
+    if (MaskIdx == (0x06040200 | MaskUnd))
+      return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
+    if (MaskIdx == (0x07050301 | MaskUnd))
+      return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
+
+    SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
+                                   typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
+    if (MaskIdx == (0x02000604 | MaskUnd))
+      return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
+    if (MaskIdx == (0x03010705 | MaskUnd))
+      return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
+  }
+
+  if (ByteMask.size() == 8) {
+    // Identity.
+    if (MaskIdx == (0x0706050403020100ull | MaskUnd))
+      return Op0;
+    // Byte swap.
+    if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
+      SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
+      SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
+      return DAG.getBitcast(VecTy, T1);
+    }
 
-  // If the shuffle is not directly supported and it has 4 elements, use
-  // the PerfectShuffle-generated table to synthesize it from other shuffles.
+    // Halfword picks.
+    if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
+      return getNode(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
+    if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
+      return getNode(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
+    if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
+      return getNode(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
+    if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
+      return getNode(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
+    if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
+      VectorPair P = opSplit(Op0, dl, DAG);
+      return getNode(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
+    }
+
+    // Byte packs.
+    if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
+      return getNode(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
+    if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
+      return getNode(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
+  }
+
+  return SDValue();
 }
 
 // If BUILD_VECTOR has same base element repeated several times,

Added: llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll?rev=322498&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll Mon Jan 15 10:33:33 2018
@@ -0,0 +1,86 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: test_00:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_00(<4 x i8> %a0) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_01:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_01(<4 x i8> %a0) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_02:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_02(<4 x i8> %a0) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_03:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_03(<4 x i8> %a0) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_10:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_10(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_11:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_11(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 2, i32 4, i32 undef>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_12:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_12(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 undef, i32 4, i32 6>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_13:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_13(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_20:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_20(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_21:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_21(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_22:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_22(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 undef, i32 5, i32 7>
+  ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_23:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_23(<4 x i8> %a0, <4 x i8> %a1) {
+  %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 undef>
+  ret <4 x i8> %p
+}
+

Added: llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll?rev=322498&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll Mon Jan 15 10:33:33 2018
@@ -0,0 +1,66 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: test_00:
+; CHECK-DAG: r[[REG00:[0-9]+]] = swiz(r0)
+; CHECK-DAG: r[[REG01:[0-9]+]] = swiz(r1)
+; CHECK: r1:0 = combine(r[[REG00]],r[[REG01]])
+define <8 x i8> @test_00(<8 x i8> %a0) {
+  %p = shufflevector <8 x i8> %a0, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %p
+}
+
+; CHECK-LABEL: test_10:
+; CHECK: r1:0 = packhl(r1,r0)
+define <4 x i16> @test_10(<4 x i16> %a0) {
+  %p = shufflevector <4 x i16> %a0, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_11:
+; CHECK: r1:0 = packhl(r1,r0)
+define <4 x i16> @test_11(<4 x i16> %a0) {
+  %p = shufflevector <4 x i16> undef, <4 x i16> %a0, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_20:
+; CHECK: r1:0 = shuffeh(r3:2,r1:0)
+define <4 x i16> @test_20(<4 x i16> %a0, <4 x i16> %a1) {
+  %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_30:
+; CHECK: r1:0 = shuffoh(r3:2,r1:0)
+define <4 x i16> @test_30(<4 x i16> %a0, <4 x i16> %a1) {
+  %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_40:
+; CHECK: r1:0 = vtrunewh(r3:2,r1:0)
+define <4 x i16> @test_40(<4 x i16> %a0, <4 x i16> %a1) {
+  %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_50:
+; CHECK: r1:0 = vtrunowh(r3:2,r1:0)
+define <4 x i16> @test_50(<4 x i16> %a0, <4 x i16> %a1) {
+  %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_60:
+; CHECK: r1:0 = shuffeb(r3:2,r1:0)
+define <8 x i8> @test_60(<8 x i8> %a0, <8 x i8> %a1) {
+  %p = shufflevector <8 x i8> %a0, <8 x i8> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %p
+}
+
+; CHECK-LABEL: test_70:
+; CHECK: r1:0 = shuffob(r3:2,r1:0)
+define <8 x i8> @test_70(<8 x i8> %a0, <8 x i8> %a1) {
+  %p = shufflevector <8 x i8> %a0, <8 x i8> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %p
+}