[llvm] r322498 - [Hexagon] Rewrite LowerVECTOR_SHUFFLE for 32-/64-bit vectors
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 10:33:33 PST 2018
Author: kparzysz
Date: Mon Jan 15 10:33:33 2018
New Revision: 322498
URL: http://llvm.org/viewvc/llvm-project?rev=322498&view=rev
Log:
[Hexagon] Rewrite LowerVECTOR_SHUFFLE for 32-/64-bit vectors
The old implementation was not always correct. The new one recognizes
more shuffles that match specific instructions.
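For example, an even-halfword interleave of two v4i16 inputs, which previously fell back to the generic expansion, now selects a single S2_shuffeh. This is the case exercised by test_20 in the new shuff-64.ll test below:

  define <4 x i16> @test_20(<4 x i16> %a0, <4 x i16> %a1) {
    %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    ret <4 x i16> %p
  }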
Added:
llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll
llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp?rev=322498&r1=322497&r2=322498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp Mon Jan 15 10:33:33 2018
@@ -1999,6 +1999,7 @@ HexagonTargetLowering::HexagonTargetLowe
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
@@ -2377,49 +2378,125 @@ HexagonTargetLowering::getPreferredVecto
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
const {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
-
- if (V2.isUndef())
- V2 = V1;
-
- if (SVN->isSplat()) {
- int Lane = SVN->getSplatIndex();
- if (Lane == -1) Lane = 0;
-
- // Test if V1 is a SCALAR_TO_VECTOR.
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
-
- // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
- // (and probably will turn into a SCALAR_TO_VECTOR once legalization
- // reaches it).
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
- !isa<ConstantSDNode>(V1.getOperand(0))) {
- bool IsScalarToVector = true;
- for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) {
- if (!V1.getOperand(i).isUndef()) {
- IsScalarToVector = false;
- break;
- }
- }
- if (IsScalarToVector)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
+ const auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> AM = SVN->getMask();
+ assert(AM.size() <= 8 && "Unexpected shuffle mask");
+ unsigned VecLen = AM.size();
+
+ MVT VecTy = ty(Op);
+ assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ // If the inputs are not the same as the output, bail. This is not an
+ // error situation, but complicates the handling and the default expansion
+ // (into BUILD_VECTOR) should be adequate.
+ if (ty(Op0) != VecTy || ty(Op1) != VecTy)
+ return SDValue();
+
+ // Normalize the mask so that the first non-negative index comes from
+ // the first operand.
+ SmallVector<int,8> Mask(AM.begin(), AM.end());
+ unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
+ if (F == AM.size())
+ return DAG.getUNDEF(VecTy);
+ if (AM[F] >= int(VecLen)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Op0, Op1);
+ }
+
+ // Express the shuffle mask in terms of bytes.
+ SmallVector<int,8> ByteMask;
+ unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(-1);
+ } else {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(M*ElemBytes + j);
}
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
- DAG.getConstant(Lane, dl, MVT::i32));
}
+ assert(ByteMask.size() <= 8);
- // FIXME: We need to support more general vector shuffles. See
- // below the comment from the ARM backend that deals in the general
- // case with the vector shuffles. For now, let expand handle these.
- return SDValue();
+ // All non-undef (non-negative) indexes are well within [0..127], so they
+ // fit in a single byte. Build two 64-bit words:
+ // - MaskIdx where each byte is the corresponding index (for non-negative
+ // indexes), and 0xFF for negative indexes, and
+ // - MaskUnd that has 0xFF for each negative index.
+ uint64_t MaskIdx = 0;
+ uint64_t MaskUnd = 0;
+ for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
+ unsigned S = 8*i;
+ uint64_t M = ByteMask[i] & 0xFF;
+ if (M == 0xFF)
+ MaskUnd |= M << S;
+ MaskIdx |= M << S;
+ }
+
+ const SDLoc &dl(Op);
+
+ if (ByteMask.size() == 4) {
+ // Identity.
+ if (MaskIdx == (0x03020100 | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x00010203 | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
+
+ // Byte packs.
+ SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
+ if (MaskIdx == (0x06040200 | MaskUnd))
+ return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
+ if (MaskIdx == (0x07050301 | MaskUnd))
+ return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
+
+ SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
+ if (MaskIdx == (0x02000604 | MaskUnd))
+ return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
+ if (MaskIdx == (0x03010705 | MaskUnd))
+ return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
+ }
+
+ if (ByteMask.size() == 8) {
+ // Identity.
+ if (MaskIdx == (0x0706050403020100ull | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
- // If the shuffle is not directly supported and it has 4 elements, use
- // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ // Halfword picks.
+ if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
+ return getNode(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
+ return getNode(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
+ return getNode(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
+ return getNode(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
+ VectorPair P = opSplit(Op0, dl, DAG);
+ return getNode(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
+ }
+
+ // Byte packs.
+ if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
+ return getNode(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
+ return getNode(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
+ }
+
+ return SDValue();
}
// If BUILD_VECTOR has same base element repeated several times,
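As a worked example of the MaskIdx/MaskUnd encoding above: a v4i8 shuffle with mask <0, undef, 4, 6> has byte mask {0, -1, 4, 6}, so MaskIdx = 0x0604FF00 and MaskUnd = 0x0000FF00. Because each pattern is compared as (pattern | MaskUnd), the undef lane is ignored and 0x0604FF00 still matches the 0x06040200 pattern, so the shuffle is lowered to S2_vtrunehb. This is test_12 in the new shuff-32.ll test below:

  define <4 x i8> @test_12(<4 x i8> %a0, <4 x i8> %a1) {
    %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 undef, i32 4, i32 6>
    ret <4 x i8> %p
  }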
Added: llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll?rev=322498&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/shuff-32.ll Mon Jan 15 10:33:33 2018
@@ -0,0 +1,86 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: test_00:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_00(<4 x i8> %a0) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_01:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_01(<4 x i8> %a0) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_02:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_02(<4 x i8> %a0) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 undef, i32 1, i32 0>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_03:
+; CHECK: r0 = swiz(r0)
+define <4 x i8> @test_03(<4 x i8> %a0) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_10:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_10(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_11:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_11(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 2, i32 4, i32 undef>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_12:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_12(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 undef, i32 4, i32 6>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_13:
+; CHECK: r0 = vtrunehb(r1:0)
+define <4 x i8> @test_13(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_20:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_20(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_21:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_21(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_22:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_22(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 undef, i32 undef, i32 5, i32 7>
+ ret <4 x i8> %p
+}
+
+; CHECK-LABEL: test_23:
+; CHECK: r0 = vtrunohb(r1:0)
+define <4 x i8> @test_23(<4 x i8> %a0, <4 x i8> %a1) {
+ %p = shufflevector <4 x i8> %a0, <4 x i8> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 undef>
+ ret <4 x i8> %p
+}
+
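A mask that matches none of the recognized byte patterns is still handled: LowerVECTOR_SHUFFLE returns SDValue() and the default expansion takes over, as noted in the comment in the code above. A sketch of such a case (hypothetical function, not one of the tests in this patch): mask <0, 0, 2, 2> gives MaskIdx = 0x02020000, which matches no pattern:

  define <4 x i8> @dup_even(<4 x i8> %a0) {
    %p = shufflevector <4 x i8> %a0, <4 x i8> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    ret <4 x i8> %p
  }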
Added: llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll?rev=322498&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/shuff-64.ll Mon Jan 15 10:33:33 2018
@@ -0,0 +1,66 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: test_00:
+; CHECK-DAG: r[[REG00:[0-9]+]] = swiz(r0)
+; CHECK-DAG: r[[REG01:[0-9]+]] = swiz(r1)
+; CHECK: r1:0 = combine(r[[REG00]],r[[REG01]])
+define <8 x i8> @test_00(<8 x i8> %a0) {
+ %p = shufflevector <8 x i8> %a0, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %p
+}
+
+; CHECK-LABEL: test_10:
+; CHECK: r1:0 = packhl(r1,r0)
+define <4 x i16> @test_10(<4 x i16> %a0) {
+ %p = shufflevector <4 x i16> %a0, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_11:
+; CHECK: r1:0 = packhl(r1,r0)
+define <4 x i16> @test_11(<4 x i16> %a0) {
+ %p = shufflevector <4 x i16> undef, <4 x i16> %a0, <4 x i32> <i32 4, i32 6, i32 5, i32 7>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_20:
+; CHECK: r1:0 = shuffeh(r3:2,r1:0)
+define <4 x i16> @test_20(<4 x i16> %a0, <4 x i16> %a1) {
+ %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_30:
+; CHECK: r1:0 = shuffoh(r3:2,r1:0)
+define <4 x i16> @test_30(<4 x i16> %a0, <4 x i16> %a1) {
+ %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_40:
+; CHECK: r1:0 = vtrunewh(r3:2,r1:0)
+define <4 x i16> @test_40(<4 x i16> %a0, <4 x i16> %a1) {
+ %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_50:
+; CHECK: r1:0 = vtrunowh(r3:2,r1:0)
+define <4 x i16> @test_50(<4 x i16> %a0, <4 x i16> %a1) {
+ %p = shufflevector <4 x i16> %a0, <4 x i16> %a1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i16> %p
+}
+
+; CHECK-LABEL: test_60:
+; CHECK: r1:0 = shuffeb(r3:2,r1:0)
+define <8 x i8> @test_60(<8 x i8> %a0, <8 x i8> %a1) {
+ %p = shufflevector <8 x i8> %a0, <8 x i8> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ ret <8 x i8> %p
+}
+
+; CHECK-LABEL: test_70:
+; CHECK: r1:0 = shuffob(r3:2,r1:0)
+define <8 x i8> @test_70(<8 x i8> %a0, <8 x i8> %a1) {
+ %p = shufflevector <8 x i8> %a0, <8 x i8> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i8> %p
+}