[llvm] r317111 - Adds code to PPC ISEL lowering to recognize half-word inserts from vector_shuffles, and use P9 shift and vector insert instructions instead of vperm.
Graham Yiu via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 11:06:56 PDT 2017
Author: gyiu
Date: Wed Nov 1 11:06:56 2017
New Revision: 317111
URL: http://llvm.org/viewvc/llvm-project?rev=317111&view=rev
Log:
Adds code to PPC ISEL lowering to recognize half-word inserts from vector_shuffles, and use P9 shift and vector insert instructions instead of vperm.
Differential Revision: https://reviews.llvm.org/D34160
Added:
llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=317111&r1=317110&r2=317111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed Nov 1 11:06:56 2017
@@ -114,6 +114,8 @@ cl::desc("disable sibling call optimizat
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -7886,6 +7888,118 @@ static SDValue GeneratePerfectShuffle(un
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// lowerToVINSERTH - If this VECTOR_SHUFFLE only inserts one half-word of one
+/// operand into the other, lower it to a VECINSERT node (VINSERTH, ISA 3.0),
+/// preceded by a VECSHL if a shift is needed; else return an empty SDValue.
+SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
+ SelectionDAG &DAG) const {
+ const unsigned NumHalfWords = 8;
+ const unsigned BytesInVector = NumHalfWords * 2;
+ // Check that the shuffle is on half-words.
+ if (!isNByteElemShuffleMask(N, 2, 1))
+ return SDValue();
+
+ bool IsLE = Subtarget.isLittleEndian();
+ SDLoc dl(N);
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned ShiftElts = 0, InsertAtByte = 0;
+ bool Swap = false;
+
+ // Shifts (in half-words) to move the wanted element to element 3 (BE) / 4 (LE).
+ unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
+ unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
+
+ uint32_t Mask = 0;
+ uint32_t OriginalOrderLow = 0x1234567;
+ uint32_t OriginalOrderHigh = 0x89ABCDEF;
+ // Look at mask elements 0,2,4,6,8,10,12,14 (the first byte of each half-word)
+ // and pack them into 32 bits, one 4-bit nibble per half-word index.
+ for (unsigned i = 0; i < NumHalfWords; ++i) {
+ unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+ Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
+ }
+
+ // For each mask element, find out if we're just inserting something
+ // from V2 into V1 or vice versa. Possible permutations inserting an element
+ // from V2 into V1:
+ // X, 1, 2, 3, 4, 5, 6, 7
+ // 0, X, 2, 3, 4, 5, 6, 7
+ // 0, 1, X, 3, 4, 5, 6, 7
+ // 0, 1, 2, X, 4, 5, 6, 7
+ // 0, 1, 2, 3, X, 5, 6, 7
+ // 0, 1, 2, 3, 4, X, 6, 7
+ // 0, 1, 2, 3, 4, 5, X, 7
+ // 0, 1, 2, 3, 4, 5, 6, X
+ // Inserting from V1 into V2 is similar, except the fixed elements are [8,15].
+
+ bool FoundCandidate = false;
+ // Go through the mask of half-words to find an element that's being moved
+ // from one vector to the other.
+ for (unsigned i = 0; i < NumHalfWords; ++i) {
+ unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+ uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
+ uint32_t MaskOtherElts = ~(0xF << MaskShift);
+ uint32_t TargetOrder = 0x0;
+
+ // If both vector operands for the shuffle are the same vector, the mask
+ // will contain only elements from the first one and the second one will be
+ // undef.
+ if (V2.isUndef()) {
+ ShiftElts = 0;
+ unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
+ TargetOrder = OriginalOrderLow;
+ Swap = false;
+ // Skip unless this is the element VINSERTH reads (no shift is possible
+ // here) and the remaining mask elements are in their original order.
+ if (MaskOneElt == VINSERTHSrcElem &&
+ (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
+ InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
+ FoundCandidate = true;
+ break;
+ }
+ } else { // If both operands are defined.
+ // The other elements must be [8,15] if this one is in [0,7], else [0,7].
+ TargetOrder =
+ (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
+ // Skip if the other mask elements don't match the expected order.
+ if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
+ // Only the low 3 bits of the element index select the shift amount.
+ ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
+ : BigEndianShifts[MaskOneElt & 0x7];
+ InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
+ Swap = MaskOneElt < NumHalfWords;
+ FoundCandidate = true;
+ break;
+ }
+ }
+ }
+
+ if (!FoundCandidate)
+ return SDValue();
+
+ // Candidate found, construct the proper SDAG sequence with VINSERTH,
+ // optionally with VECSHL if shift is required.
+ if (Swap)
+ std::swap(V1, V2);
+ if (V2.isUndef())
+ V2 = V1;
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ if (ShiftElts) {
+ // Double ShiftElts: it counts half-words, but the shift is on a v16i8.
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
+ DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
+ SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
+ SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+ }
+ SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+ SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
+ DAG.getConstant(InsertAtByte, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+}
+
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
@@ -7920,6 +8034,11 @@ SDValue PPCTargetLowering::LowerVECTOR_S
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+ if (Subtarget.hasP9Altivec()) {
+ SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
+ if (NewISDNode)
+ return NewISDNode;
+ }
if (Subtarget.hasVSX() &&
PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=317111&r1=317110&r2=317111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Wed Nov 1 11:06:56 2017
@@ -1072,7 +1072,14 @@ namespace llvm {
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
- };
+
+ /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
+ /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
+ /// essentially any shuffle of v8i16 vectors that just inserts one element
+ /// from one vector into the other.
+ SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+ }; // end class PPCTargetLowering
namespace PPC {
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td?rev=317111&r1=317110&r2=317111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td Wed Nov 1 11:06:56 2017
@@ -477,10 +477,10 @@ def VPERM : VA1a_Int_Ty3<43, "vperm
def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
-def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
+def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u4imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
- [(set v16i8:$vD,
- (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
+ [(set v16i8:$vD,
+ (PPCvecshl v16i8:$vA, v16i8:$vB, imm32SExt16:$SH))]>;
// VX-Form instructions. AltiVec arithmetic ops.
let isCommutable = 1 in {
@@ -908,6 +908,9 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA
(VPKUWUM $vA, $vA)>;
def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
(VPKUHUM $vA, $vA)>;
+def:Pat<(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB),
+ (VSLDOI v16i8:$vA, v16i8:$vB, (VSLDOI_get_imm $SH))>;
+
// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
// These fragments are matched for little-endian, where the inputs must
@@ -1310,7 +1313,12 @@ def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "ve
// Vector Insert Element Instructions
def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
-def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>;
+def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
+ (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
+ "vinserth $vD, $vB, $UIM", IIC_VecGeneral,
+ [(set v8i16:$vD, (PPCvecinsert v8i16:$vDi, v8i16:$vB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>;
def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>;
Added: llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll?rev=317111&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll Wed Nov 1 11:06:56 2017
@@ -0,0 +1,300 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; The following testcases take one halfword element from the second vector and
+; insert it at various locations in the first vector.
+define <8 x i16> @shuffle_vector_halfword_0_8(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_8
+; CHECK: vsldoi 3, 3, 3, 8
+; CHECK: vinserth 2, 3, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_8
+; CHECK-BE: vsldoi 3, 3, 3, 10
+; CHECK-BE: vinserth 2, 3, 0
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_1_15(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_15
+; CHECK: vsldoi 3, 3, 3, 10
+; CHECK: vinserth 2, 3, 12
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_15
+; CHECK-BE: vsldoi 3, 3, 3, 8
+; CHECK-BE: vinserth 2, 3, 2
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 15, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_2_9(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_9
+; CHECK: vsldoi 3, 3, 3, 6
+; CHECK: vinserth 2, 3, 10
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_9
+; CHECK-BE: vsldoi 3, 3, 3, 12
+; CHECK-BE: vinserth 2, 3, 4
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_3_13(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_13
+; CHECK: vsldoi 3, 3, 3, 14
+; CHECK: vinserth 2, 3, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_13
+; CHECK-BE: vsldoi 3, 3, 3, 4
+; CHECK-BE: vinserth 2, 3, 6
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_4_10(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_10
+; CHECK: vsldoi 3, 3, 3, 4
+; CHECK: vinserth 2, 3, 6
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_10
+; CHECK-BE: vsldoi 3, 3, 3, 14
+; CHECK-BE: vinserth 2, 3, 8
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_5_14(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_14
+; CHECK: vsldoi 3, 3, 3, 12
+; CHECK: vinserth 2, 3, 4
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_14
+; CHECK-BE: vsldoi 3, 3, 3, 6
+; CHECK-BE: vinserth 2, 3, 10
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 14, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_6_11(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_11
+; CHECK: vsldoi 3, 3, 3, 2
+; CHECK: vinserth 2, 3, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_11
+; CHECK-BE: vinserth 2, 3, 12
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 11, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_7_12(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_12
+; CHECK: vinserth 2, 3, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_12
+; CHECK-BE: vsldoi 3, 3, 3, 2
+; CHECK-BE: vinserth 2, 3, 14
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_8_1
+; CHECK: vsldoi 2, 2, 2, 6
+; CHECK: vinserth 3, 2, 14
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_8_1
+; CHECK-BE: vsldoi 2, 2, 2, 12
+; CHECK-BE: vinserth 3, 2, 0
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+; The following testcases (and shuffle_vector_halfword_8_1 above) take one halfword
+; element from the first vector and insert it at various locations in the second vector.
+define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_9_7
+; CHECK: vsldoi 2, 2, 2, 10
+; CHECK: vinserth 3, 2, 12
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_9_7
+; CHECK-BE: vsldoi 2, 2, 2, 8
+; CHECK-BE: vinserth 3, 2, 2
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_10_4
+; CHECK: vinserth 3, 2, 10
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_10_4
+; CHECK-BE: vsldoi 2, 2, 2, 2
+; CHECK-BE: vinserth 3, 2, 4
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 4, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_11_2
+; CHECK: vsldoi 2, 2, 2, 4
+; CHECK: vinserth 3, 2, 8
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_11_2
+; CHECK-BE: vsldoi 2, 2, 2, 14
+; CHECK-BE: vinserth 3, 2, 6
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 2, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_12_6
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: vinserth 3, 2, 6
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_12_6
+; CHECK-BE: vsldoi 2, 2, 2, 6
+; CHECK-BE: vinserth 3, 2, 8
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 6, i32 13, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_13_3
+; CHECK: vsldoi 2, 2, 2, 2
+; CHECK: vinserth 3, 2, 4
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_13_3
+; CHECK-BE: vinserth 3, 2, 10
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_14_5
+; CHECK: vsldoi 2, 2, 2, 14
+; CHECK: vinserth 3, 2, 2
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_14_5
+; CHECK-BE: vsldoi 2, 2, 2, 4
+; CHECK-BE: vinserth 3, 2, 12
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 5, i32 15>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_15_0
+; CHECK: vsldoi 2, 2, 2, 8
+; CHECK: vinserth 3, 2, 0
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_15_0
+; CHECK-BE: vsldoi 2, 2, 2, 10
+; CHECK-BE: vinserth 3, 2, 14
+; CHECK-BE: vmr 2, 3
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
+ ret <8 x i16> %vecins
+}
+
+; The following testcases use the same vector in both arguments of the
+; shufflevector. If halfword element 3 in BE mode (or 4 in LE mode) is the one
+; we're attempting to insert, then we can use the vector insert instruction.
+define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_4
+; CHECK: vinserth 2, 2, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_3
+; CHECK-BE: vinserth 2, 2, 2
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 3, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_3
+; CHECK-BE: vinserth 2, 2, 4
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_4
+; CHECK: vinserth 2, 2, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_3
+; CHECK-BE: vinserth 2, 2, 8
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_3
+; CHECK-BE: vinserth 2, 2, 10
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 3, i32 6, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_4
+; CHECK: vinserth 2, 2, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
+ ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_4
+; CHECK: vinserth 2, 2, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_4
+; CHECK-BE-NOT: vinserth
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
+ ret <8 x i16> %vecins
+}
+
More information about the llvm-commits
mailing list