[llvm] r304298 - [PowerPC] Fix a performance bug for PPC::XXPERMDI.

Tony Jiang via llvm-commits llvm-commits at lists.llvm.org
Wed May 31 06:09:57 PDT 2017


Author: jtony
Date: Wed May 31 08:09:57 2017
New Revision: 304298

URL: http://llvm.org/viewvc/llvm-project?rev=304298&view=rev
Log:
[PowerPC] Fix a performance bug for PPC::XXPERMDI.

Some VectorShuffle nodes in the SelectionDAG can be selected to the XXPERMDI
instruction. This patch recognizes those nodes and performs the selection to
improve PPC performance.

Differential Revision: https://reviews.llvm.org/D33404

Added:
    llvm/trunk/test/CodeGen/PowerPC/vec_xxpermdi.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=304298&r1=304297&r2=304298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed May 31 08:09:57 2017
@@ -1112,6 +1112,7 @@ const char *PPCTargetLowering::getTarget
   case PPCISD::VPERM:           return "PPCISD::VPERM";
   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
   case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
+  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
   case PPCISD::CMPB:            return "PPCISD::CMPB";
   case PPCISD::Hi:              return "PPCISD::Hi";
@@ -1593,17 +1594,25 @@ bool PPC::isSplatShuffleMask(ShuffleVect
   return true;
 }
 
-  // Check that the mask is shuffling words
-static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
-  for (unsigned i = 0; i < 4; ++i) {
-    unsigned B0 = N->getMaskElt(i*4);
-    unsigned B1 = N->getMaskElt(i*4+1);
-    unsigned B2 = N->getMaskElt(i*4+2);
-    unsigned B3 = N->getMaskElt(i*4+3);
-    if (B0 % 4)
-      return false;
-    if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
+// Check that the mask is shuffling N byte elements.
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
+  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
+         "Unexpected element width.");
+
+  unsigned NumOfElem = 16 / Width;
+  unsigned MaskVal[16]; //  Width is never greater than 16
+  for (unsigned i = 0; i < NumOfElem; ++i) {
+    MaskVal[0] = N->getMaskElt(i * Width);
+    if (MaskVal[0] % Width) {
       return false;
+    }
+
+    for (unsigned int j = 1; j < Width; ++j) {
+      MaskVal[j] = N->getMaskElt(i * Width + j);
+      if (MaskVal[j] != MaskVal[j-1] + 1) {
+        return false;
+      }
+    }
   }
 
   return true;
@@ -1611,7 +1620,7 @@ static bool isWordShuffleMask(ShuffleVec
 
 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-  if (!isWordShuffleMask(N))
+  if (!isNByteElemShuffleMask(N, 4))
     return false;
 
   // Now we look at mask elements 0,4,8,12
@@ -1688,7 +1697,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVe
                                bool &Swap, bool IsLE) {
   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
   // Ensure each byte index of the word is consecutive.
-  if (!isWordShuffleMask(N))
+  if (!isNByteElemShuffleMask(N, 4))
     return false;
 
   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
@@ -1746,6 +1755,66 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVe
   }
 }
 
+/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
+/// if the inputs to the instruction should be swapped and set \p DM to the
+/// value for the immediate.
+/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
+/// AND element 0 of the result comes from the first input (LE) or second input
+/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
+/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
+/// mask.
+bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
+                               bool &Swap, bool IsLE) {
+  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+
+  // Ensure each byte index of the double word is consecutive.
+  if (!isNByteElemShuffleMask(N, 8))
+    return false;
+
+  unsigned M0 = N->getMaskElt(0) / 8;
+  unsigned M1 = N->getMaskElt(8) / 8;
+  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
+
+  // If both vector operands for the shuffle are the same vector, the mask will
+  // contain only elements from the first one and the second one will be undef.
+  if (N->getOperand(1).isUndef()) {
+    if ((M0 | M1) < 2) {
+      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
+      Swap = false;
+      return true;
+    } else
+      return false;
+  }
+
+  if (IsLE) {
+    if (M0 > 1 && M1 < 2) {
+      Swap = false;
+    } else if (M0 < 2 && M1 > 1) {
+      M0 = (M0 + 2) % 4;
+      M1 = (M1 + 2) % 4;
+      Swap = true;
+    } else
+      return false;
+
+    // Note: if control flow comes here that means Swap is already set above
+    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
+    return true;
+  } else { // BE
+    if (M0 < 2 && M1 > 1) {
+      Swap = false;
+    } else if (M0 > 1 && M1 < 2) {
+      M0 = (M0 + 2) % 4;
+      M1 = (M1 + 2) % 4;
+      Swap = true;
+    } else
+      return false;
+
+    // Note: if control flow comes here that means Swap is already set above
+    DM = (M0 << 1) + (M1 & 1);
+    return true;
+  }
+}
+
 
 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
@@ -7760,6 +7829,19 @@ SDValue PPCTargetLowering::LowerVECTOR_S
     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
   }
 
+  if (Subtarget.hasVSX() &&
+    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+    if (Swap)
+      std::swap(V1, V2);
+    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
+    SDValue Conv2 =
+        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
+
+    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
+                              DAG.getConstant(ShiftElts, dl, MVT::i32));
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
+  }
+
   if (Subtarget.hasVSX()) {
     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=304298&r1=304297&r2=304298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Wed May 31 08:09:57 2017
@@ -90,6 +90,10 @@ namespace llvm {
       ///
       VECSHL,
 
+      /// XXPERMDI - The PPC XXPERMDI instruction
+      ///
+      XXPERMDI,
+
       /// The CMPB instruction (takes two operands of i32 or i64).
       CMPB,
 
@@ -454,6 +458,10 @@ namespace llvm {
     /// for a XXSLDWI instruction.
     bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE);
+    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXPERMDI instruction.
+    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                              bool &Swap, bool IsLE);
 
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
     /// shift amount, otherwise return -1.

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=304298&r1=304297&r2=304298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Wed May 31 08:09:57 2017
@@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1,
   SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
 ]>;
 
+def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>,
+  SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+]>;
+
 def SDT_PPCvcmp : SDTypeProfile<1, 3, [
   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
 ]>;
@@ -170,6 +174,7 @@ def PPCaddiDtprelL   : SDNode<"PPCISD::A
 def PPCvperm     : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 def PPCxxsplt    : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
 def PPCxxinsert  : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>;
+def PPCxxpermdi  : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
 def PPCvecshl    : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
 
 def PPCqvfperm   : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td?rev=304298&r1=304297&r2=304298&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td Wed May 31 08:09:57 2017
@@ -843,7 +843,9 @@ let Uses = [RM] in {
 
   def XXPERMDI : XX3Form_2<60, 10,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
-                       "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+                       "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
+                       [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
+                         imm32SExt16:$DM))]>;
   let isCodeGenOnly = 1 in
   def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
                              "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;

Added: llvm/trunk/test/CodeGen/PowerPC/vec_xxpermdi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_xxpermdi.ll?rev=304298&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_xxpermdi.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_xxpermdi.ll Wed May 31 08:09:57 2017
@@ -0,0 +1,307 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s  -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s -check-prefix=CHECK-BE
+
+; Possible LE ShuffleVector masks (Case 1):
+; ShuffleVector((vector double)a, (vector double)b, 3, 1)
+; ShuffleVector((vector double)a, (vector double)b, 2, 1)
+; ShuffleVector((vector double)a, (vector double)b, 3, 0)
+; ShuffleVector((vector double)a, (vector double)b, 2, 0)
+; which targets at:
+; xxpermdi a, b, 0
+; xxpermdi a, b, 1
+; xxpermdi a, b, 2
+; xxpermdi a, b, 3
+; Possible LE Swap ShuffleVector masks (Case 2):
+; ShuffleVector((vector double)a, (vector double)b, 1, 3)
+; ShuffleVector((vector double)a, (vector double)b, 0, 3)
+; ShuffleVector((vector double)a, (vector double)b, 1, 2)
+; ShuffleVector((vector double)a, (vector double)b, 0, 2)
+; which targets at:
+; xxpermdi b, a, 0
+; xxpermdi b, a, 1
+; xxpermdi b, a, 2
+; xxpermdi b, a, 3
+; Possible LE ShuffleVector masks when a == b, b is undef (Case 3):
+; ShuffleVector((vector double)a, (vector double)a, 1, 1)
+; ShuffleVector((vector double)a, (vector double)a, 0, 1)
+; ShuffleVector((vector double)a, (vector double)a, 1, 0)
+; ShuffleVector((vector double)a, (vector double)a, 0, 0)
+; which targets at:
+; xxpermdi a, a, 0
+; xxpermdi a, a, 1
+; xxpermdi a, a, 2
+; xxpermdi a, a, 3
+
+; Possible BE ShuffleVector masks (Case 4):
+; ShuffleVector((vector double)a, (vector double)b, 0, 2)
+; ShuffleVector((vector double)a, (vector double)b, 0, 3)
+; ShuffleVector((vector double)a, (vector double)b, 1, 2)
+; ShuffleVector((vector double)a, (vector double)b, 1, 3)
+; which targets at:
+; xxpermdi a, b, 0
+; xxpermdi a, b, 1
+; xxpermdi a, b, 2
+; xxpermdi a, b, 3
+; Possible BE Swap ShuffleVector masks (Case 5):
+; ShuffleVector((vector double)a, (vector double)b, 2, 0)
+; ShuffleVector((vector double)a, (vector double)b, 3, 0)
+; ShuffleVector((vector double)a, (vector double)b, 2, 1)
+; ShuffleVector((vector double)a, (vector double)b, 3, 1)
+; which targets at:
+; xxpermdi b, a, 0
+; xxpermdi b, a, 1
+; xxpermdi b, a, 2
+; xxpermdi b, a, 3
+; Possible BE ShuffleVector masks when a == b, b is undef (Case 6):
+; ShuffleVector((vector double)a, (vector double)a, 0, 0)
+; ShuffleVector((vector double)a, (vector double)a, 0, 1)
+; ShuffleVector((vector double)a, (vector double)a, 1, 0)
+; ShuffleVector((vector double)a, (vector double)a, 1, 1)
+; which targets at:
+; xxpermdi a, a, 0
+; xxpermdi a, a, 1
+; xxpermdi a, a, 2
+; xxpermdi a, a, 3
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_0
+; CHECK-LE: xxmrghd 34, 34, 35
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_1
+; CHECK-LE: xxpermdi 34, 34, 35, 1
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_2
+; CHECK-LE: xxpermdi 34, 34, 35, 2
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_3
+; CHECK-LE: xxmrgld 34, 34, 35
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_0
+; CHECK-LE: xxmrghd 34, 35, 34
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_1
+; CHECK-LE: xxpermdi 34, 35, 34, 1
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_2
+; CHECK-LE: xxpermdi 34, 35, 34, 2
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_3
+; CHECK-LE: xxmrgld 34, 35, 34
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_0
+; CHECK-LE: xxspltd 34, 34, 0
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_1
+; CHECK-LE: blr
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_2
+; CHECK-LE: xxswapd 34, 34
+}
+
+define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+      ret <2 x double> %0
+; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_3
+; CHECK-LE: xxspltd 34, 34, 1
+; CHECK-LE: blr
+}
+
+; Start testing BE
+define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_0
+; CHECK-BE: xxmrghd 34, 34, 35
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_1
+; CHECK-BE: xxpermdi 34, 34, 35, 1
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_2
+; CHECK-BE: xxpermdi 34, 34, 35, 2
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_3
+; CHECK-BE: xxmrgld 34, 34, 35
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_0
+; CHECK-BE: xxmrghd 34, 35, 34
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_1
+; CHECK-BE: xxpermdi 34, 35, 34, 1
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_2
+; CHECK-BE: xxpermdi 34, 35, 34, 2
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_3
+; CHECK-BE: xxmrgld 34, 35, 34
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_0
+; CHECK-BE: xxspltd 34, 34, 0
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_1
+; CHECK-BE: blr
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_2
+; CHECK-BE: xxswapd 34, 34
+}
+
+define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) {
+     entry:
+      %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+      ret <2 x double> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_3
+; CHECK-BE: xxspltd 34, 34, 1
+; CHECK-BE: blr
+}
+
+; More test cases to test different types of vector inputs
+define <16 x i8> @test_be_vec_xxpermdi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) {
+     entry:
+      %0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+      ret <16 x i8> %0
+; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v16i8_v16i8
+; CHECK-BE: xxpermdi 34, 34, 35, 1
+; CHECK-BE: blr
+}
+
+define <8 x i16> @test_le_swap_vec_xxpermdi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) {
+     entry:
+      %0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+      ret <8 x i16> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v8i16_v8i16
+; CHECK-LE: xxpermdi 34, 35, 34, 1
+; CHECK-LE: blr
+}
+
+define <4 x i32> @test_le_swap_vec_xxpermdi_v4i32_v4i32(<4 x i32> %VA, <4 x i32> %VB) {
+     entry:
+      %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB,<4 x i32> <i32 0, i32 1, i32 6, i32 7>
+      ret <4 x i32> %0
+; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v4i32_v4i32
+; CHECK-LE: xxpermdi 34, 35, 34, 1
+; CHECK-LE: blr
+}




More information about the llvm-commits mailing list