[llvm-commits] [llvm] r137519 - in /llvm/trunk: lib/Target/X86/InstPrinter/X86InstComments.cpp lib/Target/X86/Utils/X86ShuffleDecode.cpp lib/Target/X86/Utils/X86ShuffleDecode.h lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-vperm2f128.ll

Fri Aug 12 14:48:26 PDT 2011

Author: bruno
Date: Fri Aug 12 16:48:26 2011
New Revision: 137519

URL: http://llvm.org/viewvc/llvm-project?rev=137519&view=rev
Log:
The VPERM2F128 is a AVX instruction which permutes between two 256-bit
vectors. It operates on 128-bit elements instead of regular scalar
types. Recognize shuffles that are suitable for VPERM2F128 and teach
the x86 legalizer how to handle them.

Added:
    llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll
Modified:
    llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original)
+++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Fri Aug 12 16:48:26 2011
@@ -225,6 +225,11 @@
                         ShuffleMask);
     Src1Name = getRegName(MI->getOperand(0).getReg());
     break;
+  case X86::VPERM2F128rr:
+    DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    break;
   }
 
 

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Fri Aug 12 16:48:26 2011
@@ -220,4 +220,24 @@
   }
 }
 
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned HalfSize = VT.getVectorNumElements()/2;
+  unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+  unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+  for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+    ShuffleMask.push_back(i);
+  for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+    ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask) {
+  // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+  // has information about the instruction and not the types. So for
+  // instruction comments purpose, assume the 256-bit vector is v4i64.
+  return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
+
 } // llvm namespace

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Fri Aug 12 16:48:26 2011
@@ -97,6 +97,11 @@
 void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
                         SmallVectorImpl<unsigned> &ShuffleMask);
 
+void DecodeVPERM2F128Mask(unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask);
+
 } // llvm namespace
 
 #endif

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 12 16:48:26 2011
@@ -2753,6 +2753,7 @@
   case X86ISD::VPERMILPSY:
   case X86ISD::VPERMILPD:
   case X86ISD::VPERMILPDY:
+  case X86ISD::VPERM2F128:
     return true;
   }
   return false;
@@ -2795,6 +2796,7 @@
   case X86ISD::PALIGN:
   case X86ISD::SHUFPD:
   case X86ISD::SHUFPS:
+  case X86ISD::VPERM2F128:
     return DAG.getNode(Opc, dl, VT, V1, V2,
                        DAG.getConstant(TargetMask, MVT::i8));
   }
@@ -3033,6 +3035,17 @@
   return false;
 }
 
+/// isUndefOrInRange - Return true if every element in Mask, begining from
+/// position Pos and ending in Pos+Size, falls within the specified sequential
+/// range (L, L+Pos]. or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+                                       int Pos, int Size, int Low) {
+  for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (!isUndefOrEqual(Mask[i], Low))
+      return false;
+  return true;
+}
+
 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
@@ -3444,6 +3457,67 @@
   return ::isMOVLMask(M, N->getValueType(0));
 }
 
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example: this
+/// shuffle bellow:
+///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// The first half comes from the second half of V1 and the second half from the
+/// the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+                             const X86Subtarget *Subtarget) {
+  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+    return false;
+
+  // The shuffle result is divided into half A and half B. In total the two
+  // sources have 4 halves, namely: C, D, E, F. The final values of A and
+  // B must come from C, D, E or F.
+  int HalfSize = VT.getVectorNumElements()/2;
+  bool MatchA = false, MatchB = false;
+
+  // Check if A comes from one of C, D, E, F.
+  for (int Half = 0; Half < 4; ++Half) {
+    if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+      MatchA = true;
+      break;
+    }
+  }
+
+  // Check if B comes from one of C, D, E, F.
+  for (int Half = 0; Half < 4; ++Half) {
+    if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+      MatchB = true;
+      break;
+    }
+  }
+
+  return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  EVT VT = SVOp->getValueType(0);
+
+  int HalfSize = VT.getVectorNumElements()/2;
+
+  int FstHalf = 0, SndHalf = 0;
+  for (int i = 0; i < HalfSize; ++i) {
+    if (SVOp->getMaskElt(i) > 0) {
+      FstHalf = SVOp->getMaskElt(i)/HalfSize;
+      break;
+    }
+  }
+  for (int i = HalfSize; i < HalfSize*2; ++i) {
+    if (SVOp->getMaskElt(i) > 0) {
+      SndHalf = SVOp->getMaskElt(i)/HalfSize;
+      break;
+    }
+  }
+
+  return (FstHalf | (SndHalf << 4));
+}
+
 /// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
 /// Note that VPERMIL mask matching is different depending whether theunderlying
@@ -4317,6 +4391,11 @@
       DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
                         ShuffleMask);
       break;
+    case X86ISD::VPERM2F128:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                           ShuffleMask);
+      break;
     default:
       assert("not implemented for target shuffle node");
       return SDValue();
@@ -6335,6 +6414,11 @@
     return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
                                 getShuffleVPERMILPDImmediate(SVOp), DAG);
 
+  // Handle VPERM2F128 permutations
+  if (isVPERM2F128Mask(M, VT, Subtarget))
+    return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
+                                getShuffleVPERM2F128Immediate(SVOp), DAG);
+
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
   // lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -10052,6 +10136,7 @@
   case X86ISD::VPERMILPSY:         return "X86ISD::VPERMILPSY";
   case X86ISD::VPERMILPD:          return "X86ISD::VPERMILPD";
   case X86ISD::VPERMILPDY:         return "X86ISD::VPERMILPDY";
+  case X86ISD::VPERM2F128:         return "X86ISD::VPERM2F128";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
@@ -13134,6 +13219,7 @@
   case X86ISD::VPERMILPSY:
   case X86ISD::VPERMILPD:
   case X86ISD::VPERMILPDY:
+  case X86ISD::VPERM2F128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
   }
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Aug 12 16:48:26 2011
@@ -275,6 +275,7 @@
       VPERMILPSY,
       VPERMILPD,
       VPERMILPDY,
+      VPERM2F128,
 
       // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
       // according to %al. An operator is needed so that this can be expanded

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Fri Aug 12 16:48:26 2011
@@ -158,6 +158,8 @@
 def X86VPermilpd  : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
 def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
 
+def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=137519&r1=137518&r2=137519&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Aug 12 16:48:26 2011
@@ -5695,6 +5695,19 @@
                   VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
           (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
 
+def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
 //===----------------------------------------------------------------------===//
 // VZERO - Zero YMM registers
 //

Added: llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll?rev=137519&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll Fri Aug 12 16:48:26 2011
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vperm2f128 $1
+define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vperm2f128 $48
+define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vperm2f128 $0
+define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vperm2f128 $17
+define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+; CHECK: vperm2f128 $17
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+; CHECK: vperm2f128 $33
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+;;;; Cases with undef indicies mixed in the mask
+
+; CHECK: vperm2f128 $33
+define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
+  ret <8 x float> %shuffle
+}
+
+;;;; Cases we must not select vperm2f128
+
+; CHECK: _G
+; CHECK-NOT: vperm2f128
+define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
+  ret <8 x float> %shuffle
+}