[llvm-commits] [llvm] r154313 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-shuffle.ll

Mon Apr 9 01:33:21 PDT 2012

Author: nadav
Date: Mon Apr  9 03:33:21 2012
New Revision: 154313

URL: http://llvm.org/viewvc/llvm-project?rev=154313&view=rev
Log:
Lower some x86 shuffle sequences to the vblend family of instructions.


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-shuffle.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154313&r1=154312&r2=154313&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr  9 03:33:21 2012
@@ -5377,6 +5377,69 @@
   return LowerAVXCONCAT_VECTORS(Op, DAG);
 }
 
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+                                          const X86Subtarget *Subtarget,
+                                          SelectionDAG &DAG, EVT PtrTy) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
+  EVT VT = Op.getValueType();
+  EVT InVT = V1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned EltSize = EltVT.getSizeInBits();
+  int MaskSize = VT.getVectorNumElements();
+  int InSize = InVT.getVectorNumElements();
+
+  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
+  if (!Subtarget->hasAVX())
+    return SDValue();
+
+  if (MaskSize != InSize)
+    return SDValue();
+
+  SmallVector<Constant*,2> MaskVals;
+  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
+  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
+
+  for (int i = 0; i < MaskSize; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if (EltIdx == i || EltIdx == -1)
+      MaskVals.push_back(NegOne);
+    else if (EltIdx == (i + MaskSize))
+      MaskVals.push_back(Zero);
+    else return SDValue();
+  }
+
+  Constant *MaskC = ConstantVector::get(MaskVals);
+  EVT MaskTy = EVT::getEVT(MaskC->getType());
+  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
+  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
+  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
+  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+
+  if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
+    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+
+  if (Subtarget->hasAVX()) {
+    switch (MaskTy.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v16i8:
+    case MVT::v4i32:
+    case MVT::v2i64:
+    case MVT::v8i32:
+    case MVT::v4i64:
+             return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+    }
+  }
+
+  return SDValue();
+}
+
 // v8i16 shuffles - Prefer shuffles in the following order:
 // 1. [all]   pshuflw, pshufhw, optional move
 // 2. [ssse3] 1 x pshufb
@@ -6539,6 +6602,10 @@
     return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
 
+  SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy());
+  if (BlendOp.getNode())
+    return BlendOp;
+
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
   // lower it into other known shuffles. FIXME: this isn't true yet, but

Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154313&r1=154312&r2=154313&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Mon Apr  9 03:33:21 2012
@@ -162,3 +162,43 @@
 62>
  ret <32 x i8> %0
 }
+
+; CHECK: blend1
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendvps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendvpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
+}