[llvm-commits] [llvm] r137067 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-shift.ll

Mon Aug 8 14:31:08 PDT 2011

Author: bruno
Date: Mon Aug  8 16:31:08 2011
New Revision: 137067

URL: http://llvm.org/viewvc/llvm-project?rev=137067&view=rev
Log:
Add support for several vector shifts operations while in AVX mode. Fix PR10581

Added:
    llvm/trunk/test/CodeGen/X86/avx-shift.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=137067&r1=137066&r2=137067&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Aug  8 16:31:08 2011
@@ -883,7 +883,7 @@
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
   }
 
-  if (Subtarget->hasSSE41()) {
+  if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
     setOperationAction(ISD::FFLOOR,             MVT::f32,   Legal);
     setOperationAction(ISD::FCEIL,              MVT::f32,   Legal);
     setOperationAction(ISD::FTRUNC,             MVT::f32,   Legal);
@@ -922,10 +922,11 @@
     }
   }
 
-  if (Subtarget->hasSSE2()) {
+  if (Subtarget->hasSSE2() || Subtarget->hasAVX()) {
     setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
     setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
     setOperationAction(ISD::SRL,               MVT::v16i8, Custom);
+    setOperationAction(ISD::SRL,               MVT::v8i16, Custom);
 
     setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
     setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
@@ -935,7 +936,7 @@
     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
   }
 
-  if (Subtarget->hasSSE42())
+  if (Subtarget->hasSSE42() || Subtarget->hasAVX())
     setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
 
   if (!UseSoftFloat && Subtarget->hasAVX()) {
@@ -975,6 +976,19 @@
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i8,  Custom);
     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i16, Custom);
 
+    setOperationAction(ISD::SRL,               MVT::v4i64, Custom);
+    setOperationAction(ISD::SRL,               MVT::v8i32, Custom);
+    setOperationAction(ISD::SRL,               MVT::v16i16, Custom);
+    setOperationAction(ISD::SRL,               MVT::v32i8, Custom);
+
+    setOperationAction(ISD::SHL,               MVT::v4i64, Custom);
+    setOperationAction(ISD::SHL,               MVT::v8i32, Custom);
+    setOperationAction(ISD::SHL,               MVT::v16i16, Custom);
+    setOperationAction(ISD::SHL,               MVT::v32i8, Custom);
+
+    setOperationAction(ISD::SRA,               MVT::v8i32, Custom);
+    setOperationAction(ISD::SRA,               MVT::v16i16, Custom);
+
     // Custom lower several nodes for 256-bit types.
     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
                   i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -9195,11 +9209,42 @@
   DebugLoc dl = Op.getDebugLoc();
   SDValue R = Op.getOperand(0);
   SDValue Amt = Op.getOperand(1);
-
   LLVMContext *Context = DAG.getContext();
 
-  // Must have SSE2.
-  if (!Subtarget->hasSSE2()) return SDValue();
+  if (!(Subtarget->hasSSE2() || Subtarget->hasAVX()))
+    return SDValue();
+
+  // Decompose 256-bit shifts into smaller 128-bit shifts.
+  if (VT.getSizeInBits() == 256) {
+    int NumElems = VT.getVectorNumElements();
+    MVT EltVT = VT.getVectorElementType().getSimpleVT();
+    EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+    // Extract the two vectors
+    SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+    SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+                                     DAG, dl);
+
+    // Recreate the shift amount vectors
+    SmallVector<SDValue, 4> Amt1Csts;
+    SmallVector<SDValue, 4> Amt2Csts;
+    for (int i = 0; i < NumElems/2; ++i)
+      Amt1Csts.push_back(Amt->getOperand(i));
+    for (int i = NumElems/2; i < NumElems; ++i)
+      Amt2Csts.push_back(Amt->getOperand(i));
+
+    SDValue Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                               &Amt1Csts[0], NumElems/2);
+    SDValue Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+                               &Amt2Csts[0], NumElems/2);
+
+    // Issue new vector shifts for the smaller types
+    V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+    V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+    // Concatenate the result back
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+  }
 
   // Optimize shl/srl/sra with constant shift amount.
   if (isSplatVector(Amt.getNode())) {
@@ -9250,9 +9295,6 @@
   }
 
   // Lower SHL with variable shift amount.
-  // Cannot lower SHL without SSE2 or later.
-  if (!Subtarget->hasSSE2()) return SDValue();
-
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                      DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
@@ -12099,7 +12141,7 @@
   // all elements are shifted by the same amount.  We can't do this in legalize
   // because the a constant vector is typically transformed to a constant pool
   // so we have no knowledge of the shift amount.
-  if (!Subtarget->hasSSE2())
+  if (!(Subtarget->hasSSE2() || Subtarget->hasAVX()))
     return SDValue();
 
   if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)

Added: llvm/trunk/test/CodeGen/X86/avx-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shift.ll?rev=137067&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shift.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx-shift.ll Mon Aug  8 16:31:08 2011
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;;; Shift left
+; CHECK: vpslld
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
+2>
+  ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <16 x i16> %s
+}
+