[llvm-commits] [llvm] r144380 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-logic.ll test/CodeGen/X86/avx2-shift.ll
Craig Topper
craig.topper at gmail.com
Thu Nov 10 23:39:24 PST 2011
Author: ctopper
Date: Fri Nov 11 01:39:23 2011
New Revision: 144380
URL: http://llvm.org/viewvc/llvm-project?rev=144380&view=rev
Log:
Add lowering for AVX2 shift instructions.
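With AVX2, 256-bit immediate shifts and per-element variable shifts
(VPSLLV/VPSRLV/VPSRAV) are now selected directly instead of going through
the generic expansions used previously. For example (taken from the new
avx2-shift.ll test added below), IR like this now compiles to a single
vpsllvd:

    define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
      %k = shl <4 x i32> %x, %y
      ret <4 x i32> %k
    }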
Added:
    llvm/trunk/test/CodeGen/X86/avx2-shift.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-logic.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=144380&r1=144379&r2=144380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov 11 01:39:23 2011
@@ -1050,21 +1050,9 @@
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+ // Don't lower v32i8 because there is no 128-bit byte mul
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
- // Don't lower v32i8 because there is no 128-bit byte mul
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -10130,47 +10118,6 @@
if (!Subtarget->hasXMMInt())
return SDValue();
- // Decompose 256-bit shifts into smaller 128-bit shifts.
- if (VT.getSizeInBits() == 256) {
- int NumElems = VT.getVectorNumElements();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- // Extract the two vectors
- SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
- SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
-
- // Recreate the shift amount vectors
- SDValue Amt1, Amt2;
- if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
- // Constant shift amount
- SmallVector<SDValue, 4> Amt1Csts;
- SmallVector<SDValue, 4> Amt2Csts;
- for (int i = 0; i < NumElems/2; ++i)
- Amt1Csts.push_back(Amt->getOperand(i));
- for (int i = NumElems/2; i < NumElems; ++i)
- Amt2Csts.push_back(Amt->getOperand(i));
-
- Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
- &Amt1Csts[0], NumElems/2);
- Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
- &Amt2Csts[0], NumElems/2);
- } else {
- // Variable shift amount
- Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
- Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
- }
-
- // Issue new vector shifts for the smaller types
- V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
- V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
-
- // Concatenate the result back
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
- }
-
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
@@ -10259,9 +10206,97 @@
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
+
+ if (Subtarget->hasAVX2()) {
+ if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
}
}
+ // AVX2 variable shifts
+ if (Subtarget->hasAVX2()) {
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psllv_d, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psllv_d_256, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psllv_q, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psllv_q_256, MVT::i32),
+ R, Amt);
+
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrlv_d, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrlv_d_256, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrlv_q, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrlv_q_256, MVT::i32),
+ R, Amt);
+
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrav_d, MVT::i32),
+ R, Amt);
+ if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_avx2_psrav_d_256, MVT::i32),
+ R, Amt);
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -10328,6 +10363,48 @@
R, DAG.getNode(ISD::ADD, dl, VT, R, R));
return R;
}
+
+ // Decompose 256-bit shifts into smaller 128-bit shifts.
+ if (VT.getSizeInBits() == 256) {
+ int NumElems = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ // Extract the two vectors
+ SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl);
+ SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+
+ // Recreate the shift amount vectors
+ SDValue Amt1, Amt2;
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ // Constant shift amount
+ SmallVector<SDValue, 4> Amt1Csts;
+ SmallVector<SDValue, 4> Amt2Csts;
+ for (int i = 0; i < NumElems/2; ++i)
+ Amt1Csts.push_back(Amt->getOperand(i));
+ for (int i = NumElems/2; i < NumElems; ++i)
+ Amt2Csts.push_back(Amt->getOperand(i));
+
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt1Csts[0], NumElems/2);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
+ &Amt2Csts[0], NumElems/2);
+ } else {
+ // Variable shift amount
+ Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32),
+ DAG, dl);
+ }
+
+ // Issue new vector shifts for the smaller types
+ V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
+ V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
+
+ // Concatenate the result back
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
+ }
+
return SDValue();
}
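Note the reordering in LowerShift here: the decomposition of 256-bit shifts
into two 128-bit shifts, which previously ran first, is now the last-resort
fallback at the end of the function, so the new AVX2 intrinsic-based paths
above get a chance to handle whole 256-bit shifts before anything is split.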
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=144380&r1=144379&r2=144380&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Nov 11 01:39:23 2011
@@ -7655,7 +7655,6 @@
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
- PatFrag pf128, PatFrag pf256,
Intrinsic Int128, Intrinsic Int256> {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -7664,7 +7663,8 @@
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int128 VR128:$src1, (pf128 addr:$src2)))]>,
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
VEX_4V;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -7673,70 +7673,43 @@
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int256 VR256:$src1, (pf256 addr:$src2)))]>,
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>,
VEX_4V;
}
-defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32,
- int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>;
-defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64,
- int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>,
- VEX_W;
-defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32,
- int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>;
-defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64,
- int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>,
- VEX_W;
-defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32,
- int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>;
-
-
-let Predicates = [HasAVX2] in {
-
- def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSLLVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
- (VPSLLVQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSRLVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
- (VPSRLVQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
- (VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSLLVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
- (VPSLLVQYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSRLVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
- (VPSRLVQYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
- (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
- (VPSLLVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv2i64 addr:$src2))),
- (VPSLLVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (shl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))),
- (VPSLLVQrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (srl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
- (VPSRLVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (srl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))),
- (VPSRLVQrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (sra (v4i32 VR128:$src1),(loadv4i32 addr:$src2))),
- (VPSRAVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i32 (shl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
- (VPSLLVDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (shl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))),
- (VPSLLVQYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (srl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
- (VPSRLVDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (srl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))),
- (VPSRLVQYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (sra (v8i32 VR256:$src1),(loadv8i32 addr:$src2))),
- (VPSRAVDYrm VR256:$src1, addr:$src2)>;
+multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ VEX_4V;
}
-
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d,
+ int_x86_avx2_psllv_d_256>;
+defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q,
+ int_x86_avx2_psllv_q_256>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d,
+ int_x86_avx2_psrlv_d_256>;
+defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q,
+ int_x86_avx2_psrlv_q_256>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d,
+ int_x86_avx2_psrav_d_256>;
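The pf128/pf256 PatFrag parameters are gone: the dword variants now
bitconvert a memopv2i64/memopv4i64 load directly, while the new
avx2_var_shift_i64 multiclass handles the qword variants without the
bitconvert. The explicit shl/srl/sra and load patterns are dropped because
LowerShift now emits the corresponding intrinsics itself.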
Modified: llvm/trunk/test/CodeGen/X86/avx2-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-logic.ll?rev=144380&r1=144379&r2=144380&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-logic.ll Fri Nov 11 01:39:23 2011
@@ -53,76 +53,3 @@
%min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %min
}
-
-
-; CHECK: variable_shl0
-; CHECK: psllvd
-; CHECK: ret
-define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
- %k = shl <4 x i32> %x, %y
- ret <4 x i32> %k
-}
-; CHECK: variable_shl1
-; CHECK: psllvd
-; CHECK: ret
-define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
- %k = shl <8 x i32> %x, %y
- ret <8 x i32> %k
-}
-; CHECK: variable_shl2
-; CHECK: psllvq
-; CHECK: ret
-define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
- %k = shl <2 x i64> %x, %y
- ret <2 x i64> %k
-}
-; CHECK: variable_shl3
-; CHECK: psllvq
-; CHECK: ret
-define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
- %k = shl <4 x i64> %x, %y
- ret <4 x i64> %k
-}
-; CHECK: variable_srl0
-; CHECK: psrlvd
-; CHECK: ret
-define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
- %k = lshr <4 x i32> %x, %y
- ret <4 x i32> %k
-}
-; CHECK: variable_srl1
-; CHECK: psrlvd
-; CHECK: ret
-define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
- %k = lshr <8 x i32> %x, %y
- ret <8 x i32> %k
-}
-; CHECK: variable_srl2
-; CHECK: psrlvq
-; CHECK: ret
-define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
- %k = lshr <2 x i64> %x, %y
- ret <2 x i64> %k
-}
-; CHECK: variable_srl3
-; CHECK: psrlvq
-; CHECK: ret
-define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
- %k = lshr <4 x i64> %x, %y
- ret <4 x i64> %k
-}
-
-; CHECK: variable_sra0
-; CHECK: psravd
-; CHECK: ret
-define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
- %k = ashr <4 x i32> %x, %y
- ret <4 x i32> %k
-}
-; CHECK: variable_sra1
-; CHECK: psravd
-; CHECK: ret
-define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
- %k = ashr <8 x i32> %x, %y
- ret <8 x i32> %k
-}
Added: llvm/trunk/test/CodeGen/X86/avx2-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-shift.ll?rev=144380&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-shift.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx2-shift.ll Fri Nov 11 01:39:23 2011
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: variable_shl0
+; CHECK: psllvd
+; CHECK: ret
+define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
+ %k = shl <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_shl1
+; CHECK: psllvd
+; CHECK: ret
+define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
+ %k = shl <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+; CHECK: variable_shl2
+; CHECK: psllvq
+; CHECK: ret
+define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
+ %k = shl <2 x i64> %x, %y
+ ret <2 x i64> %k
+}
+; CHECK: variable_shl3
+; CHECK: psllvq
+; CHECK: ret
+define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
+ %k = shl <4 x i64> %x, %y
+ ret <4 x i64> %k
+}
+; CHECK: variable_srl0
+; CHECK: psrlvd
+; CHECK: ret
+define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
+ %k = lshr <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_srl1
+; CHECK: psrlvd
+; CHECK: ret
+define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
+ %k = lshr <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+; CHECK: variable_srl2
+; CHECK: psrlvq
+; CHECK: ret
+define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
+ %k = lshr <2 x i64> %x, %y
+ ret <2 x i64> %k
+}
+; CHECK: variable_srl3
+; CHECK: psrlvq
+; CHECK: ret
+define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
+ %k = lshr <4 x i64> %x, %y
+ ret <4 x i64> %k
+}
+
+; CHECK: variable_sra0
+; CHECK: psravd
+; CHECK: ret
+define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
+ %k = ashr <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_sra1
+; CHECK: psravd
+; CHECK: ret
+define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
+ %k = ashr <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+
+;;; Shift left
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+ %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+ %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+ %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+ %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+ %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+ %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+ %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+ %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: variable_sra0_load
+; CHECK: psravd (%
+; CHECK: ret
+define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = ashr <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+
+; CHECK: variable_sra1_load
+; CHECK: psravd (%
+; CHECK: ret
+define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = ashr <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+
+; CHECK: variable_shl0_load
+; CHECK: psllvd (%
+; CHECK: ret
+define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = shl <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+; CHECK: variable_shl1_load
+; CHECK: psllvd (%
+; CHECK: ret
+define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = shl <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+; CHECK: variable_shl2_load
+; CHECK: psllvq (%
+; CHECK: ret
+define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
+ %y1 = load <2 x i64>* %y
+ %k = shl <2 x i64> %x, %y1
+ ret <2 x i64> %k
+}
+; CHECK: variable_shl3_load
+; CHECK: psllvq (%
+; CHECK: ret
+define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
+ %y1 = load <4 x i64>* %y
+ %k = shl <4 x i64> %x, %y1
+ ret <4 x i64> %k
+}
+; CHECK: variable_srl0_load
+; CHECK: psrlvd (%
+; CHECK: ret
+define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = lshr <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+; CHECK: variable_srl1_load
+; CHECK: psrlvd (%
+; CHECK: ret
+define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = lshr <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+; CHECK: variable_srl2_load
+; CHECK: psrlvq (%
+; CHECK: ret
+define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
+ %y1 = load <2 x i64>* %y
+ %k = lshr <2 x i64> %x, %y1
+ ret <2 x i64> %k
+}
+; CHECK: variable_srl3_load
+; CHECK: psrlvq (%
+; CHECK: ret
+define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
+ %y1 = load <4 x i64>* %y
+ %k = lshr <4 x i64> %x, %y1
+ ret <4 x i64> %k
+}