[llvm-commits] [llvm] r154761 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-vperm.ll

Elena Demikhovsky elena.demikhovsky at intel.com
Sun Apr 15 04:19:00 PDT 2012


Author: delena
Date: Sun Apr 15 06:18:59 2012
New Revision: 154761

URL: http://llvm.org/viewvc/llvm-project?rev=154761&view=rev
Log:
Added VPERM optimization for AVX2 shuffles

Added:
    llvm/trunk/test/CodeGen/X86/avx2-vperm.ll   (with props)
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154761&r1=154760&r2=154761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Apr 15 06:18:59 2012
@@ -2935,6 +2935,8 @@
   case X86ISD::PSHUFHW:
   case X86ISD::PSHUFLW:
   case X86ISD::VPERMILP:
+  case X86ISD::VPERMQ:
+  case X86ISD::VPERMPD:
     return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
   }
 }
@@ -3976,6 +3978,27 @@
   return Index / NumElemsPerChunk;
 }
 
+/// getShuffleCLImmediate - Build the immediate operand that makes VPERMQ or
+/// VPERMPD carry out the permutation described by the VECTOR_SHUFFLE mask
+/// of N. Only 256-bit vectors with four elements are supported.
+static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  assert((VT.is256BitVector() && VT.getVectorNumElements() == 4) &&
+         "Unsupported vector type for VPERMQ/VPERMPD");
+
+  // Each defined mask element is OR-ed into the immediate at bit offset
+  // 2 * (its position); undefined (negative) elements leave their bits zero.
+  unsigned Imm = 0;
+  for (unsigned Idx = 0, e = VT.getVectorNumElements(); Idx != e; ++Idx) {
+    int SrcElt = N->getMaskElt(Idx);
+    if (SrcElt >= 0)
+      Imm |= SrcElt << (Idx * 2);
+  }
+
+  return Imm;
+}
+
 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
 /// constant +0.0.
 bool X86::isZeroNode(SDValue Elt) {
@@ -6627,6 +6650,20 @@
   SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG);
   if (BlendOp.getNode())
     return BlendOp;
+  if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+    SmallVector<SDValue,8> permclMask;
+    for (unsigned i = 0; i != 8; ++i) {
+        permclMask.push_back(DAG.getConstant((M[i] >= 0)?M[i]:0x80, MVT::i32));
+    }
+    return DAG.getNode(VT.isInteger()? X86ISD::VPERMD:X86ISD::VPERMPS, dl, VT,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
+                                   &permclMask[0], 8), V1);
+
+  }
+  if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+    return getTargetShuffleNode(VT.isInteger()? X86ISD::VPERMQ : X86ISD::VPERMPD, dl, VT, V1,
+                                getShuffleCLImmediate(SVOp), DAG);
+
 
   //===--------------------------------------------------------------------===//
   // Since no target specific shuffle was selected for this generic one,
@@ -11141,6 +11178,10 @@
   case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";
   case X86ISD::VPERMILP:           return "X86ISD::VPERMILP";
   case X86ISD::VPERM2X128:         return "X86ISD::VPERM2X128";
+  case X86ISD::VPERMD:             return "X86ISD::VPERMD";
+  case X86ISD::VPERMQ:             return "X86ISD::VPERMQ";
+  case X86ISD::VPERMPS:            return "X86ISD::VPERMPS";
+  case X86ISD::VPERMPD:            return "X86ISD::VPERMPD";
   case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154761&r1=154760&r2=154761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Apr 15 06:18:59 2012
@@ -285,6 +285,10 @@
       UNPCKL,
       UNPCKH,
       VPERMILP,
+      VPERMD,
+      VPERMQ,
+      VPERMPS,
+      VPERMPD,
       VPERM2X128,
       VBROADCAST,
 

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154761&r1=154760&r2=154761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Apr 15 06:18:59 2012
@@ -155,6 +155,10 @@
 def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
 
 def X86VPermilp  : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
+def X86VPermd    : SDNode<"X86ISD::VPERMD",   SDTShuff2Op>;
+def X86VPermps   : SDNode<"X86ISD::VPERMPS",  SDTShuff2Op>;
+def X86VPermq    : SDNode<"X86ISD::VPERMQ",   SDTShuff2OpI>;
+def X86VPermpd   : SDNode<"X86ISD::VPERMPD",  SDTShuff2OpI>;
 
 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=154761&r1=154760&r2=154761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Apr 15 06:18:59 2012
@@ -1049,9 +1049,9 @@
     { X86::VPCMPGTWYrr,       X86::VPCMPGTWYrm,        TB_ALIGN_32 },
     { X86::VPERM2I128rr,      X86::VPERM2I128rm,       TB_ALIGN_32 },
     { X86::VPERMDYrr,         X86::VPERMDYrm,          TB_ALIGN_32 },
-    { X86::VPERMPDYrr,        X86::VPERMPDYrm,         TB_ALIGN_32 },
+    { X86::VPERMPDYri,        X86::VPERMPDYmi,         TB_ALIGN_32 },
     { X86::VPERMPSYrr,        X86::VPERMPSYrm,         TB_ALIGN_32 },
-    { X86::VPERMQYrr,         X86::VPERMQYrm,          TB_ALIGN_32 },
+    { X86::VPERMQYri,         X86::VPERMQYmi,          TB_ALIGN_32 },
     { X86::VPHADDDYrr,        X86::VPHADDDYrm,         TB_ALIGN_32 },
     { X86::VPHADDSWrr256,     X86::VPHADDSWrm256,      TB_ALIGN_32 },
     { X86::VPHADDWYrr,        X86::VPHADDWYrm,         TB_ALIGN_32 },

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154761&r1=154760&r2=154761&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Apr 15 06:18:59 2012
@@ -7746,12 +7746,12 @@
 
 multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                          Intrinsic Int> {
-  def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                      (ins VR256:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
-  def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                      (ins i256mem:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7765,6 +7765,29 @@
 defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
                              VEX_W;
 
+let Predicates = [HasAVX2] in {
+// Variable permutes: $src1 is the index vector, $src2 the data to permute.
+def : Pat<(v8i32 (X86VPermd VR256:$src1, VR256:$src2)),
+          (VPERMDYrr VR256:$src1, VR256:$src2)>;
+def : Pat<(v8f32 (X86VPermps VR256:$src1, VR256:$src2)),
+          (VPERMPSYrr VR256:$src1, VR256:$src2)>;
+// Immediate permutes: the element selection is encoded in the i8 immediate.
+def : Pat<(v4i64 (X86VPermq VR256:$src1, (i8 imm:$imm))),
+          (VPERMQYri VR256:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermpd VR256:$src1, (i8 imm:$imm))),
+          (VPERMPDYri VR256:$src1, imm:$imm)>;
+
+// Load-folding forms (v8i32 uses X86VPermd, the node lowering emits for it).
+def : Pat<(v8i32 (X86VPermd VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+          (VPERMDYrm VR256:$src1, addr:$src2)>;
+def : Pat<(v8f32 (X86VPermps VR256:$src1, (memopv8f32 addr:$src2))),
+          (VPERMPSYrm VR256:$src1, addr:$src2)>;
+def : Pat<(v4i64 (X86VPermq (memopv4i64 addr:$src1), (i8 imm:$imm))),
+          (VPERMQYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
+          (VPERMPDYmi addr:$src1, imm:$imm)>;
+}
+
 //===----------------------------------------------------------------------===//
 // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
 //

Added: llvm/trunk/test/CodeGen/X86/avx2-vperm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vperm.ll?rev=154761&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vperm.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx2-vperm.ll Sun Apr 15 06:18:59 2012
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_8x32
+; CHECK: vpermd
+  %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
+  ret <8 x i32> %B
+}
+
+
+define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_8x32
+; CHECK: vpermps
+  %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32> <i32 undef, i32 7, i32 2, i32 undef, i32 4, i32 undef, i32 1, i32 6>
+  ret <8 x float> %B
+}
+
+define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_4x64
+; CHECK: vpermq
+  %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i64> %B
+}
+
+define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_4x64
+; CHECK: vpermpd
+  %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x double> %B
+}

Propchange: llvm/trunk/test/CodeGen/X86/avx2-vperm.ll
------------------------------------------------------------------------------
    svn:executable = *





More information about the llvm-commits mailing list