[llvm-commits] [llvm] r145926 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86ISelLowering.h X86InstrFragmentsSIMD.td X86InstrSSE.td

Craig Topper craig.topper at gmail.com
Tue Dec 6 00:21:25 PST 2011


Author: ctopper
Date: Tue Dec  6 02:21:25 2011
New Revision: 145926

URL: http://llvm.org/viewvc/llvm-project?rev=145926&view=rev
Log:
Merge floating point and integer UNPCK X86ISD node types.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145926&r1=145925&r2=145926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec  6 02:21:25 2011
@@ -2851,10 +2851,8 @@
   case X86ISD::MOVDDUP:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
-  case X86ISD::UNPCKLP:
-  case X86ISD::PUNPCKL:
-  case X86ISD::UNPCKHP:
-  case X86ISD::PUNPCKH:
+  case X86ISD::UNPCKL:
+  case X86ISD::UNPCKH:
   case X86ISD::VPERMILP:
   case X86ISD::VPERM2X128:
     return true;
@@ -2914,10 +2912,8 @@
   case X86ISD::MOVLPD:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
-  case X86ISD::UNPCKLP:
-  case X86ISD::PUNPCKL:
-  case X86ISD::UNPCKHP:
-  case X86ISD::PUNPCKH:
+  case X86ISD::UNPCKL:
+  case X86ISD::UNPCKH:
     return DAG.getNode(Opc, dl, VT, V1, V2);
   }
   return SDValue();
@@ -4460,12 +4456,10 @@
       DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
                       ShuffleMask);
       break;
-    case X86ISD::PUNPCKH:
-    case X86ISD::UNPCKHP:
+    case X86ISD::UNPCKH:
       DecodeUNPCKHMask(VT, ShuffleMask);
       break;
-    case X86ISD::PUNPCKL:
-    case X86ISD::UNPCKLP:
+    case X86ISD::UNPCKL:
       DecodeUNPCKLMask(VT, ShuffleMask);
       break;
     case X86ISD::MOVHLPS:
@@ -6364,50 +6358,6 @@
                               X86::getShuffleSHUFImmediate(SVOp), DAG);
 }
 
-static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
-  switch(VT.getSimpleVT().SimpleTy) {
-  case MVT::v32i8:
-  case MVT::v16i8:
-  case MVT::v16i16:
-  case MVT::v8i16:
-  case MVT::v4i32:
-  case MVT::v2i64: return X86ISD::PUNPCKL;
-  case MVT::v8i32:
-  case MVT::v4i64:
-    if (HasAVX2)   return X86ISD::PUNPCKL;
-    // else use fp unit for int unpack.
-  case MVT::v8f32:
-  case MVT::v4f32:
-  case MVT::v4f64:
-  case MVT::v2f64: return X86ISD::UNPCKLP;
-  default:
-    llvm_unreachable("Unknown type for unpckl");
-  }
-  return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
-  switch(VT.getSimpleVT().SimpleTy) {
-  case MVT::v32i8:
-  case MVT::v16i8:
-  case MVT::v16i16:
-  case MVT::v8i16:
-  case MVT::v4i32:
-  case MVT::v2i64: return X86ISD::PUNPCKH;
-  case MVT::v4i64:
-  case MVT::v8i32:
-    if (HasAVX2)   return X86ISD::PUNPCKH;
-    // else use fp unit for int unpack.
-  case MVT::v8f32:
-  case MVT::v4f32:
-  case MVT::v4f64:
-  case MVT::v2f64: return X86ISD::UNPCKHP;
-  default:
-    llvm_unreachable("Unknown type for unpckh");
-  }
-  return 0;
-}
-
 static
 SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
                                const TargetLowering &TLI,
@@ -6518,11 +6468,9 @@
   // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
   // unpckh_undef). Only use pshufd if speed is more important than size.
   if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
-    return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
   if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
-    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
   if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
       V2IsUndef && RelaxedMayFoldVectorLoad(V1))
@@ -6534,8 +6482,7 @@
   // Use to match splats
   if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
       (VT == MVT::v2f64 || VT == MVT::v2i64))
-    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
   if (X86::isPSHUFDMask(SVOp)) {
     // The actual implementation will match the mask in the if above and then
@@ -6635,12 +6582,10 @@
   }
 
   if (isUNPCKLMask(M, VT, HasAVX2))
-    return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
 
   if (isUNPCKHMask(M, VT, HasAVX2))
-    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
 
   if (V2IsSplat) {
     // Normalize mask so all entries that point to V2 points to its first
@@ -6664,12 +6609,10 @@
     ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
 
     if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
-      return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
-                                  DAG);
+      return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
 
     if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
-      return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
-                                  DAG);
+      return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
   }
 
   // Normalize the node to match x86 shuffle ops if needed
@@ -6689,8 +6632,7 @@
   if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
       SVOp->getSplatIndex() == 0 && V2IsUndef) {
     if (VT == MVT::v2f64 || VT == MVT::v2i64)
-      return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                  DAG);
+      return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
   }
 
   if (isPSHUFHWMask(M, VT))
@@ -6708,11 +6650,9 @@
                                 X86::getShuffleSHUFImmediate(SVOp), DAG);
 
   if (isUNPCKL_v_undef_Mask(M, VT))
-    return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
   if (isUNPCKH_v_undef_Mask(M, VT))
-    return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
-                                DAG);
+    return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
   //===--------------------------------------------------------------------===//
   // Generate target specific nodes for 128 or 256-bit shuffles only
@@ -11023,10 +10963,8 @@
   case X86ISD::MOVSLDUP_LD:        return "X86ISD::MOVSLDUP_LD";
   case X86ISD::MOVSD:              return "X86ISD::MOVSD";
   case X86ISD::MOVSS:              return "X86ISD::MOVSS";
-  case X86ISD::UNPCKLP:            return "X86ISD::UNPCKLP";
-  case X86ISD::UNPCKHP:            return "X86ISD::UNPCKHP";
-  case X86ISD::PUNPCKL:            return "X86ISD::PUNPCKL";
-  case X86ISD::PUNPCKH:            return "X86ISD::PUNPCKH";
+  case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
+  case X86ISD::UNPCKH:             return "X86ISD::UNPCKH";
   case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";
   case X86ISD::VPERMILP:           return "X86ISD::VPERMILP";
   case X86ISD::VPERM2X128:         return "X86ISD::VPERM2X128";
@@ -14616,10 +14554,8 @@
   case X86ISD::SHUFPS:      // Handle all target specific shuffles
   case X86ISD::SHUFPD:
   case X86ISD::PALIGN:
-  case X86ISD::PUNPCKH:
-  case X86ISD::UNPCKHP:
-  case X86ISD::PUNPCKL:
-  case X86ISD::UNPCKLP:
+  case X86ISD::UNPCKH:
+  case X86ISD::UNPCKL:
   case X86ISD::MOVHLPS:
   case X86ISD::MOVLHPS:
   case X86ISD::PSHUFD:

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=145926&r1=145925&r2=145926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Dec  6 02:21:25 2011
@@ -273,10 +273,8 @@
       MOVLPD,
       MOVSD,
       MOVSS,
-      UNPCKLP,
-      UNPCKHP,
-      PUNPCKL,
-      PUNPCKH,
+      UNPCKL,
+      UNPCKH,
       VPERMILP,
       VPERM2X128,
       VBROADCAST,

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=145926&r1=145925&r2=145926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Dec  6 02:21:25 2011
@@ -130,11 +130,8 @@
 def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
 def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
 
-def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>;
-def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
-
-def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
-def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
 
 def X86VPermilp  : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
 

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145926&r1=145925&r2=145926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Dec  6 02:21:25 2011
@@ -1157,11 +1157,11 @@
                  (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
             (VMOVHPSrm VR128:$src1, addr:$src2)>;
 
-  // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
+  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
   // is during lowering, where it's not possible to recognize the load fold cause
   // it has two uses through a bitcast. One use disappears at isel time and the
   // fold opportunity reappears.
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (scalar_to_vector (loadf64 addr:$src2)))),
             (VMOVHPDrm VR128:$src1, addr:$src2)>;
 
@@ -1172,10 +1172,10 @@
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
             (VMOVHPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
             (VMOVHPDmr addr:$dst, VR128:$src)>;
 }
 
@@ -1195,16 +1195,16 @@
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
             (MOVHPSmr addr:$dst, VR128:$src)>;
 }
 
 let Predicates = [HasSSE2] in {
-  // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
+  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
   // is during lowering, where it's not possible to recognize the load fold cause
   // it has two uses through a bitcast. One use disappears at isel time and the
   // fold opportunity reappears.
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (scalar_to_vector (loadf64 addr:$src2)))),
             (MOVHPDrm VR128:$src1, addr:$src2)>;
 
@@ -1215,7 +1215,7 @@
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
             (MOVHPDmr addr:$dst, VR128:$src)>;
 }
 
@@ -2431,27 +2431,27 @@
 } // AddedComplexity
 
 let Predicates = [HasSSE1] in {
-  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
+  def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
             (UNPCKLPSrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
+  def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
             (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
+  def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
             (UNPCKHPSrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
+  def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
             (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
 }
 
 let Predicates = [HasSSE2] in {
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
             (UNPCKLPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
             (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
             (UNPCKHPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
+  def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
             (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
 
-  // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
+  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
   // problem is during lowering, where it's not possible to recognize the load
   // fold cause it has two uses through a bitcast. One use disappears at isel
   // time and the fold opportunity reappears.
@@ -2464,59 +2464,43 @@
 }
 
 let Predicates = [HasAVX] in {
-  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
+  def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
             (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
+  def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
             (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
+  def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
             (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
+  def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
             (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
 
-  def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),
+  def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
             (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),
+  def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
             (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),
-            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
-            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),
+  def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
             (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),
-            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
-            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),
+  def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
             (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
 
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
             (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
             (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
             (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
+  def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
             (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
 
-  def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),
-            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),
-            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),
+  def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
             (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),
+  def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
             (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),
-            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),
-            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),
+  def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
             (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
-  def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),
+  def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
             (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
 
-  // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
+  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
   // problem is during lowering, where it's not possible to recognize the load
   // fold cause it has two uses through a bitcast. One use disappears at isel
   // time and the fold opportunity reappears.
@@ -4199,66 +4183,88 @@
 }
 
 let Predicates = [HasAVX] in {
-  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,
+  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
                                  bc_v16i8, 0>, VEX_4V;
-  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,
+  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
                                  bc_v8i16, 0>, VEX_4V;
-  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,
+  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
                                  bc_v4i32, 0>, VEX_4V;
-  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,
+  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
                                  bc_v2i64, 0>, VEX_4V;
 
-  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,
+  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
                                  bc_v16i8, 0>, VEX_4V;
-  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,
+  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
                                  bc_v8i16, 0>, VEX_4V;
-  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,
+  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
                                  bc_v4i32, 0>, VEX_4V;
-  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,
+  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
                                  bc_v2i64, 0>, VEX_4V;
 }
 
 let Predicates = [HasAVX2] in {
-  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,
+  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
                                    bc_v32i8>, VEX_4V;
-  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,
+  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
                                    bc_v16i16>, VEX_4V;
-  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,
+  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
                                    bc_v8i32>, VEX_4V;
-  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,
+  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
                                    bc_v4i64>, VEX_4V;
 
-  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,
+  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
                                    bc_v32i8>, VEX_4V;
-  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,
+  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
                                    bc_v16i16>, VEX_4V;
-  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,
+  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
                                    bc_v8i32>, VEX_4V;
-  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,
+  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
                                    bc_v4i64>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
-  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,
+  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
                                 bc_v16i8>;
-  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,
+  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
                                 bc_v8i16>;
-  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,
+  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
                                 bc_v4i32>;
-  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,
+  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
                                 bc_v2i64>;
 
-  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,
+  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
                                 bc_v16i8>;
-  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,
+  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
                                 bc_v8i16>;
-  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,
+  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
                                 bc_v4i32>;
-  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,
+  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
                                 bc_v2i64>;
 }
 } // ExeDomain = SSEPackedInt
 
+// Patterns for using AVX1 instructions with integer vectors
+// Here to give AVX2 priority
+let Predicates = [HasAVX] in {
+  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
 // Splat v2f64 / v2i64
 let AddedComplexity = 10 in {
   def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),





More information about the llvm-commits mailing list