[llvm-commits] [llvm] r145028 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-unpack.ll

Craig Topper craig.topper at gmail.com
Mon Nov 21 00:26:50 PST 2011


Author: ctopper
Date: Mon Nov 21 02:26:50 2011
New Revision: 145028

URL: http://llvm.org/viewvc/llvm-project?rev=145028&view=rev
Log:
Add lowering of v32i8 shuffles to VPUNPCKLBW/VPUNPCKHBW when AVX2 is enabled.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-unpack.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145028&r1=145027&r2=145028&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 21 02:26:50 2011
@@ -2852,6 +2852,7 @@
   case X86ISD::PUNPCKLDQ:
   case X86ISD::PUNPCKLQDQ:
   case X86ISD::VPUNPCKLWDY:
+  case X86ISD::VPUNPCKLBWY:
   case X86ISD::VPUNPCKLDQY:
   case X86ISD::VPUNPCKLQDQY:
   case X86ISD::UNPCKHPS:
@@ -2863,6 +2864,7 @@
   case X86ISD::PUNPCKHDQ:
   case X86ISD::PUNPCKHQDQ:
   case X86ISD::VPUNPCKHWDY:
+  case X86ISD::VPUNPCKHBWY:
   case X86ISD::VPUNPCKHDQY:
   case X86ISD::VPUNPCKHQDQY:
   case X86ISD::VPERMILPS:
@@ -2939,6 +2941,7 @@
   case X86ISD::PUNPCKLDQ:
   case X86ISD::PUNPCKLQDQ:
   case X86ISD::VPUNPCKLWDY:
+  case X86ISD::VPUNPCKLBWY:
   case X86ISD::VPUNPCKLDQY:
   case X86ISD::VPUNPCKLQDQY:
   case X86ISD::UNPCKHPS:
@@ -2950,6 +2953,7 @@
   case X86ISD::PUNPCKHDQ:
   case X86ISD::PUNPCKHQDQ:
   case X86ISD::VPUNPCKHWDY:
+  case X86ISD::VPUNPCKHBWY:
   case X86ISD::VPUNPCKHDQY:
   case X86ISD::VPUNPCKHQDQY:
     return DAG.getNode(Opc, dl, VT, V1, V2);
@@ -3569,7 +3573,7 @@
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || NumElts != 16))
+      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3619,7 +3623,7 @@
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || NumElts != 16))
+      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -4639,6 +4643,7 @@
     case X86ISD::PUNPCKHWD:
     case X86ISD::PUNPCKHDQ:
     case X86ISD::PUNPCKHQDQ:
+    case X86ISD::VPUNPCKHBWY:
     case X86ISD::VPUNPCKHWDY:
     case X86ISD::VPUNPCKHDQY:
     case X86ISD::VPUNPCKHQDQY:
@@ -4654,6 +4659,7 @@
     case X86ISD::PUNPCKLWD:
     case X86ISD::PUNPCKLDQ:
     case X86ISD::PUNPCKLQDQ:
+    case X86ISD::VPUNPCKLBWY:
     case X86ISD::VPUNPCKLWDY:
     case X86ISD::VPUNPCKLDQY:
     case X86ISD::VPUNPCKLQDQY:
@@ -6595,6 +6601,7 @@
   case MVT::v16i8: return X86ISD::PUNPCKLBW;
   case MVT::v8i16: return X86ISD::PUNPCKLWD;
   case MVT::v16i16: return X86ISD::VPUNPCKLWDY;
+  case MVT::v32i8: return X86ISD::VPUNPCKLBWY;
   default:
     llvm_unreachable("Unknown type for unpckl");
   }
@@ -6618,6 +6625,7 @@
   case MVT::v16i8: return X86ISD::PUNPCKHBW;
   case MVT::v8i16: return X86ISD::PUNPCKHWD;
   case MVT::v16i16: return X86ISD::VPUNPCKHWDY;
+  case MVT::v32i8: return X86ISD::VPUNPCKHBWY;
   default:
     llvm_unreachable("Unknown type for unpckh");
   }
@@ -11270,6 +11278,7 @@
   case X86ISD::PUNPCKLWD:          return "X86ISD::PUNPCKLWD";
   case X86ISD::PUNPCKLDQ:          return "X86ISD::PUNPCKLDQ";
   case X86ISD::PUNPCKLQDQ:         return "X86ISD::PUNPCKLQDQ";
+  case X86ISD::VPUNPCKLBWY:        return "X86ISD::VPUNPCKLBWY";
   case X86ISD::VPUNPCKLWDY:        return "X86ISD::VPUNPCKLWDY";
   case X86ISD::VPUNPCKLDQY:        return "X86ISD::VPUNPCKLDQY";
   case X86ISD::VPUNPCKLQDQY:       return "X86ISD::VPUNPCKLQDQY";
@@ -11277,6 +11286,7 @@
   case X86ISD::PUNPCKHWD:          return "X86ISD::PUNPCKHWD";
   case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
   case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
+  case X86ISD::VPUNPCKHBWY:        return "X86ISD::VPUNPCKHBWY";
   case X86ISD::VPUNPCKHWDY:        return "X86ISD::VPUNPCKHWDY";
   case X86ISD::VPUNPCKHDQY:        return "X86ISD::VPUNPCKHDQY";
   case X86ISD::VPUNPCKHQDQY:       return "X86ISD::VPUNPCKHQDQY";
@@ -14867,6 +14877,7 @@
   case X86ISD::PUNPCKHWD:
   case X86ISD::PUNPCKHDQ:
   case X86ISD::PUNPCKHQDQ:
+  case X86ISD::VPUNPCKHBWY:
   case X86ISD::VPUNPCKHWDY:
   case X86ISD::VPUNPCKHDQY:
   case X86ISD::VPUNPCKHQDQY:
@@ -14878,6 +14889,7 @@
   case X86ISD::PUNPCKLWD:
   case X86ISD::PUNPCKLDQ:
   case X86ISD::PUNPCKLQDQ:
+  case X86ISD::VPUNPCKLBWY:
   case X86ISD::VPUNPCKLWDY:
   case X86ISD::VPUNPCKLDQY:
   case X86ISD::VPUNPCKLQDQY:

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=145028&r1=145027&r2=145028&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Nov 21 02:26:50 2011
@@ -285,6 +285,7 @@
       PUNPCKLWD,
       PUNPCKLDQ,
       PUNPCKLQDQ,
+      VPUNPCKLBWY,
       VPUNPCKLWDY,
       VPUNPCKLDQY,
       VPUNPCKLQDQY,
@@ -292,6 +293,7 @@
       PUNPCKHWD,
       PUNPCKHDQ,
       PUNPCKHQDQ,
+      VPUNPCKHBWY,
       VPUNPCKHWDY,
       VPUNPCKHDQY,
       VPUNPCKHQDQY,

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=145028&r1=145027&r2=145028&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Nov 21 02:26:50 2011
@@ -144,6 +144,7 @@
 def X86Punpcklwd  : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
 def X86Punpckldq  : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
 def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+def X86Punpcklbwy  : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>;
 def X86Punpcklwdy  : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>;
 def X86Punpckldqy  : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>;
 def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>;
@@ -152,6 +153,7 @@
 def X86Punpckhwd  : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
 def X86Punpckhdq  : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
 def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Punpckhbwy  : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>;
 def X86Punpckhwdy  : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>;
 def X86Punpckhdqy  : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>;
 def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>;

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145028&r1=145027&r2=145028&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Nov 21 02:26:50 2011
@@ -4204,19 +4204,8 @@
                                  bc_v8i16, 0>, VEX_4V;
   defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
                                  bc_v4i32, 0>, VEX_4V;
-
-  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
-            (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-            "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-            [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
-                                                    VR128:$src2)))]>, VEX_4V;
-  def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
-            (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-            "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-            [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
-                                        (memopv2i64 addr:$src2))))]>, VEX_4V;
+  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+                                 bc_v2i64, 0>, VEX_4V;
 
   defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
                                  bc_v16i8, 0>, VEX_4V;
@@ -4224,99 +4213,40 @@
                                  bc_v8i16, 0>, VEX_4V;
   defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
                                  bc_v4i32, 0>, VEX_4V;
-
-  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
-             (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-             "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-             [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
-                                                     VR128:$src2)))]>, VEX_4V;
-  def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
-             (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-             "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-             [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
-                                        (memopv2i64 addr:$src2))))]>, VEX_4V;
+  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+                                 bc_v2i64, 0>, VEX_4V;
 }
 
 let Predicates = [HasAVX2] in {
-  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy,
                                    bc_v32i8>, VEX_4V;
   defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy,
                                    bc_v16i16>, VEX_4V;
   defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy,
                                    bc_v8i32>, VEX_4V;
+  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdqy,
+                                   bc_v4i64>, VEX_4V;
 
-  /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg,
-            (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
-            "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-            [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
-                                                     VR256:$src2)))]>, VEX_4V;
-  def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem,
-            (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
-            "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-            [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1,
-                                        (memopv4i64 addr:$src2))))]>, VEX_4V;
-
-  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy,
                                    bc_v32i8>, VEX_4V;
   defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy,
                                    bc_v16i16>, VEX_4V;
   defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy,
                                    bc_v8i32>, VEX_4V;
-
-  /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg,
-             (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
-             "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-             [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
-                                                      VR256:$src2)))]>, VEX_4V;
-  def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem,
-             (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
-             "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-             [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1,
-                                        (memopv4i64 addr:$src2))))]>, VEX_4V;
+  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy,
+                                   bc_v4i64>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
   defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
   defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
   defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
-  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v2i64 (X86Punpcklqdq VR128:$src1,
-                                         (memopv2i64 addr:$src2))))]>;
+  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>;
 
   defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
   defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
   defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
-  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
-  /// knew to collapse (bitconvert VT to VT) into its operand.
-  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "punpckhqdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhqdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v2i64 (X86Punpckhqdq VR128:$src1,
-                                         (memopv2i64 addr:$src2))))]>;
+  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>;
 }
 } // ExeDomain = SSEPackedInt
 

Modified: llvm/trunk/test/CodeGen/X86/avx2-unpack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-unpack.ll?rev=145028&r1=145027&r2=145028&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-unpack.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Mon Nov 21 02:26:50 2011
@@ -41,3 +41,17 @@
   %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
   ret <16 x i16> %shuffle.i
 }
+
+; CHECK: vpunpckhbw
+define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}





More information about the llvm-commits mailing list