[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86InstrSSE.td

Evan Cheng evan.cheng at apple.com
Thu Mar 30 11:55:08 PST 2006



Changes in directory llvm/lib/Target/X86:

X86ISelLowering.cpp updated: 1.145 -> 1.146
X86InstrSSE.td updated: 1.49 -> 1.50
---
Log message:

Make sure all possible shuffles are matched.
Use pshufd, pshufhw, and pshuflw to shuffle v4f32 if shufps doesn't match.
Use shufps to shuffle v4i32 if pshufd, pshufhw, and pshuflw don't match.


---
Diffs of the changes:  (+89 -30)

 X86ISelLowering.cpp |   53 ++++++++++++++++++++++++++---------------
 X86InstrSSE.td      |   66 +++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 89 insertions(+), 30 deletions(-)


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.145 llvm/lib/Target/X86/X86ISelLowering.cpp:1.146
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.145	Wed Mar 29 17:07:14 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Thu Mar 30 13:54:57 2006
@@ -1486,23 +1486,17 @@
   if (NumElems != 4) return false;
 
   // Each half must refer to only one of the vector.
-  SDOperand Elt = N->getOperand(0);
-  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = 1; i < NumElems / 2; ++i) {
+  for (unsigned i = 0; i < 2; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
-    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != 
-        cast<ConstantSDNode>(Elt)->getValue())
-      return false;
+    unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    if (Val >= 4) return false;
   }
-  Elt = N->getOperand(NumElems / 2);
-  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
+  for (unsigned i = 2; i < 4; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
-    if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != 
-        cast<ConstantSDNode>(Elt)->getValue())
-      return false;
+    unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+    if (Val < 4) return false;
   }
 
   return true;
@@ -2489,11 +2483,7 @@
     unsigned NumElems = PermMask.getNumOperands();
 
     // Splat && PSHUFD's 2nd vector must be undef.
-    if (X86::isSplatMask(PermMask.Val) ||
-        ((MVT::isInteger(VT) &&
-          (X86::isPSHUFDMask(PermMask.Val) ||
-           X86::isPSHUFHWMask(PermMask.Val) ||
-           X86::isPSHUFLWMask(PermMask.Val))))) {
+    if (X86::isSplatMask(PermMask.Val)) {
       if (V2.getOpcode() != ISD::UNDEF)
         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
@@ -2505,9 +2495,34 @@
       // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
       return SDOperand();
 
-    if (NumElems == 2 ||
-        X86::isSHUFPMask(PermMask.Val)) {
+    if (NumElems == 2)
       return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+
+    // If VT is integer, try PSHUF* first, then SHUFP*.
+    if (MVT::isInteger(VT)) {
+      if (X86::isPSHUFDMask(PermMask.Val) ||
+          X86::isPSHUFHWMask(PermMask.Val) ||
+          X86::isPSHUFLWMask(PermMask.Val)) {
+        if (V2.getOpcode() != ISD::UNDEF)
+          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+        return SDOperand();
+      }
+
+      if (X86::isSHUFPMask(PermMask.Val))
+        return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+    } else {
+      // Floating point cases in the other order.
+      if (X86::isSHUFPMask(PermMask.Val))
+        return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+      if (X86::isPSHUFDMask(PermMask.Val) ||
+          X86::isPSHUFHWMask(PermMask.Val) ||
+          X86::isPSHUFLWMask(PermMask.Val)) {
+        if (V2.getOpcode() != ISD::UNDEF)
+          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+        return SDOperand();
+      }
     }
 
     assert(0 && "Unexpected VECTOR_SHUFFLE to lower");


Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.49 llvm/lib/Target/X86/X86InstrSSE.td:1.50
--- llvm/lib/Target/X86/X86InstrSSE.td:1.49	Thu Mar 30 01:33:32 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Thu Mar 30 13:54:57 2006
@@ -106,14 +106,32 @@
   return X86::isPSHUFLWMask(N);
 }], SHUFFLE_get_pshuflw_imm>;
 
+// Only use PSHUF* for v4f32 if SHUFP does not match.
+def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{
+  return !X86::isSHUFPMask(N) &&
+          X86::isPSHUFDMask(N);
+}], SHUFFLE_get_shuf_imm>;
+
+def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{
+  return !X86::isSHUFPMask(N) &&
+          X86::isPSHUFHWMask(N);
+}], SHUFFLE_get_pshufhw_imm>;
+
+def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{
+  return !X86::isSHUFPMask(N) &&
+          X86::isPSHUFLWMask(N);
+}], SHUFFLE_get_pshuflw_imm>;
+
 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isSHUFPMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
-// Only use SHUFP for v4i32 if no other options are available.
-// FIXME: add tblgen hook to reduce the complexity of pattern.
-def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{
-  return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N);
+// Only use SHUFP for v4i32 if PSHUF* do not match.
+def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{
+  return !X86::isPSHUFDMask(N) &&
+         !X86::isPSHUFHWMask(N) &&
+         !X86::isPSHUFLWMask(N) &&
+          X86::isSHUFPMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
 //===----------------------------------------------------------------------===//
@@ -1278,14 +1296,14 @@
 // SSE2 with ImmT == Imm8 and XD prefix.
 def PSHUFLWrr : Ii8<0x70, MRMDestReg,
                     (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
-                    "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                               VR128:$src1, (undef),
                                               PSHUFLW_shuffle_mask:$src2)))]>,
                 XD, Requires<[HasSSE2]>;
 def PSHUFLWrm : Ii8<0x70, MRMDestMem,
                     (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
-                    "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}",
+                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst, (v8i16 (vector_shuffle
                                      (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                               PSHUFLW_shuffle_mask:$src2)))]>,
@@ -1593,15 +1611,41 @@
           (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
       Requires<[HasSSE1]>;
 
-// Shuffle v4i32 if others do not match
+// Shuffle v4i32 with SHUFP* if others do not match.
 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
-           SHUFP_shuffle_mask:$sm),
+           SHUFP_int_shuffle_mask:$sm),
           (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
-                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+                  SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
 def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
-           SHUFP_shuffle_mask:$sm),
+           SHUFP_int_shuffle_mask:$sm),
           (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
-                  SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+                  SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
+
+// Shuffle v4f32 with PSHUF* if others do not match.
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+           PSHUFD_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFDrr VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+           PSHUFD_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFDrm addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+           PSHUFHW_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFHWrr VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+           PSHUFHW_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFHWrm addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
+           PSHUFLW_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFLWrr VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
+           PSHUFLW_fp_shuffle_mask:$sm),
+          (v4f32 (PSHUFLWrm addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
+      Requires<[HasSSE2]>;
 
 // Logical ops
 def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),






More information about the llvm-commits mailing list