[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86InstrSSE.td

Evan Cheng evan.cheng at apple.com
Tue Mar 28 17:31:03 PST 2006



Changes in directory llvm/lib/Target/X86:

X86ISelLowering.cpp updated: 1.141 -> 1.142
X86InstrSSE.td updated: 1.40 -> 1.41
---
Log message:

- Only use pshufd for v4i32 vector shuffles.
- Other shuffle-related fixes.


---
Diffs of the changes:  (+83 -61)

 X86ISelLowering.cpp |   51 +++++++++++++++++++++-------
 X86InstrSSE.td      |   93 +++++++++++++++++++++++++---------------------------
 2 files changed, 83 insertions(+), 61 deletions(-)


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.141 llvm/lib/Target/X86/X86ISelLowering.cpp:1.142
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.141	Tue Mar 28 17:41:33 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Tue Mar 28 19:30:51 2006
@@ -1583,15 +1583,21 @@
   return Mask;
 }
 
-/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as
-/// values in ther permute mask if needed. Return an empty SDOperand is it is
-/// already well formed.
-static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2,
-                                              SDOperand Mask, MVT::ValueType VT,
-                                              SelectionDAG &DAG) {
+/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as the
+/// values in the permute mask) if needed. Use V1 as second vector if V2 is
+/// undef. Return an empty SDOperand if it is already well formed.
+static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2,
+                                        SDOperand Mask, MVT::ValueType VT,
+                                        SelectionDAG &DAG) {
   unsigned NumElems = Mask.getNumOperands();
   SDOperand Half1 = Mask.getOperand(0);
   SDOperand Half2 = Mask.getOperand(NumElems/2);
+  bool V2Undef = false;
+  if (V2.getOpcode() == ISD::UNDEF) {
+    V2Undef = true;
+    V2 = V1;
+  }
+
   if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems &&
       cast<ConstantSDNode>(Half2)->getValue() <  NumElems) {
     // Swap the operands and change mask.
@@ -1604,6 +1610,10 @@
       DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
   }
+
+  if (V2Undef)
+    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+
   return SDOperand();
 }
 
@@ -2387,8 +2397,26 @@
     MVT::ValueType VT = Op.getValueType();
     unsigned NumElems = PermMask.getNumOperands();
 
-    if (NumElems == 2)
-      return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+    if (X86::isUNPCKLMask(PermMask.Val) ||
+        X86::isUNPCKHMask(PermMask.Val))
+      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL* / {P}UNPCKH*.
+      return SDOperand();
+
+    // PSHUFD's 2nd vector must be undef.
+    if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val))
+      if (V2.getOpcode() == ISD::UNDEF)
+        return SDOperand();
+      else
+        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+                           DAG.getNode(ISD::UNDEF, V1.getValueType()),
+                           PermMask);
+
+    if (NumElems == 2 ||
+        X86::isSplatMask(PermMask.Val) ||
+        X86::isSHUFPMask(PermMask.Val)) {
+      return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+    }
+#if 0
     else if (X86::isSplatMask(PermMask.Val)) {
       // Handle splat cases.
       if (V2.getOpcode() == ISD::UNDEF)
@@ -2400,10 +2428,6 @@
         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),
                            PermMask);
-    } else if (X86::isUNPCKLMask(PermMask.Val) ||
-               X86::isUNPCKHMask(PermMask.Val)) {
-      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
-      return SDOperand();
     } else if (X86::isPSHUFDMask(PermMask.Val)) {
       if (V2.getOpcode() == ISD::UNDEF)
         // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
@@ -2414,7 +2438,8 @@
                            DAG.getNode(ISD::UNDEF, V1.getValueType()),
                            PermMask);
     } else if (X86::isSHUFPMask(PermMask.Val))
-      return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG);
+      return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+#endif
 
     assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
     abort();


Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.40 llvm/lib/Target/X86/X86InstrSSE.td:1.41
--- llvm/lib/Target/X86/X86InstrSSE.td:1.40	Tue Mar 28 17:51:43 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Tue Mar 28 19:30:51 2006
@@ -79,9 +79,8 @@
   return X86::isUNPCKHMask(N);
 }]>;
 
-// Only use PSHUF if it is not a splat.
 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
-  return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
+  return X86::isPSHUFDMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
 def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
@@ -918,86 +917,92 @@
                      "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
 def PSHUFDrr : PDIi8<0x70, MRMDestReg,
                      (ops VR128:$dst, VR128:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               VR128:$src1, (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
 def PSHUFDrm : PDIi8<0x70, MRMSrcMem,
                      (ops VR128:$dst, i128mem:$src1, i8imm:$src2),
-                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+                     "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set VR128:$dst, (v4i32 (vector_shuffle
+                                               (load addr:$src1), (undef),
+                                               PSHUFD_shuffle_mask:$src2)))]>;
 
 let isTwoAddress = 1 in {
 def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, 
                      (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3),
                      "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v4f32 VR128:$src1), (v4f32 VR128:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v4f32 (vector_shuffle
+                                               VR128:$src1, VR128:$src2,
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, 
                    (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                      "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v4f32 VR128:$src1), (load addr:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v4f32 (vector_shuffle
+                                               VR128:$src1, (load addr:$src2),
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, 
                      (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3),
                      "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v2f64 VR128:$src1), (v2f64 VR128:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v2f64 (vector_shuffle
+                                               VR128:$src1, VR128:$src2,
+                                               SHUFP_shuffle_mask:$src3)))]>;
 def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, 
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3),
                      "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
-                    [(set VR128:$dst, (vector_shuffle
-                                       (v2f64 VR128:$src1), (load addr:$src2),
-                                       SHUFP_shuffle_mask:$src3))]>;
+                     [(set VR128:$dst, (v2f64 (vector_shuffle
+                                               VR128:$src1, (load addr:$src2),
+                                               SHUFP_shuffle_mask:$src3)))]>;
 
 def UNPCKHPSrr : PSI<0x15, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPSrm : PSI<0x15, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrr : PDI<0x15, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrm : PDI<0x15, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpckhpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKH_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKH_shuffle_mask)))]>;
 
 def UNPCKLPSrr : PSI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPSrm : PSI<0x14, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklps {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v4f32 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrr : PDI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, VR128:$src2,
+                                              UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrm : PDI<0x14, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst,
-                      (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
-                              UNPCKL_shuffle_mask)))]>;
+                    [(set VR128:$dst, (v2f64 (vector_shuffle
+                                              VR128:$src1, (load addr:$src2),
+                                              UNPCKL_shuffle_mask)))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1354,11 +1359,3 @@
           (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
 def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm),
           (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>;
-
-// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not.
-def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
-          (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
-      Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm),
-          (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>,
-      Requires<[HasSSE2]>;






More information about the llvm-commits mailing list