[llvm-commits] [llvm] r53939 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_insert-2.ll test/CodeGen/X86/vec_insert-6.ll test/CodeGen/X86/vec_shuffle-19.ll

Evan Cheng evan.cheng at apple.com
Tue Jul 22 17:22:17 PDT 2008


Author: evancheng
Date: Tue Jul 22 19:22:17 2008
New Revision: 53939

URL: http://llvm.org/viewvc/llvm-project?rev=53939&view=rev
Log:
Fix PR2485: do all 4-element SSE shuffles in max. of 2 shuffle instructions.
Based on patch by Nicolas Capens.

Added:
    llvm/trunk/test/CodeGen/X86/vec_shuffle-19.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vec_insert-2.ll
    llvm/trunk/test/CodeGen/X86/vec_insert-6.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=53939&r1=53938&r2=53939&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 22 19:22:17 2008
@@ -3655,13 +3655,8 @@
   SmallVector<std::pair<int, int>, 8> Locs;
   Locs.reserve(4);
   SmallVector<SDOperand, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT));
-  SmallVector<SDOperand, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
   unsigned NumHi = 0;
   unsigned NumLo = 0;
-  // If no more than two elements come from either vector. This can be
-  // implemented with two shuffles. First shuffle gather the elements.
-  // The second shuffle, which takes the first shuffle as both of its
-  // vector operands, put the elements into the right order.
   for (unsigned i = 0; i != 4; ++i) {
     SDOperand Elt = PermMask.getOperand(i);
     if (Elt.getOpcode() == ISD::UNDEF) {
@@ -3680,10 +3675,17 @@
       }
     }
   }
+
   if (NumLo <= 2 && NumHi <= 2) {
+    // If no more than two elements come from either vector, this can be
+    // implemented with two shuffles. The first shuffle gathers the elements.
+    // The second shuffle, which takes the first shuffle as both of its
+    // vector operands, puts the elements into the right order.
     V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                  &Mask1[0], Mask1.size()));
+
+    SmallVector<SDOperand, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
     for (unsigned i = 0; i != 4; ++i) {
       if (Locs[i].first == -1)
         continue;
@@ -3697,6 +3699,59 @@
     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                    &Mask2[0], Mask2.size()));
+  } else if (NumLo == 3 || NumHi == 3) {
+    // Otherwise, we must have three elements from one vector, call it X, and
+    // one element from the other, call it Y.  First, use a shufps to build an
+    // intermediate vector with the one element from Y and the element from X
+    // that will be in the same half in the final destination (the indexes don't
+    // matter). Then, use a shufps to build the final vector, taking the half
+    // containing the element from Y from the intermediate, and the other half
+    // from X.
+    if (NumHi == 3) {
+      // Normalize it so the 3 elements come from V1.
+      PermMask = CommuteVectorShuffleMask(PermMask, DAG);
+      std::swap(V1, V2);
+    }
+
+    // Find the element from V2.
+    unsigned HiIndex;
+    for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+      SDOperand Elt = PermMask.getOperand(HiIndex);
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
+      if (Val >= 4)
+        break;
+    }
+
+    Mask1[0] = PermMask.getOperand(HiIndex);
+    Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT);
+    Mask1[2] = PermMask.getOperand(HiIndex^1);
+    Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT);
+    V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
+
+    if (HiIndex >= 2) {
+      Mask1[0] = PermMask.getOperand(0);
+      Mask1[1] = PermMask.getOperand(1);
+      Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
+      Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
+                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
+    } else {
+      Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
+      Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
+      Mask1[2] = PermMask.getOperand(2);
+      Mask1[3] = PermMask.getOperand(3);
+      if (Mask1[2].getOpcode() != ISD::UNDEF)
+        Mask1[2] = DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getValue()+4,
+                                   MaskEVT);
+      if (Mask1[3].getOpcode() != ISD::UNDEF)
+        Mask1[3] = DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getValue()+4,
+                                   MaskEVT);
+      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1,
+                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
+    }
   }
 
   // Break it into (shuffle shuffle_hi, shuffle_lo).

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-2.ll?rev=53939&r1=53938&r2=53939&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-2.ll Tue Jul 22 19:22:17 2008
@@ -1,6 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {\$132,} | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {\$2,}  | count 2
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep shufps | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {\$36,} | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep shufps | count 2
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 1
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movhpd | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep unpcklpd | count 1

Modified: llvm/trunk/test/CodeGen/X86/vec_insert-6.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_insert-6.ll?rev=53939&r1=53938&r2=53939&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_insert-6.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_insert-6.ll Tue Jul 22 19:22:17 2008
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pslldq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 6
 
 define <4 x float> @t3(<4 x float>* %P) nounwind  {
 	%tmp1 = load <4 x float>* %P

Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-19.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-19.ll?rev=53939&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-19.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-19.ll Tue Jul 22 19:22:17 2008
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 4
+; PR2485
+
+define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind  {
+entry:
+	%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 >		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %shuffle
+}





More information about the llvm-commits mailing list