[llvm-commits] [llvm] r154266 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/rotate_ops.ll test/CodeGen/X86/2011-10-27-tstore.ll test/CodeGen/X86/SwizzleShuff.ll test/CodeGen/X86/vec_compare-2.ll test/CodeGen/X86/vec_shuffle-37.ll test/CodeGen/X86/widen_shuffle-1.ll
Nadav Rotem
nadav.rotem at intel.com
Sat Apr 7 14:19:09 PDT 2012
Author: nadav
Date: Sat Apr 7 16:19:08 2012
New Revision: 154266
URL: http://llvm.org/viewvc/llvm-project?rev=154266&view=rev
Log:
1. Remove the part of r153848 which optimizes shuffle-of-shuffle into a new
shuffle node, because it could introduce new shuffle nodes that are not
supported efficiently by the target.
2. Add a more restrictive shuffle-of-shuffle optimization for cases where the
second shuffle reverses the transformation of the first shuffle.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Apr 7 16:19:08 2012
@@ -7795,19 +7795,20 @@
}
// If this shuffle node is simply a swizzle of another shuffle node,
- // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
- SmallVector<int, 8> NewMask;
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // If the source shuffle has more than one user then do not try to optimize
- // it because it may generate a more complex shuffle node. However, if the
- // source shuffle is also a swizzle (a single source shuffle), our
- // transformation is still likely to reduce the number of shuffles and only
- // generate a simple shuffle node.
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF && !N0.hasOneUse())
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the current
+ // shuffle.
+ if (OtherSV->getOperand(0).getValueType() != VT)
return SDValue();
EVT InVT = N0.getValueType();
@@ -7824,11 +7825,12 @@
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
- NewMask.push_back(Idx);
+ // The combined shuffle must map each index to itself.
+ if (Idx != i && Idx != -1)
+ return SDValue();
}
- assert(NewMask.size() == VT.getVectorNumElements() && "Invalid mask size");
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
- OtherSV->getOperand(1), &NewMask[0]);
+
+ return OtherSV->getOperand(0);
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll (original)
+++ llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll Sat Apr 7 16:19:08 2012
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot %t1.s | count 85
+; RUN: grep rot %t1.s | count 86
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
; RUN: grep roti.*27 %t1.s | count 1
Modified: llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll Sat Apr 7 16:19:08 2012
@@ -4,13 +4,13 @@
;CHECK: ltstore
;CHECK: movq
-;CHECK-NEXT: movq
-;CHECK-NEXT: ret
-define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {
+;CHECK: movq
+;CHECK: ret
+define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
entry:
- %in = load <4 x i32>* %pIn
+ %in = load <4 x i32>* %pA
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- store <2 x i32> %j, <2 x i32>* %pOut
+ store <2 x i32> %j, <2 x i32>* %pB
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll (original)
+++ llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll Sat Apr 7 16:19:08 2012
@@ -41,3 +41,28 @@
store <4 x i8> %C, <4 x i8>* %pA
ret <4 x i8> %C
}
+
+
+
+; CHECK: reverse_1
+; CHECK-NOT: shuf
+; CHECK: ret
+define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
+ %A = load <4 x i32>* %pA
+ %B = load <4 x i32>* %pB
+ %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %S1
+}
+
+
+; CHECK: no_reverse_shuff
+; CHECK: shuf
+; CHECK: ret
+define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
+ %A = load <4 x i32>* %pA
+ %B = load <4 x i32>* %pB
+ %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+ ret <4 x i32> %S1
+}
Modified: llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_compare-2.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_compare-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_compare-2.ll Sat Apr 7 16:19:08 2012
@@ -10,8 +10,10 @@
entry:
; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
-; CHECK: pcmpgt
-; CHECK: blendvps
+; CHECK: movzwl
+; CHECK: movzwl
+; CHECK: pshufd
+; CHECK: pshufb
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
%cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
%sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll Sat Apr 7 16:19:08 2012
@@ -27,11 +27,11 @@
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
entry:
; CHECK: t02
-; CHECK: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: ret
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
%0 = bitcast <8 x i32>* %source to <4 x i32>*
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
%tmp2 = load <4 x i32>* %arrayidx, align 16
Modified: llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll?rev=154266&r1=154265&r2=154266&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll Sat Apr 7 16:19:08 2012
@@ -33,7 +33,7 @@
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: shuf3:
-; CHECK: shufd
+; CHECK: shufps
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
More information about the llvm-commits mailing list