[llvm-commits] [llvm] r153848 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/ARM/reg_sequence.ll test/CodeGen/CellSPU/rotate_ops.ll test/CodeGen/X86/2011-10-27-tstore.ll test/CodeGen/X86/SwizzleShuff.ll test/CodeGen/X86/vec_shuffle-37.ll test/CodeGen/X86/widen_shuffle-1.ll
Nadav Rotem
nadav.rotem at intel.com
Sun Apr 1 12:31:22 PDT 2012
Author: nadav
Date: Sun Apr 1 14:31:22 2012
New Revision: 153848
URL: http://llvm.org/viewvc/llvm-project?rev=153848&view=rev
Log:
This commit contains a few changes that had to go in together.
1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
(and also scalar_to_vector).
2. Xor/and/or are indifferent to the swizzle operation (a shuffle of a
single source). Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)).
3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y).
4. Fix an X86ISelLowering optimization that was overly sensitive to bitcasts.
IR sketches illustrating points 1-3 follow the assembly example below.
Code that was previously compiled to this:
movd (%rsi), %xmm0
movdqa .LCPI0_0(%rip), %xmm2
pshufb %xmm2, %xmm0
movd (%rdi), %xmm1
pshufb %xmm2, %xmm1
pxor %xmm0, %xmm1
pshufb .LCPI0_1(%rip), %xmm1
movd %xmm1, (%rdi)
ret
Now compiles to this:
movl (%rsi), %eax
xorl %eax, (%rdi)
ret
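As an illustration of point 1, here is a minimal IR sketch (not part of the
commit; the function names are made up). In the first function,
xor (bitcast A), (bitcast B) can be rewritten to bitcast (xor A, B), so the
operation happens in the original type and only one bitcast survives. The
second function shows the SCALAR_TO_VECTOR case: an insertelement into lane 0
selects to SCALAR_TO_VECTOR, so the xor can be done on plain i32 scalars.

define <2 x i64> @xor_bitcast(<4 x i32> %a, <4 x i32> %b) {
  ; xor (bitcast A), (bitcast B) -> bitcast (xor A, B)
  %ba = bitcast <4 x i32> %a to <2 x i64>
  %bb = bitcast <4 x i32> %b to <2 x i64>
  %r = xor <2 x i64> %ba, %bb
  ret <2 x i64> %r
}

define <4 x i32> @xor_scalar(i32 %a, i32 %b) {
  ; insertelement into lane 0 becomes SCALAR_TO_VECTOR; the combine can
  ; then xor the i32 scalars and build the vector from the result.
  %va = insertelement <4 x i32> undef, i32 %a, i32 0
  %vb = insertelement <4 x i32> undef, i32 %b, i32 0
  %r = xor <4 x i32> %va, %vb
  ret <4 x i32> %r
}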
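Point 2 in IR form, again a hypothetical sketch: both xor operands are
swizzles (single-source shuffles) that use the same mask, so the combine can
perform the xor first and swizzle once, replacing two shuffles with one.

define <4 x i32> @swizzle_xor(<4 x i32> %a, <4 x i32> %b) {
  ; both shuffles have one real source (the second operand is undef)
  ; and identical masks, so this becomes shuff(xor(%a, %b)).
  %sa = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %sb = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %r = xor <4 x i32> %sa, %sb
  ret <4 x i32> %r
}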
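And a sketch of point 3: a swizzle of a two-source shuffle folds into a single
shuffle by composing the masks; any outer index that would read the undef
operand is canonicalized to undef (-1) in the new mask.

define <4 x i32> @fold_swizzle(<4 x i32> %x, <4 x i32> %y) {
  ; %s1 = <x0, y0, x1, y1>; the outer swizzle picks elements 2, 3, 0, 1,
  ; so the pair folds to one shuffle of %x and %y with mask <1, 5, 0, 4>.
  %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ret <4 x i32> %s2
}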
Added:
llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Apr 1 14:31:22 2012
@@ -2336,6 +2336,68 @@
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ && N1.getOpcode() == N0.getOpcode() && Level == AfterLegalizeVectorOps) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N1.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+ SDValue In0 = SVN0->getOperand(0);
+ SDValue In1 = SVN1->getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ // Check that both shuffles are swizzles.
+ bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF);
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
+ SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ AddToWorkList(Op.getNode());
+ return Shuff;
+ }
+ }
return SDValue();
}
@@ -7721,6 +7783,36 @@
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ SmallVector<int, 8> NewMask;
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ EVT InVT = N0.getValueType();
+ int InNumElts = InVT.getVectorNumElements();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ // If we access the second (undef) operand then this index can be
+ // canonicalized to undef as well.
+ if (Idx >= InNumElts)
+ Idx = -1;
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ NewMask.push_back(Idx);
+ }
+
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
+ OtherSV->getOperand(1), &NewMask[0]);
+ }
+
return SDValue();
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Apr 1 14:31:22 2012
@@ -14000,13 +14000,14 @@
return SDValue();
// X, Y, and Mask may be wrapped in BITCASTs; if so, see through them.
- if (Mask.getOpcode() != ISD::BITCAST ||
- X.getOpcode() != ISD::BITCAST ||
- Y.getOpcode() != ISD::BITCAST)
- return SDValue();
-
// Look through mask bitcast.
- Mask = Mask.getOperand(0);
+ if (Mask.getOpcode() == ISD::BITCAST)
+ Mask = Mask.getOperand(0);
+ if (X.getOpcode() == ISD::BITCAST)
+ X = X.getOperand(0);
+ if (Y.getOpcode() == ISD::BITCAST)
+ Y = Y.getOperand(0);
+
EVT MaskVT = Mask.getValueType();
// Validate that the Mask operand is a vector sra node.
@@ -14027,8 +14028,6 @@
// Now we know we at least have a pblendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
- X = X.getOperand(0);
- Y = Y.getOperand(0);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
Modified: llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/reg_sequence.ll?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/reg_sequence.ll Sun Apr 1 14:31:22 2012
@@ -273,7 +273,7 @@
entry:
; CHECK: t10:
; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
; CHECK: vadd.f32 q8, q8, q8
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
Modified: llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll (original)
+++ llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll Sun Apr 1 14:31:22 2012
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot %t1.s | count 86
+; RUN: grep rot %t1.s | count 85
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
; RUN: grep roti.*27 %t1.s | count 1
Modified: llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll Sun Apr 1 14:31:22 2012
@@ -3,14 +3,14 @@
target triple = "x86_64-unknown-linux-gnu"
;CHECK: ltstore
-;CHECK: pshufd
-;CHECK: pshufd
-;CHECK: ret
-define void @ltstore() {
+;CHECK: movq
+;CHECK-NEXT: movq
+;CHECK-NEXT: ret
+define void @ltstore(<4 x i32>* %pIn, <2 x i32>* %pOut) {
entry:
- %in = load <4 x i32>* undef
+ %in = load <4 x i32>* %pIn
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- store <2 x i32> %j, <2 x i32>* undef
+ store <2 x i32> %j, <2 x i32>* %pOut
ret void
}
Added: llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll?rev=153848&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll (added)
+++ llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll Sun Apr 1 14:31:22 2012
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+ %A = load <4 x i8>* %pA
+ %B = load <4 x i8>* %pB
+ %C = xor <4 x i8> %A, %B
+ store <4 x i8> %C, <4 x i8>* %pA
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll Sun Apr 1 14:31:22 2012
@@ -27,11 +27,11 @@
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
entry:
; CHECK: t02
-; CHECK: movaps
-; CHECK: shufps
-; CHECK: pshufd
-; CHECK: movq
-; CHECK: ret
+; CHECK: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: ret
%0 = bitcast <8 x i32>* %source to <4 x i32>*
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
%tmp2 = load <4 x i32>* %arrayidx, align 16
Modified: llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll?rev=153848&r1=153847&r2=153848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll Sun Apr 1 14:31:22 2012
@@ -33,7 +33,7 @@
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: shuf3:
-; CHECK: shufps
+; CHECK: shufd
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>