[llvm-commits] [llvm] r153848 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/ARM/reg_sequence.ll test/CodeGen/CellSPU/rotate_ops.ll test/CodeGen/X86/2011-10-27-tstore.ll test/CodeGen/X86
Eli Friedman
eli.friedman at gmail.com
Sun Apr 1 15:00:26 PDT 2012
On Sun, Apr 1, 2012 at 12:31 PM, Nadav Rotem <nadav.rotem at intel.com> wrote:
> Author: nadav
> Date: Sun Apr 1 14:31:22 2012
> New Revision: 153848
>
> URL: http://llvm.org/viewvc/llvm-project?rev=153848&view=rev
> Log:
> This commit contains a few changes that had to go in together.
>
> 1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
> (and also scalar_to_vector).
>
> 2. Xor/and/or are indifferent to the swizzle operation (a shuffle of one source).
> Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)); see the sketch after this list.
>
> 3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y).
>
> 4. Fix an X86ISelLowering optimization which was very bitcast-sensitive.
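>
> As an illustration of change 2, here is a minimal sketch written at the IR
> level for readability (the combine itself runs on the corresponding
> SelectionDAG nodes; %A and %B are placeholder values). Two shuffles that
> apply the same single-source mask commute with the bitwise operation:
>
> %a = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> %b = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> %r = xor <4 x i32> %a, %b
>
> which becomes the equivalent
>
> %x = xor <4 x i32> %A, %B
> %r = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>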
>
> Code which was previously compiled to this:
>
> movd (%rsi), %xmm0
> movdqa .LCPI0_0(%rip), %xmm2
> pshufb %xmm2, %xmm0
> movd (%rdi), %xmm1
> pshufb %xmm2, %xmm1
> pxor %xmm0, %xmm1
> pshufb .LCPI0_1(%rip), %xmm1
> movd %xmm1, (%rdi)
> ret
>
> Now compiles to this:
>
> movl (%rsi), %eax
> xorl %eax, (%rdi)
> ret
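>
> The code above corresponds to a function along the following lines (a
> hypothetical sketch, not necessarily the exact contents of the new
> SwizzleShuff.ll test, and the function name is made up): an illegal
> <4 x i8> vector is loaded, xor'ed and stored back, and the swizzles and
> bitcasts that legalization wraps around the xor now fold away:
>
> define void @xor_v4i8(<4 x i8>* %pA, <4 x i8>* %pB) {
>   %A = load <4 x i8>* %pA
>   %B = load <4 x i8>* %pB
>   %C = xor <4 x i8> %A, %B
>   store <4 x i8> %C, <4 x i8>* %pA
>   ret void
> }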
>
>
>
> Added:
>     llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
>     llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
>     llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
>     llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
>     llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=153848&r1=153847&r2=153848&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Apr 1 14:31:22 2012
> @@ -2336,6 +2336,68 @@
>                        ORNode, N0.getOperand(1));
>   }
>
> +  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
> +  // Only perform this optimization after type legalization and before
> +  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
> +  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
> +  // we don't want to undo this promotion.
> +  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
> +  // on scalars.
> +  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
> +      && Level == AfterLegalizeVectorOps) {
> +    SDValue In0 = N0.getOperand(0);
> +    SDValue In1 = N1.getOperand(0);
> +    EVT In0Ty = In0.getValueType();
> +    EVT In1Ty = In1.getValueType();
> +    // If both incoming values are integers, and the original types are the same.
> +    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
> +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
> +      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
> +      AddToWorkList(Op.getNode());
> +      return BC;
> +    }
> +  }
> +
> +  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
> +  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
> +  // If both shuffles use the same mask, and both shuffle within a single
> +  // vector, then it is worthwhile to move the swizzle after the operation.
> +  // The type-legalizer generates this pattern when loading illegal
> +  // vector types from memory. In many cases this allows additional shuffle
> +  // optimizations.
> +  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
> +    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
> +    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
> +    SDValue In0 = SVN0->getOperand(0);
> +    SDValue In1 = SVN1->getOperand(0);
> +    EVT In0Ty = In0.getValueType();
> +    EVT In1Ty = In1.getValueType();
> +
> +    unsigned NumElts = VT.getVectorNumElements();
> +    // Check that both shuffles are swizzles.
> +    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
> +                           N1.getOperand(1).getOpcode() == ISD::UNDEF);
> +
> +    // Check that both shuffles use the same mask. The masks are known to be of
> +    // the same length because the result vector type is the same.
> +    bool SameMask = true;
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      int Idx0 = SVN0->getMaskElt(i);
> +      int Idx1 = SVN1->getMaskElt(i);
> +      if (Idx0 != Idx1) {
> +        SameMask = false;
> +        break;
> +      }
> +    }
> +
> +    if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
> +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
> +      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
> +                                           DAG.getUNDEF(VT), &SVN0->getMask()[0]);
> +      AddToWorkList(Op.getNode());
> +      return Shuff;
> +    }
> +  }
>   return SDValue();
> }
>
> @@ -7721,6 +7783,36 @@
>         return N0;
>     }
>   }
> +
> +  // If this shuffle node is simply a swizzle of another shuffle node,
> +  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
> +  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
> +      N1.getOpcode() == ISD::UNDEF) {
> +
> +    SmallVector<int, 8> NewMask;
> +    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
> +
> +    EVT InVT = N0.getValueType();
> +    int InNumElts = InVT.getVectorNumElements();
> +
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      int Idx = SVN->getMaskElt(i);
> +      // If we access the second (undef) operand then this index can be
> +      // canonicalized to undef as well.
> +      if (Idx >= InNumElts)
> +        Idx = -1;
> +      // Next, this index comes from the first value, which is the incoming
> +      // shuffle. Adopt the incoming index.
> +      if (Idx >= 0)
> +        Idx = OtherSV->getMaskElt(Idx);
> +
> +      NewMask.push_back(Idx);
> +    }
> +
> +    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
> +                                OtherSV->getOperand(1), &NewMask[0]);
> +  }
I'm a bit worried about this transformation... our shuffle-matching
code isn't perfect, so this could easily turn two shuffles we know how
to match into a shuffle we generate really bad code for.
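
For instance (a made-up example, sketched at the IR level; I haven't checked
what we actually generate for it): each of these shuffles maps to a single
instruction on x86 (a punpckldq followed by a pshufd), but the new fold
rewrites them into one two-source shuffle whose mask has no single-instruction
lowering, so we depend entirely on how well the shuffle lowering decomposes it:

  %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %t = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ; folded into:
  %t = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 0, i32 4>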
-Eli