[llvm-commits] [llvm] r153848 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/ARM/reg_sequence.ll test/CodeGen/CellSPU/rotate_ops.ll test/CodeGen/X86/2011-10-27-tstore.ll test/CodeGen/X86
Eli Friedman
eli.friedman at gmail.com
Sun Apr 1 15:00:26 PDT 2012
On Sun, Apr 1, 2012 at 12:31 PM, Nadav Rotem <nadav.rotem at intel.com> wrote:
> Author: nadav
> Date: Sun Apr 1 14:31:22 2012
> New Revision: 153848
>
> URL: http://llvm.org/viewvc/llvm-project?rev=153848&view=rev
> Log:
> This commit contains a few changes that had to go in together.
>
> 1. Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
> (and also scalar_to_vector).
>
> 2. Xor/and/or are indifferent to the swizzle operation (a shuffle of one source).
> Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A, B)); see the sketch after this list.
>
> 3. Optimize swizzles of shuffles: shuff(shuff(x, y), undef) -> shuff(x, y).
>
> 4. Fix an X86ISelLowering optimization which was very bitcast-sensitive.
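>
> As an illustration of change 2, here is a minimal sketch written at the IR
> level for readability (the combine itself runs on the corresponding
> SelectionDAG nodes; %A and %B are placeholder values). Two shuffles that
> apply the same single-source mask commute with the bitwise operation:
>
> %a = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> %b = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> %r = xor <4 x i32> %a, %b
>
> which becomes the equivalent
>
> %x = xor <4 x i32> %A, %B
> %r = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>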
>
> Code which was previously compiled to this:
>
> movd (%rsi), %xmm0
> movdqa .LCPI0_0(%rip), %xmm2
> pshufb %xmm2, %xmm0
> movd (%rdi), %xmm1
> pshufb %xmm2, %xmm1
> pxor %xmm0, %xmm1
> pshufb .LCPI0_1(%rip), %xmm1
> movd %xmm1, (%rdi)
> ret
>
> Now compiles to this:
>
> movl (%rsi), %eax
> xorl %eax, (%rdi)
> ret
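>
> The code above corresponds to a function along the following lines (a
> hypothetical sketch, not necessarily the exact contents of the new
> SwizzleShuff.ll test, and the function name is made up): an illegal
> <4 x i8> vector is loaded, xor'ed and stored back, and the swizzles and
> bitcasts that legalization wraps around the xor now fold away:
>
> define void @xor_v4i8(<4 x i8>* %pA, <4 x i8>* %pB) {
>   %A = load <4 x i8>* %pA
>   %B = load <4 x i8>* %pB
>   %C = xor <4 x i8> %A, %B
>   store <4 x i8> %C, <4 x i8>* %pA
>   ret void
> }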
>
>
>
> Added:
>     llvm/trunk/test/CodeGen/X86/SwizzleShuff.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/ARM/reg_sequence.ll
>     llvm/trunk/test/CodeGen/CellSPU/rotate_ops.ll
>     llvm/trunk/test/CodeGen/X86/2011-10-27-tstore.ll
>     llvm/trunk/test/CodeGen/X86/vec_shuffle-37.ll
>     llvm/trunk/test/CodeGen/X86/widen_shuffle-1.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=153848&r1=153847&r2=153848&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Apr 1 14:31:22 2012
> @@ -2336,6 +2336,68 @@
>                        ORNode, N0.getOperand(1));
>   }
>
> +  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
> +  // Only perform this optimization after type legalization and before
> +  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
> +  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
> +  // we don't want to undo this promotion.
> +  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
> +  // on scalars.
> +  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
> +      && Level == AfterLegalizeVectorOps) {
> +    SDValue In0 = N0.getOperand(0);
> +    SDValue In1 = N1.getOperand(0);
> +    EVT In0Ty = In0.getValueType();
> +    EVT In1Ty = In1.getValueType();
> +    // If both incoming values are integers, and the original types are the same.
> +    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
> +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
> +      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
> +      AddToWorkList(Op.getNode());
> +      return BC;
> +    }
> +  }
> +
> +  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
> +  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
> +  // If both shuffles use the same mask, and both shuffle within a single
> +  // vector, then it is worthwhile to move the swizzle after the operation.
> +  // The type-legalizer generates this pattern when loading illegal
> +  // vector types from memory. In many cases this allows additional shuffle
> +  // optimizations.
> +  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
> +    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
> +    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
> +    SDValue In0 = SVN0->getOperand(0);
> +    SDValue In1 = SVN1->getOperand(0);
> +    EVT In0Ty = In0.getValueType();
> +    EVT In1Ty = In1.getValueType();
> +
> +    unsigned NumElts = VT.getVectorNumElements();
> +    // Check that both shuffles are swizzles.
> +    bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF &&
> +                           N1.getOperand(1).getOpcode() == ISD::UNDEF);
> +
> +    // Check that both shuffles use the same mask. The masks are known to be of
> +    // the same length because the result vector type is the same.
> +    bool SameMask = true;
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      int Idx0 = SVN0->getMaskElt(i);
> +      int Idx1 = SVN1->getMaskElt(i);
> +      if (Idx0 != Idx1) {
> +        SameMask = false;
> +        break;
> +      }
> +    }
> +
> +    if (SameMask && SingleVecShuff && In0Ty == In1Ty) {
> +      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1);
> +      SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
> +                                           DAG.getUNDEF(VT), &SVN0->getMask()[0]);
> +      AddToWorkList(Op.getNode());
> +      return Shuff;
> +    }
> +  }
>   return SDValue();
> }
>
> @@ -7721,6 +7783,36 @@
>         return N0;
>     }
>   }
> +
> +  // If this shuffle node is simply a swizzle of another shuffle node,
> +  // optimize shuffle(shuffle(x, y), undef) -> shuffle(x, y).
> +  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
> +      N1.getOpcode() == ISD::UNDEF) {
> +
> +    SmallVector<int, 8> NewMask;
> +    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
> +
> +    EVT InVT = N0.getValueType();
> +    int InNumElts = InVT.getVectorNumElements();
> +
> +    for (unsigned i = 0; i != NumElts; ++i) {
> +      int Idx = SVN->getMaskElt(i);
> +      // If we access the second (undef) operand then this index can be
> +      // canonicalized to undef as well.
> +      if (Idx >= InNumElts)
> +        Idx = -1;
> +      // Next, this index comes from the first value, which is the incoming
> +      // shuffle. Adopt the incoming index.
> +      if (Idx >= 0)
> +        Idx = OtherSV->getMaskElt(Idx);
> +
> +      NewMask.push_back(Idx);
> +    }
> +
> +    return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
> +                                OtherSV->getOperand(1), &NewMask[0]);
> +  }
I'm a bit worried about this transformation... our shuffle-matching
code isn't perfect, so this could easily turn two shuffles we know how
to match into a shuffle we generate really bad code for.
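
For instance (a made-up example, sketched at the IR level; I haven't checked
what we actually generate for it): each of these shuffles maps to a single
instruction on x86 (a punpckldq followed by a pshufd), but the new fold
rewrites them into one two-source shuffle whose mask has no single-instruction
lowering, so we depend entirely on how well the shuffle lowering decomposes it:

  %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %t = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ; folded into:
  %t = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 0, i32 4>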
-Eli