[llvm] r212808 - [X86] Fix the inversion of low and high bits for the lowering of MUL_LOHI.

Wed Jul 23 16:36:10 PDT 2014

Hi Quentin,

Could you take a look at http://llvm.org/bugs/show_bug.cgi?id=20421? I
wonder whether this revision could be the cause of that bug.

On Fri, Jul 11, 2014 at 5:08 AM, Quentin Colombet <qcolombet at apple.com>
wrote:

> Author: qcolombet
> Date: Fri Jul 11 07:08:23 2014
> New Revision: 212808
>
> URL: http://llvm.org/viewvc/llvm-project?rev=212808&view=rev
> Log:
> [X86] Fix the inversion of low and high bits for the lowering of MUL_LOHI.
> Also add a few comments.
>
> <rdar://problem/17581756>
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>     llvm/trunk/test/CodeGen/X86/vector-idiv.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=212808&r1=212807&r2=212808&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jul 11 07:08:23 2014
> @@ -15156,10 +15156,23 @@ static SDValue LowerMUL_LOHI(SDValue Op,
>    assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
>           (VT == MVT::v8i32 && Subtarget->hasInt256()));
>
> -  // Get the high parts.
> +  // PMULxD operations multiply each even value (starting at 0) of LHS with
> +  // the related value of RHS and produce a widen result.
> +  // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
> +  // => <2 x i64> <ae|cg>
> +  //
> +  // In other word, to have all the results, we need to perform two PMULxD:
> +  // 1. one with the even values.
> +  // 2. one with the odd values.
> +  // To achieve #2, with need to place the odd values at an even position.
> +  //
> +  // Place the odd value at an even position (basically, shift all values 1
> +  // step to the left):
>    const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
> -  SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
> -  SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
> +  // <a|b|c|d> => <b|undef|d|undef>
> +  SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
> +  // <e|f|g|h> => <f|undef|h|undef>
> +  SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
>
>    // Emit two multiplies, one for the lower 2 ints and one for the higher 2
>    // ints.
> @@ -15167,22 +15180,39 @@ static SDValue LowerMUL_LOHI(SDValue Op,
>    bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
>    unsigned Opcode =
>        (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
> +  // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
> +  // => <2 x i64> <ae|cg>
>    SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
>                               DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
> +  // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
> +  // => <2 x i64> <bf|dh>
>    SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
> -                             DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
> +                             DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
>
>    // Shuffle it back into the right order.
> +  // The internal representation is big endian.
> +  // In other words, a i64 bitcasted to 2 x i32 has its high part at index 0
> +  // and its low part at index 1.
> +  // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh>
> +  // Vector index                0 1   ;          2 3
> +  // We want      <ae|bf|cg|dh>
> +  // Vector index   0  2  1  3
> +  // Since each element is seen as 2 x i32, we get:
> +  // high_mask[i] = 2 x vector_index[i]
> +  // low_mask[i] = 2 x vector_index[i] + 1
> +  // where vector_index = {0, Size/2, 1, Size/2 + 1, ...,
> +  //                       Size/2 - 1, Size/2 + Size/2 - 1}
> +  // where Size is the number of element of the final vector.
>    SDValue Highs, Lows;
>    if (VT == MVT::v8i32) {
> -    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
> +    const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
>      Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
> -    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
> +    const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
>      Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
>    } else {
> -    const int HighMask[] = {1, 5, 3, 7};
> +    const int HighMask[] = {0, 4, 2, 6};
>      Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
> -    const int LowMask[] = {0, 4, 2, 6};
> +    const int LowMask[] = {1, 5, 3, 7};
>      Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
>    }
>
> @@ -15200,7 +15230,9 @@ static SDValue LowerMUL_LOHI(SDValue Op,
>      Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
>    }
>
> -  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows);
> +  // The low part of a MUL_LOHI is supposed to be the first value and the
> +  // high part the second value.
> +  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Lows, Highs);
>  }
>
>  static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-idiv.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv.ll?rev=212808&r1=212807&r2=212808&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-idiv.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-idiv.ll Fri Jul 11 07:08:23 2014
> @@ -132,9 +132,6 @@ define <4 x i32> @test8(<4 x i32> %a) {
>  ; SSE41: padd
>
>  ; SSE-LABEL: test8:
> -; SSE: psrad $31
> -; SSE: pand
> -; SSE: paddd
>  ; SSE: pmuludq
>  ; SSE: pshufd  $49
>  ; SSE-NOT: pshufd      $49
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>

-- 
Alexey Samsonov
vonosmas at gmail.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140723/27e71a25/attachment.html>