[llvm] r234809 - [CodeGen] Combine concat_vectors of scalars into build_vector.

Wed Apr 15 17:58:37 PDT 2015

On 13 April 2015 at 15:57, Ahmed Bougacha <ahmed.bougacha at gmail.com> wrote:

> Author: ab
> Date: Mon Apr 13 17:57:21 2015
> New Revision: 234809
>
> URL: http://llvm.org/viewvc/llvm-project?rev=234809&view=rev
> Log:
> [CodeGen] Combine concat_vectors of scalars into build_vector.
>
> Combine something like:
>   (v8i8 concat_vectors (v2i8 bitcast (i16)) x4)
> into:
>   (v8i8 (bitcast (v4i16 BUILD_VECTOR (i16) x4)))
>
> If any of the scalars are floating point, use that throughout.
>

Hi Ahmed! I think this patch caused PR23246, and I'm going to revert it for
now. I'm really not familiar with x86, vectors, or the backend, but my guess
is that this is because x86mmx is a strange type with unusual restrictions
on it. Please reapply when the testcase is fixed.

> Differential Revision: http://reviews.llvm.org/D8948
>
> Added:
>     llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
> Modified:
>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234809&r1=234808&r2=234809&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 13
> 17:57:21 2015
> @@ -11499,6 +11499,62 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
>    return SDValue();
>  }
>
> +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
> {
> +  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
> +  EVT OpVT = N->getOperand(0).getValueType();
> +
> +  // If the operands are legal vectors, leave them alone.
> +  if (TLI.isTypeLegal(OpVT))
> +    return SDValue();
> +
> +  SDLoc DL(N);
> +  EVT VT = N->getValueType(0);
> +  SmallVector<SDValue, 8> Ops;
> +
> +  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
> +  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
> +
> +  // Keep track of what we encounter.
> +  bool AnyInteger = false;
> +  bool AnyFP = false;
> +  for (const SDValue &Op : N->ops()) {
> +    if (ISD::BITCAST == Op.getOpcode() &&
> +        !Op.getOperand(0).getValueType().isVector())
> +      Ops.push_back(Op.getOperand(0));
> +    else if (ISD::UNDEF == Op.getOpcode())
> +      Ops.push_back(ScalarUndef);
> +    else
> +      return SDValue();
> +
> +    if (Ops.back().getValueType().isFloatingPoint())
> +      AnyFP = true;
> +    else
> +      AnyInteger = true;
> +  }
> +
> +  // If any of the operands is a floating point scalar bitcast to a
> vector,
> +  // use floating point types throughout, and bitcast everything.
> +  // Replace UNDEFs by another scalar UNDEF node, of the final desired
> type.
> +  if (AnyFP) {
> +    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
> +    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
> +    if (AnyInteger) {
> +      for (SDValue &Op : Ops) {
> +        if (Op.getValueType() != SVT) {
> +          Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
> +          if (Op.getOpcode() == ISD::UNDEF)
> +            Op = ScalarUndef;
> +        }
> +      }
> +    }
> +  }
> +
> +  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
> +                               VT.getSizeInBits() / SVT.getSizeInBits());
> +  return DAG.getNode(ISD::BITCAST, DL, VT,
> +                     DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
> +}
> +
>  SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
>    // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
>    // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR
> vector
> @@ -11601,6 +11657,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS
>      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
>    }
>
> +  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to
> BUILD_VECTOR.
> +  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
> +    return V;
> +
>    // Type legalization of vectors and DAG canonicalization of
> SHUFFLE_VECTOR
>    // nodes often generate nop CONCAT_VECTOR nodes.
>    // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
>
> Added: llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll?rev=234809&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll Mon
> Apr 13 17:57:21 2015
> @@ -0,0 +1,125 @@
> +; RUN: llc < %s -mtriple aarch64-unknown-unknown
> -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
> +
> +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
> +
> +; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
> +
> +define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
> +; CHECK-NEXT: dup.4h v0, w0
> +; CHECK-NEXT: ret
> +  %t = trunc i32 %x to i16
> +  %0 = bitcast i16 %t to <2 x i8>
> +  %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32
> 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
> +  ret <8 x i8> %1
> +}
> +
> +define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
> +; CHECK-NEXT: dup.2s v0, w0
> +; CHECK-NEXT: ret
> +  %0 = bitcast i32 %x to <4 x i8>
> +  %1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
> +  ret <8 x i8> %1
> +}
> +
> +define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
> +; CHECK-NEXT: dup.4s v0, w0
> +; CHECK-NEXT: ret
> +  %0 = bitcast i32 %x to <2 x i16>
> +  %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32
> 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
> +  ret <8 x i16> %1
> +}
> +
> +define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
> +; CHECK-NEXT: ins.h v0[0], w0
> +; CHECK-NEXT: ins.h v0[1], w1
> +; CHECK-NEXT: ins.h v0[3], w1
> +; CHECK-NEXT: ret
> +  %tx = trunc i32 %x to i16
> +  %ty = trunc i32 %y to i16
> +  %bx = bitcast i16 %tx to <2 x i8>
> +  %by = bitcast i16 %ty to <2 x i8>
> +  %r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1,
> i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
> +  ret <8 x i8> %r
> +}
> +
> +define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y)
> #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
> +; CHECK-NEXT: fmov s0, w1
> +; CHECK-NEXT: ins.s v0[1], w0
> +; CHECK-NEXT: ret
> +  %bx = bitcast i32 %x to <4 x i8>
> +  %by = bitcast i32 %y to <4 x i8>
> +  %r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5,
> i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
> +  ret <8 x i8> %r
> +}
> +
> +define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32
> %y) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
> +; CHECK-NEXT: fmov s0, w0
> +; CHECK-NEXT: ins.s v0[1], w1
> +; CHECK-NEXT: ins.s v0[2], w1
> +; CHECK-NEXT: ins.s v0[3], w0
> +; CHECK-NEXT: ret
> +  %bx = bitcast i32 %x to <2 x i16>
> +  %by = bitcast i32 %y to <2 x i16>
> +  %r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32
> 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
> +  ret <8 x i16> %r
> +}
> +
> +; Also make sure we minimize bitcasts.
> +
> +; This is a pretty artificial testcase: make sure we bitcast to
> floating-point
> +; if any of the scalars is floating-point.
> +define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy,
> i32 %x, half %y) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
> +; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
> +; CHECK-NEXT: ins.h v0[0], v[[X]][0]
> +; CHECK-NEXT: ins.h v0[1], v1[0]
> +; CHECK-NEXT: ins.h v0[2], v[[X]][0]
> +; CHECK-NEXT: ins.h v0[3], v1[0]
> +; CHECK-NEXT: ret
> +  %t = trunc i32 %x to i16
> +  %0 = bitcast i16 %t to <2 x i8>
> +  %y0 = bitcast half %y to <2 x i8>
> +  %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1,
> i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
> +  ret <8 x i8> %1
> +}
> +
> +define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy,
> half %x, half %y) #0 {
> +entry:
> +; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
> +; CHECK-NEXT: ins.h v0[0], v1[0]
> +; CHECK-NEXT: ins.h v0[1], v2[0]
> +; CHECK-NEXT: ins.h v0[2], v1[0]
> +; CHECK-NEXT: ins.h v0[3], v2[0]
> +; CHECK-NEXT: ret
> +  %0 = bitcast half %x to <2 x i8>
> +  %y0 = bitcast half %y to <2 x i8>
> +  %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1,
> i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
> +  %2 = bitcast <8 x i8> %1 to <2 x float>
> +  ret <2 x float> %2
> +}
> +
> +define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0
> {
> +entry:
> +; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
> +; CHECK-NEXT: dup.4s v0, v0[0]
> +; CHECK-NEXT: ret
> +  %0 = bitcast float %x to <2 x i16>
> +  %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32
> 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
> +  %2 = bitcast <8 x i16> %1 to <4 x float>
> +  ret <4 x float> %2
> +}
> +
> +attributes #0 = { nounwind }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150415/33f4fd92/attachment.html>