[llvm] r234809 - [CodeGen] Combine concat_vectors of scalars into build_vector.
Ahmed Bougacha
ahmed.bougacha at gmail.com
Wed Apr 15 19:43:15 PDT 2015
r235072 should fix it. Sorry about the breakage!
-Ahmed
On Wed, Apr 15, 2015 at 5:58 PM, Nick Lewycky <nlewycky at google.com> wrote:
> On 13 April 2015 at 15:57, Ahmed Bougacha <ahmed.bougacha at gmail.com> wrote:
>>
>> Author: ab
>> Date: Mon Apr 13 17:57:21 2015
>> New Revision: 234809
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=234809&view=rev
>> Log:
>> [CodeGen] Combine concat_vectors of scalars into build_vector.
>>
>> Combine something like:
>> (v8i8 concat_vectors (v2i8 bitcast (i16)) x4)
>> into:
>> (v8i8 (bitcast (v4i16 BUILD_VECTOR (i16) x4)))
>>
>> If any of the scalars are floating point, use that throughout.
>
>
> Hi Ahmed! I think this patch caused PR23246 and I'm going to revert it for
> now. I'm really not familiar with x86, vectors, or the backend, but my guess is
> that this is because x86mmx is a strange type with unusual restrictions on
> it. Please reapply when the testcase is fixed.
>
>
>> Differential Revision: http://reviews.llvm.org/D8948
>>
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
>> Modified:
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=234809&r1=234808&r2=234809&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 13
>> 17:57:21 2015
>> @@ -11499,6 +11499,62 @@ SDValue DAGCombiner::visitBUILD_VECTOR(S
>> return SDValue();
>> }
>>
>> +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
>> {
>> + const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>> + EVT OpVT = N->getOperand(0).getValueType();
>> +
>> + // If the operands are legal vectors, leave them alone.
>> + if (TLI.isTypeLegal(OpVT))
>> + return SDValue();
>> +
>> + SDLoc DL(N);
>> + EVT VT = N->getValueType(0);
>> + SmallVector<SDValue, 8> Ops;
>> +
>> + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
>> + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
>> +
>> + // Keep track of what we encounter.
>> + bool AnyInteger = false;
>> + bool AnyFP = false;
>> + for (const SDValue &Op : N->ops()) {
>> + if (ISD::BITCAST == Op.getOpcode() &&
>> + !Op.getOperand(0).getValueType().isVector())
>> + Ops.push_back(Op.getOperand(0));
>> + else if (ISD::UNDEF == Op.getOpcode())
>> + Ops.push_back(ScalarUndef);
>> + else
>> + return SDValue();
>> +
>> + if (Ops.back().getValueType().isFloatingPoint())
>> + AnyFP = true;
>> + else
>> + AnyInteger = true;
>> + }
>> +
>> + // If any of the operands is a floating point scalar bitcast to a
>> vector,
>> + // use floating point types throughout, and bitcast everything.
>> + // Replace UNDEFs by another scalar UNDEF node, of the final desired
>> type.
>> + if (AnyFP) {
>> + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
>> + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
>> + if (AnyInteger) {
>> + for (SDValue &Op : Ops) {
>> + if (Op.getValueType() != SVT) {
>> + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
>> + if (Op.getOpcode() == ISD::UNDEF)
>> + Op = ScalarUndef;
>> + }
>> + }
>> + }
>> + }
>> +
>> + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
>> + VT.getSizeInBits() / SVT.getSizeInBits());
>> + return DAG.getNode(ISD::BITCAST, DL, VT,
>> + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
>> +}
>> +
>> SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
>> // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
>> // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR
>> vector
>> @@ -11601,6 +11657,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS
>> return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
>> }
>>
>> + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to
>> BUILD_VECTOR.
>> + if (SDValue V = combineConcatVectorOfScalars(N, DAG))
>> + return V;
>> +
>> // Type legalization of vectors and DAG canonicalization of
>> SHUFFLE_VECTOR
>> // nodes often generate nop CONCAT_VECTOR nodes.
>> // Scan the CONCAT_VECTOR operands and look for a CONCAT operations
>> that
>>
>> Added: llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll?rev=234809&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
>> (added)
>> +++ llvm/trunk/test/CodeGen/AArch64/concat_vector-scalar-combine.ll Mon
>> Apr 13 17:57:21 2015
>> @@ -0,0 +1,125 @@
>> +; RUN: llc < %s -mtriple aarch64-unknown-unknown
>> -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
>> +
>> +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
>> +
>> +; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
>> +
>> +define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
>> +; CHECK-NEXT: dup.4h v0, w0
>> +; CHECK-NEXT: ret
>> + %t = trunc i32 %x to i16
>> + %0 = bitcast i16 %t to <2 x i8>
>> + %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32
>> 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
>> + ret <8 x i8> %1
>> +}
>> +
>> +define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
>> +; CHECK-NEXT: dup.2s v0, w0
>> +; CHECK-NEXT: ret
>> + %0 = bitcast i32 %x to <4 x i8>
>> + %1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32
>> 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
>> + ret <8 x i8> %1
>> +}
>> +
>> +define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
>> +; CHECK-NEXT: dup.4s v0, w0
>> +; CHECK-NEXT: ret
>> + %0 = bitcast i32 %x to <2 x i16>
>> + %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32
>> 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
>> + ret <8 x i16> %1
>> +}
>> +
>> +define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
>> +; CHECK-NEXT: ins.h v0[0], w0
>> +; CHECK-NEXT: ins.h v0[1], w1
>> +; CHECK-NEXT: ins.h v0[3], w1
>> +; CHECK-NEXT: ret
>> + %tx = trunc i32 %x to i16
>> + %ty = trunc i32 %y to i16
>> + %bx = bitcast i16 %tx to <2 x i8>
>> + %by = bitcast i16 %ty to <2 x i8>
>> + %r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1,
>> i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
>> + ret <8 x i8> %r
>> +}
>> +
>> +define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y)
>> #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
>> +; CHECK-NEXT: fmov s0, w1
>> +; CHECK-NEXT: ins.s v0[1], w0
>> +; CHECK-NEXT: ret
>> + %bx = bitcast i32 %x to <4 x i8>
>> + %by = bitcast i32 %y to <4 x i8>
>> + %r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5,
>> i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
>> + ret <8 x i8> %r
>> +}
>> +
>> +define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32
>> %y) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
>> +; CHECK-NEXT: fmov s0, w0
>> +; CHECK-NEXT: ins.s v0[1], w1
>> +; CHECK-NEXT: ins.s v0[2], w1
>> +; CHECK-NEXT: ins.s v0[3], w0
>> +; CHECK-NEXT: ret
>> + %bx = bitcast i32 %x to <2 x i16>
>> + %by = bitcast i32 %y to <2 x i16>
>> + %r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32
>> 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
>> + ret <8 x i16> %r
>> +}
>> +
>> +; Also make sure we minimize bitcasts.
>> +
>> +; This is a pretty artificial testcase: make sure we bitcast to
>> floating-point
>> +; if any of the scalars is floating-point.
>> +define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy,
>> i32 %x, half %y) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
>> +; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
>> +; CHECK-NEXT: ins.h v0[0], v[[X]][0]
>> +; CHECK-NEXT: ins.h v0[1], v1[0]
>> +; CHECK-NEXT: ins.h v0[2], v[[X]][0]
>> +; CHECK-NEXT: ins.h v0[3], v1[0]
>> +; CHECK-NEXT: ret
>> + %t = trunc i32 %x to i16
>> + %0 = bitcast i16 %t to <2 x i8>
>> + %y0 = bitcast half %y to <2 x i8>
>> + %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1,
>> i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
>> + ret <8 x i8> %1
>> +}
>> +
>> +define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy,
>> half %x, half %y) #0 {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
>> +; CHECK-NEXT: ins.h v0[0], v1[0]
>> +; CHECK-NEXT: ins.h v0[1], v2[0]
>> +; CHECK-NEXT: ins.h v0[2], v1[0]
>> +; CHECK-NEXT: ins.h v0[3], v2[0]
>> +; CHECK-NEXT: ret
>> + %0 = bitcast half %x to <2 x i8>
>> + %y0 = bitcast half %y to <2 x i8>
>> + %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1,
>> i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
>> + %2 = bitcast <8 x i8> %1 to <2 x float>
>> + ret <2 x float> %2
>> +}
>> +
>> +define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0
>> {
>> +entry:
>> +; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
>> +; CHECK-NEXT: dup.4s v0, v0[0]
>> +; CHECK-NEXT: ret
>> + %0 = bitcast float %x to <2 x i16>
>> + %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32
>> 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
>> + %2 = bitcast <8 x i16> %1 to <4 x float>
>> + ret <4 x float> %2
>> +}
>> +
>> +attributes #0 = { nounwind }
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>
More information about the llvm-commits
mailing list