[llvm] r211144 - [AArch64] Fix a pattern match failure caused by creating improper CONCAT_VECTOR.
Chad Rosier
mcrosier at codeaurora.org
Thu Jul 3 18:15:04 PDT 2014
Hi Kevin,
For the following IR:
define <4 x i8> @test(<8 x i8> %p) {
%lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1,
i32 2, i32 3>
ret <4 x i8> %lo
}
Currently, ToT is generating the following instructions, which appear to
be incorrect:
mov v1.16b, v0.16b
dup v0.4h, v1.h[0]
ret
I believe this regression was caused by this commit. Previously we were
generating:
mov v1.16b, v0.16b
umov w8, v1.b[0]
mov v2.16b, v0.16b
ins v2.h[0], w8
mov v0.16b, v2.16b
mov v2.16b, v0.16b
umov w8, v1.b[1]
ins v2.h[1], w8
mov v0.16b, v2.16b
mov v2.16b, v0.16b
umov w8, v1.b[2]
ins v2.h[2], w8
mov v0.16b, v2.16b
mov v2.16b, v0.16b
umov w8, v1.b[3]
ins v2.h[3], w8
mov v0.16b, v2.16b
ret
Would you mind taking a look?
Regards,
Chad
> Author: kevinqin
> Date: Wed Jun 18 00:54:42 2014
> New Revision: 211144
>
> URL: http://llvm.org/viewvc/llvm-project?rev=211144&view=rev
> Log:
> [AArch64] Fix a pattern match failure caused by creating improper
> CONCAT_VECTOR.
>
> ReconstructShuffle() may wrongly create a CONCAT_VECTOR trying to
> concat 2 of v2i32 into v4i16. This commit is to fix this issue and
> try to generate UZP1 instead of lots of MOV and INS.
> Patch is initialized by Kevin Qin, and refactored by Tim Northover.
>
> Added:
> llvm/trunk/test/CodeGen/AArch64/arm64-convert-v4f64.ll
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=211144&r1=211143&r2=211144&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Jun 18
> 00:54:42 2014
> @@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128
> // shuffle in combination with VEXTs.
> SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
> SelectionDAG &DAG)
> const {
> + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
> SDLoc dl(Op);
> EVT VT = Op.getValueType();
> unsigned NumElts = VT.getVectorNumElements();
> @@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::Reconstru
>
> SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
> int VEXTOffsets[2] = { 0, 0 };
> + int OffsetMultipliers[2] = { 1, 1 };
>
> // This loop extracts the usage patterns of the source vectors
> // and prepares appropriate SDValues for a shuffle if possible.
> for (unsigned i = 0; i < SourceVecs.size(); ++i) {
> - if (SourceVecs[i].getValueType() == VT) {
> + unsigned NumSrcElts =
> SourceVecs[i].getValueType().getVectorNumElements();
> + SDValue CurSource = SourceVecs[i];
> + if (SourceVecs[i].getValueType().getVectorElementType() !=
> + VT.getVectorElementType()) {
> + // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
> + // Then bitcast it to the vector which holds asserted element type,
> + // and record the multiplier of element width between SourceVecs
> and
> + // Build_vector which is needed to extract the correct lanes later.
> + EVT CastVT =
> + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
> + SourceVecs[i].getValueSizeInBits() /
> +
> VT.getVectorElementType().getSizeInBits());
> +
> + CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
> + OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
> + NumSrcElts *= OffsetMultipliers[i];
> + MaxElts[i] *= OffsetMultipliers[i];
> + MinElts[i] *= OffsetMultipliers[i];
> + }
> +
> + if (CurSource.getValueType() == VT) {
> // No VEXT necessary
> - ShuffleSrcs[i] = SourceVecs[i];
> + ShuffleSrcs[i] = CurSource;
> VEXTOffsets[i] = 0;
> continue;
> - } else if (SourceVecs[i].getValueType().getVectorNumElements() <
> NumElts) {
> + } else if (NumSrcElts < NumElts) {
> // We can pad out the smaller vector for free, so if it's part of a
> // shuffle...
> - ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
> SourceVecs[i],
> -
> DAG.getUNDEF(SourceVecs[i].getValueType()));
> + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
> CurSource,
> +
> DAG.getUNDEF(CurSource.getValueType()));
> continue;
> }
>
> - // Don't attempt to extract subvectors from BUILD_VECTOR sources
> - // that expand or trunc the original value.
> - // TODO: We can try to bitcast and ANY_EXTEND the result but
> - // we need to consider the cost of vector ANY_EXTEND, and the
> - // legality of all the types.
> - if (SourceVecs[i].getValueType().getVectorElementType() !=
> - VT.getVectorElementType())
> - return SDValue();
> -
> // Since only 64-bit and 128-bit vectors are legal on ARM and
> // we've eliminated the other cases...
> - assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 *
> NumElts &&
> + assert(NumSrcElts == 2 * NumElts &&
> "unexpected vector sizes in ReconstructShuffle");
>
> if (MaxElts[i] - MinElts[i] >= NumElts) {
> @@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::Reconstru
> if (MinElts[i] >= NumElts) {
> // The extraction can just take the second half
> VEXTOffsets[i] = NumElts;
> - ShuffleSrcs[i] =
> - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
> - DAG.getIntPtrConstant(NumElts));
> + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> CurSource,
> + DAG.getIntPtrConstant(NumElts));
> } else if (MaxElts[i] < NumElts) {
> // The extraction can just take the first half
> VEXTOffsets[i] = 0;
> - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> - SourceVecs[i],
> DAG.getIntPtrConstant(0));
> + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> CurSource,
> + DAG.getIntPtrConstant(0));
> } else {
> // An actual VEXT is needed
> VEXTOffsets[i] = MinElts[i];
> - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> - SourceVecs[i],
> DAG.getIntPtrConstant(0));
> - SDValue VEXTSrc2 =
> - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
> - DAG.getIntPtrConstant(NumElts));
> + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> CurSource,
> + DAG.getIntPtrConstant(0));
> + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
> CurSource,
> + DAG.getIntPtrConstant(NumElts));
> unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
> ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1,
> VEXTSrc2,
> DAG.getConstant(Imm, MVT::i32));
> @@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::Reconstru
> int ExtractElt =
> cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
> if (ExtractVec == SourceVecs[0]) {
> - Mask.push_back(ExtractElt - VEXTOffsets[0]);
> + Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
> } else {
> - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
> + Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
> + VEXTOffsets[1]);
> }
> }
>
>
> Added: llvm/trunk/test/CodeGen/AArch64/arm64-convert-v4f64.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-convert-v4f64.ll?rev=211144&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-convert-v4f64.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-convert-v4f64.ll Wed Jun 18
> 00:54:42 2014
> @@ -0,0 +1,33 @@
> +; RUN: llc < %s -march=arm64 | FileCheck %s
> +
> +
> +define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
> +; CHECK: fptosi_v4f64_to_v4i16
> +; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
> +; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
> +; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
> +; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
> +; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
> + %tmp1 = load <4 x double>* %ptr
> + %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
> + ret <4 x i16> %tmp2
> +}
> +
> +define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
> +; CHECK: fptosi_v4f64_to_v4i8
> +; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
> +; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
> +; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
> +; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
> +; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
> +; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
> +; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
> +; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
> +; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
> +; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
> +; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
> + %tmp1 = load <8 x double>* %ptr
> + %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
> + ret <8 x i8> %tmp2
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
More information about the llvm-commits
mailing list