[PATCH] [AArch64 NEON] Fix a bug caused by undef lane in generating VEXT.

Jiangning Liu liujiangning1 at gmail.com
Sun Jan 19 18:40:56 PST 2014


Looks good to me!

Thanks,
-Jiangning


2014/1/17 Kevin Qin <kevinqindev at gmail.com>

> Hi #llvm,
>
> Hi,
>
> The previous code doesn't consider that a lane number can be -1, which means
> undef. In that case it would create a huge immediate (-1 converted to an
> unsigned value) for the lane operand and cause pattern matching to fail.
> This patch fixes that problem. Please review.
>
> http://llvm-reviews.chandlerc.com/D2567
>
> Files:
>   lib/Target/AArch64/AArch64ISelLowering.cpp
>   test/CodeGen/AArch64/neon-extract.ll
>
> Index: lib/Target/AArch64/AArch64ISelLowering.cpp
> ===================================================================
> --- lib/Target/AArch64/AArch64ISelLowering.cpp
> +++ lib/Target/AArch64/AArch64ISelLowering.cpp
> @@ -4404,22 +4404,28 @@
>    // it into NEON_VEXTRACT.
>    if (V1EltNum == Length) {
>      // Check if the shuffle mask is sequential.
> -    bool IsSequential = true;
> -    int CurMask = ShuffleMask[0];
> -    for (int I = 0; I < Length; ++I) {
> -      if (ShuffleMask[I] != CurMask) {
> -        IsSequential = false;
> -        break;
> -      }
> -      CurMask++;
> +    int SkipUndef = 0;
> +    while (ShuffleMask[SkipUndef] == -1) {
> +      SkipUndef++;
>      }
> -    if (IsSequential) {
> -      assert((EltSize % 8 == 0) && "Bitsize of vector element is
> incorrect");
> -      unsigned VecSize = EltSize * V1EltNum;
> -      unsigned Index = (EltSize/8) * ShuffleMask[0];
> -      if (VecSize == 64 || VecSize == 128)
> -        return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
> -                           DAG.getConstant(Index, MVT::i64));
> +    int CurMask = ShuffleMask[SkipUndef];
> +    if (CurMask >= SkipUndef) {
> +      bool IsSequential = true;
> +      for (int I = SkipUndef; I < Length; ++I) {
> +        if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
> +          IsSequential = false;
> +          break;
> +        }
> +        CurMask++;
> +      }
> +      if (IsSequential) {
> +        assert((EltSize % 8 == 0) && "Bitsize of vector element is
> incorrect");
> +        unsigned VecSize = EltSize * V1EltNum;
> +        unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] -
> SkipUndef);
> +        if (VecSize == 64 || VecSize == 128)
> +          return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
> +                             DAG.getConstant(Index, MVT::i64));
> +      }
>      }
>    }
>
> Index: test/CodeGen/AArch64/neon-extract.ll
> ===================================================================
> --- test/CodeGen/AArch64/neon-extract.ll
> +++ test/CodeGen/AArch64/neon-extract.ll
> @@ -188,3 +188,35 @@
>    %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32
> 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
>    ret <8 x i16> %vext
>  }
> +
> +define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
> +; CHECK: test_undef_vext_s8:
> +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
> +entry:
> +  %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 10,
> i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
> +  ret <8 x i8> %vext
> +}
> +
> +define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
> +; CHECK: test_undef_vextq_s8:
> +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
> +entry:
> +  %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32
> 20, i32 20, i32 20, i32 20, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
> i32 16, i32 20, i32 20, i32 20, i32 20, i32 20>
> +  ret <16 x i8> %vext
> +}
> +
> +define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
> +; CHECK: test_undef_vext_s16:
> +; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
> +entry:
> +  %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 4,
> i32 2, i32 3, i32 4>
> +  ret <4 x i16> %vext
> +}
> +
> +define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
> +; CHECK: test_undef_vextq_s16:
> +; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
> +entry:
> +  %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 10,
> i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
> +  ret <8 x i16> %vext
> +}
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
>


-- 
Thanks,
-Jiangning
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140120/a55c5440/attachment.html>


More information about the llvm-commits mailing list