[PATCH] [AArch64 NEON] Fix a bug caused by undef lane in generating VEXT.

Kevin Qin kevinqindev at gmail.com
Fri Jan 17 01:25:05 PST 2014


Hi #llvm,

Hi,

Previous code doesn't consider lane number would be -1, which means undef. And it would create a huge immediate (the complement of -1) for lane operand and cause pattern match fail. This patch can fix that problem. Please review.

http://llvm-reviews.chandlerc.com/D2567

Files:
  lib/Target/AArch64/AArch64ISelLowering.cpp
  test/CodeGen/AArch64/neon-extract.ll

Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4404,22 +4404,28 @@
   // it into NEON_VEXTRACT.
   if (V1EltNum == Length) {
     // Check if the shuffle mask is sequential.
-    bool IsSequential = true;
-    int CurMask = ShuffleMask[0];
-    for (int I = 0; I < Length; ++I) {
-      if (ShuffleMask[I] != CurMask) {
-        IsSequential = false;
-        break;
-      }
-      CurMask++;
+    int SkipUndef = 0;
+    while (ShuffleMask[SkipUndef] == -1) {
+      SkipUndef++;
     }
-    if (IsSequential) {
-      assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
-      unsigned VecSize = EltSize * V1EltNum;
-      unsigned Index = (EltSize/8) * ShuffleMask[0];
-      if (VecSize == 64 || VecSize == 128)
-        return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
-                           DAG.getConstant(Index, MVT::i64));
+    int CurMask = ShuffleMask[SkipUndef];
+    if (CurMask >= SkipUndef) {
+      bool IsSequential = true;
+      for (int I = SkipUndef; I < Length; ++I) {
+        if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
+          IsSequential = false;
+          break;
+        }
+        CurMask++;
+      }
+      if (IsSequential) {
+        assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
+        unsigned VecSize = EltSize * V1EltNum;
+        unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
+        if (VecSize == 64 || VecSize == 128)
+          return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
+                             DAG.getConstant(Index, MVT::i64));
+      }
     }
   }
 
Index: test/CodeGen/AArch64/neon-extract.ll
===================================================================
--- test/CodeGen/AArch64/neon-extract.ll
+++ test/CodeGen/AArch64/neon-extract.ll
@@ -188,3 +188,35 @@
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
 }
+
+define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
+; CHECK: test_undef_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 10, i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x i8> %vext
+}
+
+define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
+; CHECK: test_undef_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 20, i32 20, i32 20, i32 20, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 20, i32 20, i32 20, i32 20, i32 20>
+  ret <16 x i8> %vext
+}
+
+define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
+; CHECK: test_undef_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 4>
+  ret <4 x i16> %vext
+}
+
+define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
+; CHECK: test_undef_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %vext
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2567.1.patch
Type: text/x-patch
Size: 3459 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140117/d7fb68ee/attachment.bin>


More information about the llvm-commits mailing list