[llvm] e50060f - [AArch64][GlobalISel] Additional GISel test coverage for shuffles. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 09:20:14 PDT 2024


Author: David Green
Date: 2024-05-06T17:20:09+01:00
New Revision: e50060fc6fc9ca41fe4b266f3b68005247327247

URL: https://github.com/llvm/llvm-project/commit/e50060fc6fc9ca41fe4b266f3b68005247327247
DIFF: https://github.com/llvm/llvm-project/commit/e50060fc6fc9ca41fe4b266f3b68005247327247.diff

LOG: [AArch64][GlobalISel] Additional GISel test coverage for shuffles. NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/arm64-uzp.ll
    llvm/test/CodeGen/AArch64/arm64-zip.ll
    llvm/test/CodeGen/AArch64/neon-perm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index 49a51d96fbc841..10624f3879890d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i8> @vuzpi8(<8 x i8> %A, <8 x i8> %B) nounwind {
 ; CHECK-LABEL: vuzpi8:
@@ -108,12 +109,23 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
 }
 
 define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vuzpQi16_undef0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uzp1.8h v2, v0, v1
-; CHECK-NEXT:    uzp2.8h v0, v0, v1
-; CHECK-NEXT:    add.8h v0, v2, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vuzpQi16_undef0:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-SD-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-SD-NEXT:    add.8h v0, v2, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vuzpQi16_undef0:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    uzp2.8h v3, v0, v1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    add.8h v0, v0, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -121,12 +133,23 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
 }
 
 define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vuzpQi16_undef01:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uzp1.8h v2, v0, v1
-; CHECK-NEXT:    uzp2.8h v0, v0, v1
-; CHECK-NEXT:    add.8h v0, v2, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vuzpQi16_undef01:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-SD-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-SD-NEXT:    add.8h v0, v2, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vuzpQi16_undef01:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    uzp2.8h v3, v0, v1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    add.8h v0, v0, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -134,12 +157,23 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
 }
 
 define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vuzpQi16_undef012:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uzp1.8h v2, v0, v1
-; CHECK-NEXT:    uzp2.8h v0, v0, v1
-; CHECK-NEXT:    add.8h v0, v2, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vuzpQi16_undef012:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uzp1.8h v2, v0, v1
+; CHECK-SD-NEXT:    uzp2.8h v0, v0, v1
+; CHECK-SD-NEXT:    add.8h v0, v2, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vuzpQi16_undef012:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    uzp2.8h v3, v0, v1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    add.8h v0, v0, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
   %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>
   %tmp5 = add <8 x i16> %tmp3, %tmp4

diff  --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 4c771cbd2966cc..b7dedb20999299 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -1,5 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:       warning: Instruction selection used fallback path for shuffle_zip1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip2
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip3
 
 define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vzipi8:
@@ -140,19 +145,37 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
 }
 
 define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vzip1_undef_01:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1.8h v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vzip1_undef_01:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vzip1_undef_01:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
 }
 
 define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vzip1_undef_0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1.8h v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vzip1_undef_0:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vzip1_undef_0:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
 }
@@ -167,10 +190,19 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
 }
 
 define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK-LABEL: vzip1_undef_012:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1.8h v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vzip1_undef_012:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vzip1_undef_012:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ret
   %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %s
 }
@@ -223,12 +255,20 @@ define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
 }
 
 define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
-; CHECK-LABEL: combine2_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    zip1.16b v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine2_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine2_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
+; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
+; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
+; CHECK-GI-NEXT:    mov.16b v0, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -247,12 +287,20 @@ define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
 }
 
 define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
-; CHECK-LABEL: combine2_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    zip1.8h v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine2_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine2_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    zip1.4h v2, v0, v1
+; CHECK-GI-NEXT:    zip2.4h v0, v0, v1
+; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
+; CHECK-GI-NEXT:    mov.16b v0, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -271,12 +319,20 @@ define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
 }
 
 define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
-; CHECK-LABEL: combine2_v4i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    zip1.4s v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine2_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    zip1.4s v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine2_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    zip1.2s v2, v0, v1
+; CHECK-GI-NEXT:    zip2.2s v0, v0, v1
+; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
+; CHECK-GI-NEXT:    mov.16b v0, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
   %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
   %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -295,12 +351,20 @@ define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 }
 
 define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
-; CHECK-LABEL: combine2_v16i8_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    zip1.16b v0, v0, v1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine2_v16i8_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine2_v16i8_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
+; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
+; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
+; CHECK-GI-NEXT:    mov.16b v0, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -320,14 +384,23 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
 
 ; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
 define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
-; CHECK-LABEL: combine_v8i16_8first:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
-; CHECK-NEXT:    adrp x8, .LCPI25_0
-; CHECK-NEXT:    fmov d2, d0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
-; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine_v8i16_8first:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
+; CHECK-SD-NEXT:    adrp x8, .LCPI25_0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine_v8i16_8first:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
+; CHECK-GI-NEXT:    fmov d31, d1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
   ret <16 x i8> %3
 }
@@ -335,14 +408,23 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
 
 ; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
 define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
-; CHECK-LABEL: combine_v8i16_8firstundef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
-; CHECK-NEXT:    adrp x8, .LCPI26_0
-; CHECK-NEXT:    fmov d2, d0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
-; CHECK-NEXT:    tbl.16b v0, { v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: combine_v8i16_8firstundef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
+; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: combine_v8i16_8firstundef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
+; CHECK-GI-NEXT:    fmov d31, d1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
+; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
   ret <16 x i8> %3
 }

diff  --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll
index 26ffa2727a1cd1..037451eb803c3f 100644
--- a/llvm/test/CodeGen/AArch64/neon-perm.ll
+++ b/llvm/test/CodeGen/AArch64/neon-perm.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
@@ -1731,10 +1732,23 @@ entry:
 }
 
 define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
-; CHECK-LABEL: test_vzip1_v4i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_vzip1_v4i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    zip1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vzip1_v4i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    mov b1, v0.b[1]
+; CHECK-GI-NEXT:    mov b2, v0.b[2]
+; CHECK-GI-NEXT:    mov b3, v0.b[3]
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT:    mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
  %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i8> %lo
 }
@@ -2201,10 +2215,15 @@ entry:
 }
 
 define <16 x i8> @test_undef_vuzp1q_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_s8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_s8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_s8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   ret <16 x i8> %shuffle.i
@@ -2221,20 +2240,30 @@ entry:
 }
 
 define <8 x i16> @test_undef_vuzp1q_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vuzp1q_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_s32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_s32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_s32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   ret <4 x i32> %shuffle.i
@@ -2251,10 +2280,15 @@ entry:
 }
 
 define <16 x i8> @test_undef_vuzp1q_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_u8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_u8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_u8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   ret <16 x i8> %shuffle.i
@@ -2271,20 +2305,30 @@ entry:
 }
 
 define <8 x i16> @test_undef_vuzp1q_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_u16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_u16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_u16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vuzp1q_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_u32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_u32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_u32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   ret <4 x i32> %shuffle.i
@@ -2311,10 +2355,15 @@ entry:
 }
 
 define <16 x i8> @test_undef_vuzp1q_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_p8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.8b, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_p8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_p8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   ret <16 x i8> %shuffle.i
@@ -2331,10 +2380,15 @@ entry:
 }
 
 define <8 x i16> @test_undef_vuzp1q_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vuzp1q_p16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vuzp1q_p16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vuzp1q_p16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   ret <8 x i16> %shuffle.i
@@ -2791,285 +2845,435 @@ entry:
 }
 
 define <8 x i8> @test_undef_vtrn1_s8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1_s8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_s8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_s8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn1q_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_s8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_s8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_s8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn1_s16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn1q_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vtrn1q_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_s32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_s32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_s32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i32> %shuffle.i
 }
 
 define <8 x i8> @test_undef_vtrn1_u8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1_u8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_u8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_u8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn1q_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_u8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_u8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_u8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn1_u16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1_u16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_u16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_u16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn1q_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_u16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_u16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_u16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vtrn1q_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_u32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_u32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_u32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i32> %shuffle.i
 }
 
 define <4 x float> @test_undef_vtrn1q_f32(<4 x float> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x float> %shuffle.i
 }
 
 define <8 x i8> @test_undef_vtrn1_p8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1_p8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_p8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_p8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn1q_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_p8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_p8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_p8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn1_p16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1_p16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1_p16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1_p16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn1q_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn1q_p16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn1q_p16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn1q_p16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ret <8 x i16> %shuffle.i
 }
 
 define <8 x i8> @test_undef_vtrn2_s8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2_s8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_s8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_s8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn2q_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_s8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_s8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_s8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn2_s16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.4h, v0.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.4h, v0.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn2q_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vtrn2q_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_s32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_s32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_s32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i32> %shuffle.i
 }
 
 define <8 x i8> @test_undef_vtrn2_u8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2_u8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_u8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_u8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn2q_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_u8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_u8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_u8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn2_u16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2_u16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.4h, v0.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_u16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.4h, v0.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_u16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn2q_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_u16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_u16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_u16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i16> %shuffle.i
 }
 
 define <4 x i32> @test_undef_vtrn2q_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_u32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_u32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_u32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i32> %shuffle.i
 }
 
 define <4 x float> @test_undef_vtrn2q_f32(<4 x float> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev64 v0.4s, v0.4s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x float> %shuffle.i
 }
 
 define <8 x i8> @test_undef_vtrn2_p8(<8 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2_p8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_p8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_p8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i8> %shuffle.i
 }
 
 define <16 x i8> @test_undef_vtrn2q_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_p8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev16 v0.16b, v0.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_p8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev16 v0.16b, v0.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_p8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %shuffle.i
 }
 
 define <4 x i16> @test_undef_vtrn2_p16(<4 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2_p16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.4h, v0.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2_p16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.4h, v0.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2_p16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i16> %shuffle.i
 }
 
 define <8 x i16> @test_undef_vtrn2q_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_undef_vtrn2q_p16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev32 v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_undef_vtrn2q_p16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    rev32 v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_undef_vtrn2q_p16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    trn2 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i16> %shuffle.i
@@ -3886,13 +4090,21 @@ entry:
 }
 
 define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
-; CHECK-LABEL: test_uzp:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    uzp1 v2.8b, v0.8b, v1.8b
-; CHECK-NEXT:    uzp2 v1.8b, v0.8b, v1.8b
-; CHECK-NEXT:    fmov d0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_uzp:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    uzp1 v2.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    uzp2 v1.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    fmov d0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_uzp:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uzp1 v2.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    uzp2 v1.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    fmov d0, d2
+; CHECK-GI-NEXT:    ret
 
   %vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>


        


More information about the llvm-commits mailing list