[llvm] 4585bff - [AArch64] Add new shuffles tests, and regenerate aarch64-wide-shuffle.ll and neon-wide-splat.ll. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 13 10:10:55 PDT 2022


Author: David Green
Date: 2022-04-13T18:10:49+01:00
New Revision: 4585bff408bca58a883b9593f578652cff0f43c9

URL: https://github.com/llvm/llvm-project/commit/4585bff408bca58a883b9593f578652cff0f43c9
DIFF: https://github.com/llvm/llvm-project/commit/4585bff408bca58a883b9593f578652cff0f43c9.diff

LOG: [AArch64] Add new shuffles tests, and regenerate aarch64-wide-shuffle.ll and neon-wide-splat.ll. NFC

Added: 
    llvm/test/CodeGen/AArch64/shuffles.ll

Modified: 
    llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
    llvm/test/CodeGen/AArch64/neon-wide-splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
index d06df7a87fd7b..64cd0504467c1 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -1,16 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
 define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+; CHECK-LABEL: f:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #2
+; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #4
+; CHECK-NEXT:    ret
 entry:
   ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext
   ; but only match the last three instructions as the first two could be combined to
   ; a dup2 at some stage.
-  ; CHECK: dup
-  ; CHECK: ext
-  ; CHECK: ext
   %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
   %vgetq_lane = trunc i32 %x4 to i16
   %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
index 6290f85dc1cec..16386f5d3e06d 100644
--- a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
+++ b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
@@ -1,8 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <4 x i16> @shuffle1(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle1:
-; CHECK:         dup v0.2s, v0.s[0]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 1>
@@ -11,7 +14,9 @@ entry:
 
 define <4 x i16> @shuffle2(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle2:
-; CHECK:         dup v0.2s, v0.s[1]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 3>
@@ -20,7 +25,8 @@ entry:
 
 define <8 x i16> @shuffle3(<8 x i16> %v) {
 ; CHECK-LABEL: shuffle3:
-; CHECK:         dup v0.2d, v0.d[0]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3,
@@ -30,7 +36,8 @@ entry:
 
 define <4 x i32> @shuffle4(<4 x i32> %v) {
 ; CHECK-LABEL: shuffle4:
-; CHECK:         dup v0.2d, v0.d[0]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -39,7 +46,8 @@ entry:
 
 define <16 x i8> @shuffle5(<16 x i8> %v) {
 ; CHECK-LABEL: shuffle5:
-; CHECK:         dup v0.4s, v0.s[2]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.4s, v0.s[2]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11,
@@ -51,7 +59,8 @@ entry:
 
 define <16 x i8> @shuffle6(<16 x i8> %v) {
 ; CHECK-LABEL: shuffle6:
-; CHECK:         dup v0.2d, v0.d[1]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    dup v0.2d, v0.d[1]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11,
@@ -63,7 +72,9 @@ entry:
 
 define <8 x i8> @shuffle7(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle7:
-; CHECK:         dup v0.2s, v0.s[1]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef,
@@ -73,7 +84,9 @@ entry:
 
 define <8 x i8> @shuffle8(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle8:
-; CHECK:         dup v0.4h, v0.h[3]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> <i32 6, i32 7, i32 6, i32 undef,
@@ -84,7 +97,10 @@ entry:
 ; No blocks
 define <8 x i8> @shuffle_not1(<16 x i8> %v) {
 ; CHECK-LABEL: shuffle_not1:
-; CHECK:         ext v0.16b, v0.16b, v0.16b, #2
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %res
 }
@@ -92,9 +108,11 @@ define <8 x i8> @shuffle_not1(<16 x i8> %v) {
 ; Block is not a proper lane
 define <4 x i32> @shuffle_not2(<4 x i32> %v) {
 ; CHECK-LABEL: shuffle_not2:
-; CHECK-NOT:     dup
-; CHECK:         ext
-; CHECK:         ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 2>
   ret <4 x i32> %res
@@ -103,8 +121,8 @@ entry:
 ; Block size is equal to vector size
 define <4 x i16> @shuffle_not3(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle_not3:
-; CHECK-NOT:     dup
-; CHECK:         ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i16> %res
@@ -113,8 +131,13 @@ entry:
 ; Blocks mismatch
 define <8 x i8> @shuffle_not4(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle_not4:
-; CHECK-NOT:     dup
-; CHECK:         ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI11_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %res = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef,
                                                                i32 undef, i32 5, i32 5, i32 undef>

diff  --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
new file mode 100644
index 0000000000000..9365344da11c2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+
+define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: test_shuf1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v3.16b, v6.16b, v7.16b, #8
+; CHECK-NEXT:    ext v5.16b, v6.16b, v4.16b, #12
+; CHECK-NEXT:    uzp1 v6.4s, v1.4s, v0.4s
+; CHECK-NEXT:    uzp2 v4.4s, v2.4s, v4.4s
+; CHECK-NEXT:    ext v3.16b, v3.16b, v3.16b, #12
+; CHECK-NEXT:    ext v5.16b, v7.16b, v5.16b, #8
+; CHECK-NEXT:    trn2 v6.4s, v6.4s, v1.4s
+; CHECK-NEXT:    trn1 v2.4s, v4.4s, v2.4s
+; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #12
+; CHECK-NEXT:    ext v3.16b, v1.16b, v3.16b, #8
+; CHECK-NEXT:    rev64 v16.4s, v5.4s
+; CHECK-NEXT:    dup v7.4s, v7.s[0]
+; CHECK-NEXT:    ext v1.16b, v0.16b, v6.16b, #12
+; CHECK-NEXT:    mov v2.s[3], v7.s[3]
+; CHECK-NEXT:    ext v0.16b, v3.16b, v4.16b, #8
+; CHECK-NEXT:    ext v3.16b, v5.16b, v16.16b, #8
+; CHECK-NEXT:    ret
+  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30> ; mask indices 0-15 select lanes of %x, 16-31 select lanes of %y (index - 16)
+  ret <16 x i32> %s3
+}
+
+define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: test_shuf2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v6.16b, v7.16b, #8
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #12
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #12
+; CHECK-NEXT:    ext v0.16b, v1.16b, v0.16b, #8
+; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
+; CHECK-NEXT:    ret
+  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4> ; result = y[13], y[10], x[7], x[4]; same indices as lanes 0-3 of the test_shuf1 mask
+  ret <4 x i32> %s3
+}
+
+define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: test_shuf3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
+; CHECK-NEXT:    trn2 v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2> ; result = x[3], x[6], x[5], x[2]; same indices as lanes 4-7 of the test_shuf1 mask
+  ret <4 x i32> %s3
+}
+
+define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: test_shuf4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.4s, v2.4s, v4.4s
+; CHECK-NEXT:    dup v1.4s, v7.s[0]
+; CHECK-NEXT:    trn1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    mov v0.s[3], v1.s[3]
+; CHECK-NEXT:    ret
+  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28> ; result = x[9], x[8], y[1], y[12]; same indices as lanes 8-11 of the test_shuf1 mask
+  ret <4 x i32> %s3
+}
+
+define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: test_shuf5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v6.16b, v4.16b, #12
+; CHECK-NEXT:    ext v0.16b, v7.16b, v0.16b, #8
+; CHECK-NEXT:    rev64 v1.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
+  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30> ; result = y[11], y[0], y[15], y[14]; same indices as lanes 12-15 of the test_shuf1 mask
+  ret <4 x i32> %s3
+}
+
+define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
+; CHECK-LABEL: test1503:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    trn1 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v1.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+{
+  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3> ; result = a[1], b[1], a[0], a[3]; the digits in the function name are the mask indices
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
+; CHECK-LABEL: test4366:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 v2.4s, v1.4s, v0.4s
+; CHECK-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #4
+; CHECK-NEXT:    zip2 v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+{
+  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6> ; result = b[0], a[3], b[2], b[2]; the digits in the function name are the mask indices
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
+; CHECK-LABEL: test7367:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #12
+; CHECK-NEXT:    ret
+{
+  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7> ; result = b[3], a[3], b[2], b[3]; the digits in the function name are the mask indices
+  ret <4 x i32> %r
+}


        


More information about the llvm-commits mailing list