[llvm] c5fcc2e - [AArch64] Add addp from shuffles tests. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 06:42:28 PDT 2024
Author: David Green
Date: 2024-06-07T14:42:22+01:00
New Revision: c5fcc2ea55372060760b0ba46d36d03ed39825d5
URL: https://github.com/llvm/llvm-project/commit/c5fcc2ea55372060760b0ba46d36d03ed39825d5
DIFF: https://github.com/llvm/llvm-project/commit/c5fcc2ea55372060760b0ba46d36d03ed39825d5.diff
LOG: [AArch64] Add addp from shuffles tests. NFC
Added:
llvm/test/CodeGen/AArch64/addp-shuffle.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
new file mode 100644
index 0000000000000..c15a84c7b3a2a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+define <4 x i32> @deinterleave_shuffle_v8i32(<8 x i32> %a) { ; Pairwise integer add of %a: result lane i = a[2i] + a[2i+1], written as two deinterleaving shuffles plus an add.
+; CHECK-LABEL: deinterleave_shuffle_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = add <4 x i32> %r0, %r1 ; even + odd, i.e. the sum of each adjacent lane pair
+ ret <4 x i32> %o
+}
+
+define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) { ; Pairwise integer add of %a with the add operands commuted (odd lanes first, then even lanes).
+; CHECK-LABEL: deinterleave_shuffle_v8i32_c:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = add <4 x i32> %r1, %r0 ; operands swapped relative to the even-first form: odd + even
+ ret <4 x i32> %o
+}
+
+define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) { ; Pairwise integer add where the input is a single <4 x i32> and the result is a <2 x i32>.
+; CHECK-LABEL: deinterleave_shuffle_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
+; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: add v0.2s, v2.2s, v0.2s
+; CHECK-NEXT: ret
+ %r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2> ; even-indexed lanes: a[0], a[2]
+ %r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3> ; odd-indexed lanes: a[1], a[3]
+ %o = add <2 x i32> %r0, %r1 ; <a[0]+a[1], a[2]+a[3]>
+ ret <2 x i32> %o
+}
+
+define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) { ; Pairwise integer add on i16 elements: result lane i = a[2i] + a[2i+1].
+; CHECK-LABEL: deinterleave_shuffle_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: add v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: ret
+ %r0 = shufflevector <16 x i16> %a, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed lanes of %a
+ %r1 = shufflevector <16 x i16> %a, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed lanes of %a
+ %o = add <8 x i16> %r0, %r1 ; sum of each adjacent lane pair
+ ret <8 x i16> %o
+}
+
+define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) { ; Pairwise integer add on i8 elements: result lane i = a[2i] + a[2i+1].
+; CHECK-LABEL: deinterleave_shuffle_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: add v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ret
+ %r0 = shufflevector <32 x i8> %a, <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> ; even-indexed lanes of %a
+ %r1 = shufflevector <32 x i8> %a, <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> ; odd-indexed lanes of %a
+ %o = add <16 x i8> %r0, %r1 ; sum of each adjacent lane pair
+ ret <16 x i8> %o
+}
+
+define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) { ; Pairwise integer add on i64 elements; the <4 x i64> result is checked as two separate .2d adds (into v0 and v1).
+; CHECK-LABEL: deinterleave_shuffle_v8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
+; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
+; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
+; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: add v1.2d, v4.2d, v2.2d
+; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = add <4 x i64> %r0, %r1 ; sum of each adjacent lane pair
+ ret <4 x i64> %o
+}
+
+define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) { ; Floating-point variant of the pairwise add: result lane i = a[2i] + a[2i+1] using fadd.
+; CHECK-LABEL: deinterleave_shuffle_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = fadd <4 x float> %r0, %r1 ; even + odd, i.e. the sum of each adjacent lane pair
+ ret <4 x float> %o
+}
+
+define <4 x float> @deinterleave_shuffle_v8f32_c(<8 x float> %a) { ; Floating-point pairwise add with the fadd operands commuted (odd lanes first, then even lanes).
+; CHECK-LABEL: deinterleave_shuffle_v8f32_c:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = fadd <4 x float> %r1, %r0 ; operands swapped relative to the even-first form: odd + even
+ ret <4 x float> %o
+}
+
+define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) { ; Half-precision pairwise fadd; expected output differs between the plain run (CHECK-NOFP16) and the -mattr=+fullfp16 run (CHECK-FP16).
+; CHECK-NOFP16-LABEL: deinterleave_shuffle_v16f16:
+; CHECK-NOFP16: // %bb.0:
+; CHECK-NOFP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
+; CHECK-NOFP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-NOFP16-NEXT: fcvtl v3.4s, v2.4h
+; CHECK-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
+; CHECK-NOFP16-NEXT: fadd v1.4s, v3.4s, v1.4s
+; CHECK-NOFP16-NEXT: fadd v2.4s, v2.4s, v0.4s
+; CHECK-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: deinterleave_shuffle_v16f16:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-FP16-NEXT: fadd v0.8h, v2.8h, v0.8h
+; CHECK-FP16-NEXT: ret
+ %r0 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed lanes of %a
+ %r1 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed lanes of %a
+ %o = fadd <8 x half> %r0, %r1 ; sum of each adjacent lane pair
+ ret <8 x half> %o
+}
+
+define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) { ; Double-precision pairwise fadd; the <4 x double> result is checked as two separate .2d fadds (into v0 and v1).
+; CHECK-LABEL: deinterleave_shuffle_v8f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
+; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
+; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
+; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fadd v1.2d, v4.2d, v2.2d
+; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a
+ %r1 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a
+ %o = fadd <4 x double> %r0, %r1 ; sum of each adjacent lane pair
+ ret <4 x double> %o
+}
+
+define <4 x i32> @udot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) { ; Unsigned 4-way dot-product pattern: result lane i = z[i] + sum over k=0..3 of zext(a[4i+k]) * zext(b[4i+k]).
+; CHECK-LABEL: udot:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-NEXT: ushll v4.8h, v2.8b, #0
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NEXT: umull2 v5.4s, v3.8h, v4.8h
+; CHECK-NEXT: umull v3.4s, v3.4h, v4.4h
+; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp1 v2.4s, v3.4s, v5.4s
+; CHECK-NEXT: uzp2 v3.4s, v3.4s, v5.4s
+; CHECK-NEXT: uzp1 v6.4s, v1.4s, v4.4s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
+; CHECK-NEXT: uzp1 v3.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp2 v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %za = zext <16 x i8> %a to <16 x i32> ; zero-extend each byte of %a to 32 bits
+ %zb = zext <16 x i8> %b to <16 x i32> ; zero-extend each byte of %b to 32 bits
+ %m = mul <16 x i32> %za, %zb ; 16 lane-wise products
+ %r0 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed products
+ %r1 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed products
+ %m2 = add <8 x i32> %r0, %r1 ; first pairwise reduction: 16 products -> 8 sums
+ %s0 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed partial sums
+ %s1 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed partial sums
+ %o = add <4 x i32> %s0, %s1 ; second pairwise reduction: 8 sums -> 4, each covering four adjacent products
+ %n = add <4 x i32> %z, %o ; accumulate into %z
+ ret <4 x i32> %n
+}
+
+define <4 x i32> @sdot(<4 x i32> %z, <16 x i8> %a, <16 x i8> %b) { ; Signed 4-way dot-product pattern: result lane i = z[i] + sum over k=0..3 of sext(a[4i+k]) * sext(b[4i+k]).
+; CHECK-LABEL: sdot:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v3.8h, v1.8b, #0
+; CHECK-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NEXT: smull2 v5.4s, v3.8h, v4.8h
+; CHECK-NEXT: smull v3.4s, v3.4h, v4.4h
+; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
+; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: uzp1 v2.4s, v3.4s, v5.4s
+; CHECK-NEXT: uzp2 v3.4s, v3.4s, v5.4s
+; CHECK-NEXT: uzp1 v6.4s, v1.4s, v4.4s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
+; CHECK-NEXT: uzp1 v3.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp2 v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %za = sext <16 x i8> %a to <16 x i32> ; sign-extend each byte of %a to 32 bits
+ %zb = sext <16 x i8> %b to <16 x i32> ; sign-extend each byte of %b to 32 bits
+ %m = mul <16 x i32> %za, %zb ; 16 lane-wise products
+ %r0 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed products
+ %r1 = shufflevector <16 x i32> %m, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed products
+ %m2 = add <8 x i32> %r0, %r1 ; first pairwise reduction: 16 products -> 8 sums
+ %s0 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed partial sums
+ %s1 = shufflevector <8 x i32> %m2, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed partial sums
+ %o = add <4 x i32> %s0, %s1 ; second pairwise reduction: 8 sums -> 4, each covering four adjacent products
+ %n = add <4 x i32> %z, %o ; accumulate into %z
+ ret <4 x i32> %n
+}
More information about the llvm-commits
mailing list