[llvm] 84ccd01 - [AArch64] Some tests to show reconstructing truncates. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 5 10:35:46 PST 2022
Author: David Green
Date: 2022-03-05T18:35:43Z
New Revision: 84ccd015e7dd3ca57c4a9366ecd2b9a7430f505d
URL: https://github.com/llvm/llvm-project/commit/84ccd015e7dd3ca57c4a9366ecd2b9a7430f505d
DIFF: https://github.com/llvm/llvm-project/commit/84ccd015e7dd3ca57c4a9366ecd2b9a7430f505d.diff
LOG: [AArch64] Some tests to show reconstructing truncates. NFC
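For context, a minimal sketch (not part of this commit; the function name trunc_concat is illustrative only) of the vector-level form that the scalarized extract/trunc/insert chains below correspond to: concatenating the two halves and truncating the whole vector, which lowers to a single uzp1 as seen in the extract_2_v4i16 check lines.

define <8 x i8> @trunc_concat(<4 x i16> %a, <4 x i16> %b) {
  ; Concatenate the two <4 x i16> halves into one <8 x i16> ...
  %ab = shufflevector <4 x i16> %a, <4 x i16> %b,
                      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; ... then truncate element-wise to <8 x i8>.
  %t = trunc <8 x i16> %ab to <8 x i8>
  ret <8 x i8> %t
}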
Added:
llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
new file mode 100644
index 0000000000000..14cc333120c7c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
@@ -0,0 +1,490 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @extract_2_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: extract_2_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i16> %a, i32 0
+ %a1 = extractelement <4 x i16> %a, i32 1
+ %a2 = extractelement <4 x i16> %a, i32 2
+ %a3 = extractelement <4 x i16> %a, i32 3
+ %b0 = extractelement <4 x i16> %b, i32 0
+ %b1 = extractelement <4 x i16> %b, i32 1
+ %b2 = extractelement <4 x i16> %b, i32 2
+ %b3 = extractelement <4 x i16> %b, i32 3
+ %t0 = trunc i16 %a0 to i8
+ %t1 = trunc i16 %a1 to i8
+ %t2 = trunc i16 %a2 to i8
+ %t3 = trunc i16 %a3 to i8
+ %t4 = trunc i16 %b0 to i8
+ %t5 = trunc i16 %b1 to i8
+ %t6 = trunc i16 %b2 to i8
+ %t7 = trunc i16 %b3 to i8
+ %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
+ ret <8 x i8> %i7
+}
+
+define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: extract_2_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[3]
+; CHECK-NEXT: mov v0.b[1], w8
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.b[2], w9
+; CHECK-NEXT: mov w9, v1.s[1]
+; CHECK-NEXT: mov v0.b[3], w10
+; CHECK-NEXT: mov v0.b[4], w8
+; CHECK-NEXT: mov w8, v1.s[2]
+; CHECK-NEXT: mov v0.b[5], w9
+; CHECK-NEXT: mov w9, v1.s[3]
+; CHECK-NEXT: mov v0.b[6], w8
+; CHECK-NEXT: mov v0.b[7], w9
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i32> %a, i32 0
+ %a1 = extractelement <4 x i32> %a, i32 1
+ %a2 = extractelement <4 x i32> %a, i32 2
+ %a3 = extractelement <4 x i32> %a, i32 3
+ %b0 = extractelement <4 x i32> %b, i32 0
+ %b1 = extractelement <4 x i32> %b, i32 1
+ %b2 = extractelement <4 x i32> %b, i32 2
+ %b3 = extractelement <4 x i32> %b, i32 3
+ %t0 = trunc i32 %a0 to i8
+ %t1 = trunc i32 %a1 to i8
+ %t2 = trunc i32 %a2 to i8
+ %t3 = trunc i32 %a3 to i8
+ %t4 = trunc i32 %b0 to i8
+ %t5 = trunc i32 %b1 to i8
+ %t6 = trunc i32 %b2 to i8
+ %t7 = trunc i32 %b3 to i8
+ %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
+ ret <8 x i8> %i7
+}
+
+define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
+; CHECK-LABEL: extract_4_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w9, v0.h[0]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: umov w8, v2.h[0]
+; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT: fmov s4, w9
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: mov v4.b[1], w10
+; CHECK-NEXT: umov w10, v0.h[3]
+; CHECK-NEXT: mov v4.b[2], w9
+; CHECK-NEXT: umov w9, v1.h[0]
+; CHECK-NEXT: mov v4.b[3], w10
+; CHECK-NEXT: umov w10, v1.h[1]
+; CHECK-NEXT: mov v4.b[4], w9
+; CHECK-NEXT: umov w9, v1.h[2]
+; CHECK-NEXT: mov v4.b[5], w10
+; CHECK-NEXT: umov w10, v1.h[3]
+; CHECK-NEXT: mov v4.b[6], w9
+; CHECK-NEXT: umov w9, v2.h[1]
+; CHECK-NEXT: mov v4.b[7], w10
+; CHECK-NEXT: mov v4.b[8], w8
+; CHECK-NEXT: umov w8, v2.h[2]
+; CHECK-NEXT: mov v4.b[9], w9
+; CHECK-NEXT: umov w9, v2.h[3]
+; CHECK-NEXT: mov v4.b[10], w8
+; CHECK-NEXT: umov w8, v3.h[0]
+; CHECK-NEXT: mov v4.b[11], w9
+; CHECK-NEXT: umov w9, v3.h[1]
+; CHECK-NEXT: mov v4.b[12], w8
+; CHECK-NEXT: umov w8, v3.h[2]
+; CHECK-NEXT: mov v4.b[13], w9
+; CHECK-NEXT: umov w9, v3.h[3]
+; CHECK-NEXT: mov v4.b[14], w8
+; CHECK-NEXT: mov v4.b[15], w9
+; CHECK-NEXT: mov v0.16b, v4.16b
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i16> %a, i32 0
+ %a1 = extractelement <4 x i16> %a, i32 1
+ %a2 = extractelement <4 x i16> %a, i32 2
+ %a3 = extractelement <4 x i16> %a, i32 3
+ %b0 = extractelement <4 x i16> %b, i32 0
+ %b1 = extractelement <4 x i16> %b, i32 1
+ %b2 = extractelement <4 x i16> %b, i32 2
+ %b3 = extractelement <4 x i16> %b, i32 3
+ %c0 = extractelement <4 x i16> %c, i32 0
+ %c1 = extractelement <4 x i16> %c, i32 1
+ %c2 = extractelement <4 x i16> %c, i32 2
+ %c3 = extractelement <4 x i16> %c, i32 3
+ %d0 = extractelement <4 x i16> %d, i32 0
+ %d1 = extractelement <4 x i16> %d, i32 1
+ %d2 = extractelement <4 x i16> %d, i32 2
+ %d3 = extractelement <4 x i16> %d, i32 3
+ %t0 = trunc i16 %a0 to i8
+ %t1 = trunc i16 %a1 to i8
+ %t2 = trunc i16 %a2 to i8
+ %t3 = trunc i16 %a3 to i8
+ %t4 = trunc i16 %b0 to i8
+ %t5 = trunc i16 %b1 to i8
+ %t6 = trunc i16 %b2 to i8
+ %t7 = trunc i16 %b3 to i8
+ %t8 = trunc i16 %c0 to i8
+ %t9 = trunc i16 %c1 to i8
+ %t10 = trunc i16 %c2 to i8
+ %t11 = trunc i16 %c3 to i8
+ %t12 = trunc i16 %d0 to i8
+ %t13 = trunc i16 %d1 to i8
+ %t14 = trunc i16 %d2 to i8
+ %t15 = trunc i16 %d3 to i8
+ %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
+ %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
+ %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
+ %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
+ %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
+ %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
+ %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
+ %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
+ %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
+ ret <16 x i8> %i15
+}
+
+define <16 x i8> @extract_4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-LABEL: extract_4_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[3]
+; CHECK-NEXT: mov v0.b[1], w8
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.b[2], w9
+; CHECK-NEXT: mov w9, v1.s[1]
+; CHECK-NEXT: mov v0.b[3], w10
+; CHECK-NEXT: mov v0.b[4], w8
+; CHECK-NEXT: mov w8, v1.s[2]
+; CHECK-NEXT: mov v0.b[5], w9
+; CHECK-NEXT: mov w9, v1.s[3]
+; CHECK-NEXT: mov v0.b[6], w8
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov v0.b[7], w9
+; CHECK-NEXT: mov w9, v2.s[1]
+; CHECK-NEXT: mov v0.b[8], w8
+; CHECK-NEXT: mov w8, v2.s[2]
+; CHECK-NEXT: mov v0.b[9], w9
+; CHECK-NEXT: mov w9, v2.s[3]
+; CHECK-NEXT: mov v0.b[10], w8
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov v0.b[11], w9
+; CHECK-NEXT: mov w9, v3.s[1]
+; CHECK-NEXT: mov v0.b[12], w8
+; CHECK-NEXT: mov w8, v3.s[2]
+; CHECK-NEXT: mov v0.b[13], w9
+; CHECK-NEXT: mov w9, v3.s[3]
+; CHECK-NEXT: mov v0.b[14], w8
+; CHECK-NEXT: mov v0.b[15], w9
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i32> %a, i32 0
+ %a1 = extractelement <4 x i32> %a, i32 1
+ %a2 = extractelement <4 x i32> %a, i32 2
+ %a3 = extractelement <4 x i32> %a, i32 3
+ %b0 = extractelement <4 x i32> %b, i32 0
+ %b1 = extractelement <4 x i32> %b, i32 1
+ %b2 = extractelement <4 x i32> %b, i32 2
+ %b3 = extractelement <4 x i32> %b, i32 3
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %d0 = extractelement <4 x i32> %d, i32 0
+ %d1 = extractelement <4 x i32> %d, i32 1
+ %d2 = extractelement <4 x i32> %d, i32 2
+ %d3 = extractelement <4 x i32> %d, i32 3
+ %t0 = trunc i32 %a0 to i8
+ %t1 = trunc i32 %a1 to i8
+ %t2 = trunc i32 %a2 to i8
+ %t3 = trunc i32 %a3 to i8
+ %t4 = trunc i32 %b0 to i8
+ %t5 = trunc i32 %b1 to i8
+ %t6 = trunc i32 %b2 to i8
+ %t7 = trunc i32 %b3 to i8
+ %t8 = trunc i32 %c0 to i8
+ %t9 = trunc i32 %c1 to i8
+ %t10 = trunc i32 %c2 to i8
+ %t11 = trunc i32 %c3 to i8
+ %t12 = trunc i32 %d0 to i8
+ %t13 = trunc i32 %d1 to i8
+ %t14 = trunc i32 %d2 to i8
+ %t15 = trunc i32 %d3 to i8
+ %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
+ %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
+ %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
+ %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
+ %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
+ %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
+ %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
+ %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
+ %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
+ ret <16 x i8> %i15
+}
+
+define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x i16> %d) {
+; CHECK-LABEL: extract_4_mixed:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w8, v0.h[0]
+; CHECK-NEXT: umov w9, v0.h[1]
+; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT: fmov s4, w8
+; CHECK-NEXT: umov w8, v0.h[2]
+; CHECK-NEXT: mov v4.b[1], w9
+; CHECK-NEXT: umov w9, v0.h[3]
+; CHECK-NEXT: mov v4.b[2], w8
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v4.b[3], w9
+; CHECK-NEXT: mov w9, v1.s[1]
+; CHECK-NEXT: mov v4.b[4], w8
+; CHECK-NEXT: mov w8, v1.s[2]
+; CHECK-NEXT: mov v4.b[5], w9
+; CHECK-NEXT: mov w9, v1.s[3]
+; CHECK-NEXT: mov v4.b[6], w8
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov v4.b[7], w9
+; CHECK-NEXT: mov w9, v2.s[1]
+; CHECK-NEXT: mov v4.b[8], w8
+; CHECK-NEXT: mov w8, v2.s[2]
+; CHECK-NEXT: mov v4.b[9], w9
+; CHECK-NEXT: mov w9, v2.s[3]
+; CHECK-NEXT: mov v4.b[10], w8
+; CHECK-NEXT: umov w8, v3.h[0]
+; CHECK-NEXT: mov v4.b[11], w9
+; CHECK-NEXT: umov w9, v3.h[1]
+; CHECK-NEXT: mov v4.b[12], w8
+; CHECK-NEXT: umov w8, v3.h[2]
+; CHECK-NEXT: mov v4.b[13], w9
+; CHECK-NEXT: umov w9, v3.h[3]
+; CHECK-NEXT: mov v4.b[14], w8
+; CHECK-NEXT: mov v4.b[15], w9
+; CHECK-NEXT: mov v0.16b, v4.16b
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i16> %a, i32 0
+ %a1 = extractelement <4 x i16> %a, i32 1
+ %a2 = extractelement <4 x i16> %a, i32 2
+ %a3 = extractelement <4 x i16> %a, i32 3
+ %b0 = extractelement <4 x i32> %b, i32 0
+ %b1 = extractelement <4 x i32> %b, i32 1
+ %b2 = extractelement <4 x i32> %b, i32 2
+ %b3 = extractelement <4 x i32> %b, i32 3
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %d0 = extractelement <4 x i16> %d, i32 0
+ %d1 = extractelement <4 x i16> %d, i32 1
+ %d2 = extractelement <4 x i16> %d, i32 2
+ %d3 = extractelement <4 x i16> %d, i32 3
+ %t0 = trunc i16 %a0 to i8
+ %t1 = trunc i16 %a1 to i8
+ %t2 = trunc i16 %a2 to i8
+ %t3 = trunc i16 %a3 to i8
+ %t4 = trunc i32 %b0 to i8
+ %t5 = trunc i32 %b1 to i8
+ %t6 = trunc i32 %b2 to i8
+ %t7 = trunc i32 %b3 to i8
+ %t8 = trunc i32 %c0 to i8
+ %t9 = trunc i32 %c1 to i8
+ %t10 = trunc i32 %c2 to i8
+ %t11 = trunc i32 %c3 to i8
+ %t12 = trunc i16 %d0 to i8
+ %t13 = trunc i16 %d1 to i8
+ %t14 = trunc i16 %d2 to i8
+ %t15 = trunc i16 %d3 to i8
+ %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
+ %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
+ %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
+ %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
+ %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
+ %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
+ %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
+ %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
+ %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
+ ret <16 x i8> %i15
+}
+
+define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-LABEL: extract_4_v4i32_badindex:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[3]
+; CHECK-NEXT: mov v0.b[1], w8
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.b[2], w9
+; CHECK-NEXT: mov w9, v1.s[2]
+; CHECK-NEXT: mov v0.b[3], w10
+; CHECK-NEXT: mov v0.b[4], w8
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v0.b[5], w9
+; CHECK-NEXT: mov w9, v1.s[3]
+; CHECK-NEXT: mov v0.b[6], w8
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov v0.b[7], w9
+; CHECK-NEXT: mov w9, v2.s[1]
+; CHECK-NEXT: mov v0.b[8], w8
+; CHECK-NEXT: mov w8, v2.s[2]
+; CHECK-NEXT: mov v0.b[9], w9
+; CHECK-NEXT: mov w9, v2.s[3]
+; CHECK-NEXT: mov v0.b[10], w8
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov v0.b[11], w9
+; CHECK-NEXT: mov w9, v3.s[1]
+; CHECK-NEXT: mov v0.b[12], w8
+; CHECK-NEXT: mov w8, v3.s[2]
+; CHECK-NEXT: mov v0.b[13], w9
+; CHECK-NEXT: mov w9, v3.s[3]
+; CHECK-NEXT: mov v0.b[14], w8
+; CHECK-NEXT: mov v0.b[15], w9
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i32> %a, i32 0
+ %a1 = extractelement <4 x i32> %a, i32 1
+ %a2 = extractelement <4 x i32> %a, i32 2
+ %a3 = extractelement <4 x i32> %a, i32 3
+ %b0 = extractelement <4 x i32> %b, i32 0
+ %b1 = extractelement <4 x i32> %b, i32 2
+ %b2 = extractelement <4 x i32> %b, i32 1
+ %b3 = extractelement <4 x i32> %b, i32 3
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %d0 = extractelement <4 x i32> %d, i32 0
+ %d1 = extractelement <4 x i32> %d, i32 1
+ %d2 = extractelement <4 x i32> %d, i32 2
+ %d3 = extractelement <4 x i32> %d, i32 3
+ %t0 = trunc i32 %a0 to i8
+ %t1 = trunc i32 %a1 to i8
+ %t2 = trunc i32 %a2 to i8
+ %t3 = trunc i32 %a3 to i8
+ %t4 = trunc i32 %b0 to i8
+ %t5 = trunc i32 %b1 to i8
+ %t6 = trunc i32 %b2 to i8
+ %t7 = trunc i32 %b3 to i8
+ %t8 = trunc i32 %c0 to i8
+ %t9 = trunc i32 %c1 to i8
+ %t10 = trunc i32 %c2 to i8
+ %t11 = trunc i32 %c3 to i8
+ %t12 = trunc i32 %d0 to i8
+ %t13 = trunc i32 %d1 to i8
+ %t14 = trunc i32 %d2 to i8
+ %t15 = trunc i32 %d3 to i8
+ %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
+ %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
+ %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
+ %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
+ %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
+ %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
+ %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
+ %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
+ %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
+ %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
+ %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
+ %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
+ ret <16 x i8> %i15
+}
+
+define <16 x i8> @extract_4_v4i32_one(<4 x i32> %a) {
+; CHECK-LABEL: extract_4_v4i32_one:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: mov w10, v0.s[2]
+; CHECK-NEXT: mov w11, v0.s[3]
+; CHECK-NEXT: mov v0.b[1], w8
+; CHECK-NEXT: mov v0.b[2], w10
+; CHECK-NEXT: mov v0.b[3], w11
+; CHECK-NEXT: mov v0.b[4], w9
+; CHECK-NEXT: mov v0.b[5], w8
+; CHECK-NEXT: mov v0.b[6], w10
+; CHECK-NEXT: mov v0.b[7], w11
+; CHECK-NEXT: mov v0.b[8], w9
+; CHECK-NEXT: mov v0.b[9], w8
+; CHECK-NEXT: mov v0.b[10], w10
+; CHECK-NEXT: mov v0.b[11], w11
+; CHECK-NEXT: mov v0.b[12], w9
+; CHECK-NEXT: mov v0.b[13], w8
+; CHECK-NEXT: mov v0.b[14], w10
+; CHECK-NEXT: mov v0.b[15], w11
+; CHECK-NEXT: ret
+entry:
+ %a0 = extractelement <4 x i32> %a, i32 0
+ %a1 = extractelement <4 x i32> %a, i32 1
+ %a2 = extractelement <4 x i32> %a, i32 2
+ %a3 = extractelement <4 x i32> %a, i32 3
+ %t0 = trunc i32 %a0 to i8
+ %t1 = trunc i32 %a1 to i8
+ %t2 = trunc i32 %a2 to i8
+ %t3 = trunc i32 %a3 to i8
+ %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
+ %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
+ %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
+ %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
+ %i4 = insertelement <16 x i8> %i3, i8 %t0, i32 4
+ %i5 = insertelement <16 x i8> %i4, i8 %t1, i32 5
+ %i6 = insertelement <16 x i8> %i5, i8 %t2, i32 6
+ %i7 = insertelement <16 x i8> %i6, i8 %t3, i32 7
+ %i8 = insertelement <16 x i8> %i7, i8 %t0, i32 8
+ %i9 = insertelement <16 x i8> %i8, i8 %t1, i32 9
+ %i10 = insertelement <16 x i8> %i9, i8 %t2, i32 10
+ %i11 = insertelement <16 x i8> %i10, i8 %t3, i32 11
+ %i12 = insertelement <16 x i8> %i11, i8 %t0, i32 12
+ %i13 = insertelement <16 x i8> %i12, i8 %t1, i32 13
+ %i14 = insertelement <16 x i8> %i13, i8 %t2, i32 14
+ %i15 = insertelement <16 x i8> %i14, i8 %t3, i32 15
+ ret <16 x i8> %i15
+}
+