[llvm] de0707a - [NFC] Autogenerate several AArch64 tests.
Amaury Séchet via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 14 10:49:00 PDT 2023
Author: Amaury Séchet
Date: 2023-06-14T17:46:38Z
New Revision: de0707a2b98162ab52fa2dd9277a9bbb4f7256c7
URL: https://github.com/llvm/llvm-project/commit/de0707a2b98162ab52fa2dd9277a9bbb4f7256c7
DIFF: https://github.com/llvm/llvm-project/commit/de0707a2b98162ab52fa2dd9277a9bbb4f7256c7.diff
LOG: [NFC] Autogenerate several AArch64 tests.
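The NOTE line added at the top of each test records that the assertions were produced by
utils/update_llc_test_checks.py with UTC_ARGS: --version 2. As a rough sketch of that workflow
(the llc binary path below is illustrative and depends on the local build directory), the checks
can be regenerated from each test's RUN lines with something like:

    # path to the locally built llc is an assumption; adjust for your checkout
    python3 llvm/utils/update_llc_test_checks.py \
        --llc-binary=build/bin/llc \
        llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll \
        llvm/test/CodeGen/AArch64/arm64-neon-across.ll

The script reads each test's RUN lines, runs llc on the file, and rewrites the CHECK / CHECK-LABEL /
CHECK-NEXT lines in place, which is what produces the full per-instruction assertions seen in the
diff below.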
Added:
Modified:
llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
llvm/test/CodeGen/AArch64/arm64-neon-across.ll
llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
llvm/test/CodeGen/AArch64/arm64-neon-add-sub.ll
llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index 7f0bc6ac090b2..6db3151959c2e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@@ -14,81 +15,112 @@
@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
define i32 @t0() {
-entry:
; CHECK-LABEL: t0:
-; CHECK-DAG: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7]
-; CHECK-DAG: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7]
-; CHECK-DAG: ldr [[REG2:x[0-9]+]],
-; CHECK-DAG: str [[REG2]],
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, src
+; CHECK-NEXT: add x8, x8, :lo12:src
+; CHECK-NEXT: ldr x9, [x8]
+; CHECK-NEXT: adrp x10, dst
+; CHECK-NEXT: add x10, x10, :lo12:dst
+; CHECK-NEXT: str x9, [x10]
+; CHECK-NEXT: ldur w8, [x8, #7]
+; CHECK-NEXT: stur w8, [x10, #7]
+; CHECK-NEXT: mov w0, #0 // =0x0
+; CHECK-NEXT: ret
+entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 8 @dst, ptr align 8 @src, i32 11, i1 false)
ret i32 0
}
define void @t1(ptr nocapture %C) nounwind {
-entry:
; CHECK-LABEL: t1:
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: str [[DEST:q[0-9]+]], [x0]
-; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
-; CHECK: stur [[DEST:q[0-9]+]], [x0, #15]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .L.str1
+; CHECK-NEXT: add x8, x8, :lo12:.L.str1
+; CHECK-NEXT: ldr q0, [x8]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ldur q0, [x8, #15]
+; CHECK-NEXT: stur q0, [x0, #15]
+; CHECK-NEXT: ret
+entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false)
ret void
}
define void @t2(ptr nocapture %C) nounwind {
-entry:
; CHECK-LABEL: t2:
-; CHECK: mov [[REG3:w[0-9]+]]
-; CHECK: movk [[REG3]],
-; CHECK: str [[REG3]], [x0, #32]
-; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
-; CHECK: stp [[DEST1]], [[DEST2]], [x0]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #16716 // =0x414c
+; CHECK-NEXT: movk w8, #72, lsl #16
+; CHECK-NEXT: str w8, [x0, #32]
+; CHECK-NEXT: adrp x8, .L.str2
+; CHECK-NEXT: add x8, x8, :lo12:.L.str2
+; CHECK-NEXT: ldp q0, q1, [x8]
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
ret void
}
define void @t3(ptr nocapture %C) nounwind {
-entry:
; CHECK-LABEL: t3:
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: str [[DEST]], [x0]
-; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
-; CHECK: str [[REG4]], [x0, #16]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .L.str3
+; CHECK-NEXT: add x8, x8, :lo12:.L.str3
+; CHECK-NEXT: ldr q0, [x8]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ldr x8, [x8, #16]
+; CHECK-NEXT: str x8, [x0, #16]
+; CHECK-NEXT: ret
+entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false)
ret void
}
define void @t4(ptr nocapture %C) nounwind {
-entry:
; CHECK-LABEL: t4:
-; CHECK: mov [[REG5:w[0-9]+]], #32
-; CHECK: strh [[REG5]], [x0, #16]
-; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
-; CHECK: str [[REG6]], [x0]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #32 // =0x20
+; CHECK-NEXT: strh w8, [x0, #16]
+; CHECK-NEXT: adrp x8, .L.str4
+; CHECK-NEXT: add x8, x8, :lo12:.L.str4
+; CHECK-NEXT: ldr q0, [x8]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false)
ret void
}
define void @t5(ptr nocapture %C) nounwind {
-entry:
; CHECK-LABEL: t5:
-; CHECK: mov [[REG7:w[0-9]+]], #21337
-; CHECK: movk [[REG7]],
-; CHECK: stur [[REG7]], [x0, #3]
-; CHECK: mov [[REG8:w[0-9]+]],
-; CHECK: movk [[REG8]],
-; CHECK: str [[REG8]], [x0]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #21337 // =0x5359
+; CHECK-NEXT: movk w8, #84, lsl #16
+; CHECK-NEXT: stur w8, [x0, #3]
+; CHECK-NEXT: mov w8, #18500 // =0x4844
+; CHECK-NEXT: movk w8, #22866, lsl #16
+; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: ret
+entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str5, i64 7, i1 false)
ret void
}
define void @t6() nounwind {
-entry:
; CHECK-LABEL: t6:
-; CHECK-DAG: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
-; CHECK-DAG: stur [[REG9]], [x{{[0-9]+}}, #6]
-; CHECK-DAG: ldr
-; CHECK-DAG: str
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: adrp x8, .L.str6
+; CHECK-NEXT: add x8, x8, :lo12:.L.str6
+; CHECK-NEXT: ldr x9, [x8]
+; CHECK-NEXT: adrp x10, spool.splbuf
+; CHECK-NEXT: add x10, x10, :lo12:spool.splbuf
+; CHECK-NEXT: str x9, [x10]
+; CHECK-NEXT: ldur x8, [x8, #6]
+; CHECK-NEXT: stur x8, [x10, #6]
+; CHECK-NEXT: ret
+entry:
call void @llvm.memcpy.p0.p0.i64(ptr @spool.splbuf, ptr @.str6, i64 14, i1 false)
ret void
}
@@ -96,10 +128,12 @@ entry:
%struct.Foo = type { i32, i32, i32, i32 }
define void @t7(ptr nocapture %a, ptr nocapture %b) nounwind {
+; CHECK-LABEL: t7:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
entry:
-; CHECK: t7
-; CHECK: ldr [[REG10:q[0-9]+]], [x1]
-; CHECK: str [[REG10]], [x0]
tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 4 %b, i32 16, i1 false)
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
index df5d9c06d0380..ed4a02bb0abfd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
@@ -80,8 +81,11 @@ declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)
define i16 @test_vaddlv_s8(<8 x i8> %a) {
-; CHECK: test_vaddlv_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+; CHECK-LABEL: test_vaddlv_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddlv h0, v0.8b
+; CHECK-NEXT: smov w0, v0.h[0]
+; CHECK-NEXT: ret
entry:
%saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %saddlvv.i to i16
@@ -89,16 +93,22 @@ entry:
}
define i32 @test_vaddlv_s16(<4 x i16> %a) {
-; CHECK: test_vaddlv_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+; CHECK-LABEL: test_vaddlv_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddlv s0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
ret i32 %saddlvv.i
}
define i16 @test_vaddlv_u8(<8 x i8> %a) {
-; CHECK: test_vaddlv_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+; CHECK-LABEL: test_vaddlv_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv h0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %uaddlvv.i to i16
@@ -106,16 +116,22 @@ entry:
}
define i32 @test_vaddlv_u16(<4 x i16> %a) {
-; CHECK: test_vaddlv_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+; CHECK-LABEL: test_vaddlv_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv s0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
ret i32 %uaddlvv.i
}
define i16 @test_vaddlvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vaddlvq_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddlv h0, v0.16b
+; CHECK-NEXT: smov w0, v0.h[0]
+; CHECK-NEXT: ret
entry:
%saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %saddlvv.i to i16
@@ -123,24 +139,33 @@ entry:
}
define i32 @test_vaddlvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+; CHECK-LABEL: test_vaddlvq_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddlv s0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
ret i32 %saddlvv.i
}
define i64 @test_vaddlvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_s32:
-; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vaddlvq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddlv d0, v0.4s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
ret i64 %saddlvv.i
}
define i16 @test_vaddlvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vaddlvq_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv h0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %uaddlvv.i to i16
@@ -148,24 +173,28 @@ entry:
}
define i32 @test_vaddlvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+; CHECK-LABEL: test_vaddlvq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv s0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
ret i32 %uaddlvv.i
}
define i64 @test_vaddlvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_u32:
-; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vaddlvq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uaddlv d0, v0.4s
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
ret i64 %uaddlvv.i
}
define i8 @test_vmaxv_s8(<8 x i8> %a) {
-; CHECK: test_vmaxv_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
%smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %smaxv.i to i8
@@ -173,8 +202,6 @@ entry:
}
define i16 @test_vmaxv_s16(<4 x i16> %a) {
-; CHECK: test_vmaxv_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
%smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %smaxv.i to i16
@@ -182,8 +209,11 @@ entry:
}
define i8 @test_vmaxv_u8(<8 x i8> %a) {
-; CHECK: test_vmaxv_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
+; CHECK-LABEL: test_vmaxv_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umaxv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %umaxv.i to i8
@@ -191,8 +221,11 @@ entry:
}
define i16 @test_vmaxv_u16(<4 x i16> %a) {
-; CHECK: test_vmaxv_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
+; CHECK-LABEL: test_vmaxv_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umaxv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %umaxv.i to i16
@@ -200,8 +233,6 @@ entry:
}
define i8 @test_vmaxvq_s8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
%smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %smaxv.i to i8
@@ -209,8 +240,6 @@ entry:
}
define i16 @test_vmaxvq_s16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
%smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %smaxv.i to i16
@@ -218,16 +247,22 @@ entry:
}
define i32 @test_vmaxvq_s32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_s32:
-; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vmaxvq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smaxv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
ret i32 %smaxv.i
}
define i8 @test_vmaxvq_u8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vmaxvq_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umaxv b0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %umaxv.i to i8
@@ -235,8 +270,11 @@ entry:
}
define i16 @test_vmaxvq_u16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
+; CHECK-LABEL: test_vmaxvq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umaxv h0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %umaxv.i to i16
@@ -244,16 +282,17 @@ entry:
}
define i32 @test_vmaxvq_u32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_u32:
-; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vmaxvq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umaxv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
ret i32 %umaxv.i
}
define i8 @test_vminv_s8(<8 x i8> %a) {
-; CHECK: test_vminv_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
%sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %sminv.i to i8
@@ -261,8 +300,6 @@ entry:
}
define i16 @test_vminv_s16(<4 x i16> %a) {
-; CHECK: test_vminv_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
%sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %sminv.i to i16
@@ -270,8 +307,11 @@ entry:
}
define i8 @test_vminv_u8(<8 x i8> %a) {
-; CHECK: test_vminv_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
+; CHECK-LABEL: test_vminv_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uminv b0, v0.8b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %uminv.i to i8
@@ -279,8 +319,11 @@ entry:
}
define i16 @test_vminv_u16(<4 x i16> %a) {
-; CHECK: test_vminv_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
+; CHECK-LABEL: test_vminv_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uminv h0, v0.4h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %uminv.i to i16
@@ -288,8 +331,6 @@ entry:
}
define i8 @test_vminvq_s8(<16 x i8> %a) {
-; CHECK: test_vminvq_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
%sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %sminv.i to i8
@@ -297,8 +338,6 @@ entry:
}
define i16 @test_vminvq_s16(<8 x i16> %a) {
-; CHECK: test_vminvq_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
%sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %sminv.i to i16
@@ -306,16 +345,22 @@ entry:
}
define i32 @test_vminvq_s32(<4 x i32> %a) {
-; CHECK: test_vminvq_s32:
-; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vminvq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sminv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
ret i32 %sminv.i
}
define i8 @test_vminvq_u8(<16 x i8> %a) {
-; CHECK: test_vminvq_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vminvq_u8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uminv b0, v0.16b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %uminv.i to i8
@@ -323,8 +368,11 @@ entry:
}
define i16 @test_vminvq_u16(<8 x i16> %a) {
-; CHECK: test_vminvq_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
+; CHECK-LABEL: test_vminvq_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uminv h0, v0.8h
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %uminv.i to i16
@@ -332,16 +380,17 @@ entry:
}
define i32 @test_vminvq_u32(<4 x i32> %a) {
-; CHECK: test_vminvq_u32:
-; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vminvq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uminv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
ret i32 %uminv.i
}
define i8 @test_vaddv_s8(<8 x i8> %a) {
-; CHECK: test_vaddv_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %vaddv.i to i8
@@ -349,8 +398,6 @@ entry:
}
define i16 @test_vaddv_s16(<4 x i16> %a) {
-; CHECK: test_vaddv_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %vaddv.i to i16
@@ -358,8 +405,6 @@ entry:
}
define i8 @test_vaddv_u8(<8 x i8> %a) {
-; CHECK: test_vaddv_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
%0 = trunc i32 %vaddv.i to i8
@@ -367,8 +412,6 @@ entry:
}
define i16 @test_vaddv_u16(<4 x i16> %a) {
-; CHECK: test_vaddv_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
%0 = trunc i32 %vaddv.i to i16
@@ -376,8 +419,6 @@ entry:
}
define i8 @test_vaddvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddvq_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %vaddv.i to i8
@@ -385,8 +426,6 @@ entry:
}
define i16 @test_vaddvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddvq_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %vaddv.i to i16
@@ -394,16 +433,17 @@ entry:
}
define i32 @test_vaddvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddvq_s32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vaddvq_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
ret i32 %vaddv.i
}
define i8 @test_vaddvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddvq_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
%0 = trunc i32 %vaddv.i to i8
@@ -411,8 +451,6 @@ entry:
}
define i16 @test_vaddvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddvq_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
%0 = trunc i32 %vaddv.i to i16
@@ -420,40 +458,51 @@ entry:
}
define i32 @test_vaddvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddvq_u32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vaddvq_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
entry:
%vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
ret i32 %vaddv.i
}
define float @test_vmaxvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxvq_f32:
-; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vmaxvq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmaxv s0, v0.4s
+; CHECK-NEXT: ret
entry:
%0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
ret float %0
}
define float @test_vminvq_f32(<4 x float> %a) {
-; CHECK: test_vminvq_f32:
-; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vminvq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fminv s0, v0.4s
+; CHECK-NEXT: ret
entry:
%0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
ret float %0
}
define float @test_vmaxnmvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxnmvq_f32:
-; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vmaxnmvq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmaxnmv s0, v0.4s
+; CHECK-NEXT: ret
entry:
%0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
ret float %0
}
define float @test_vminnmvq_f32(<4 x float> %a) {
-; CHECK: test_vminnmvq_f32:
-; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+; CHECK-LABEL: test_vminnmvq_f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fminnmv s0, v0.4s
+; CHECK-NEXT: ret
entry:
%0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
ret float %0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index f95fe77997d77..eefa5a9b43d0b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
@@ -5,54 +6,66 @@ declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 are possible, but would be odd.
-; CHECK: test_addp_v8i8:
+; CHECK-LABEL: test_addp_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp1 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: addp v0.8b, v0.8b, v1.8b
ret <8 x i8> %tmp1
}
declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_addp_v16i8:
+; CHECK-LABEL: test_addp_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
%tmp1 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: addp v0.16b, v0.16b, v1.16b
ret <16 x i8> %tmp1
}
declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)
define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_addp_v4i16:
+; CHECK-LABEL: test_addp_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp1 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: addp v0.4h, v0.4h, v1.4h
ret <4 x i16> %tmp1
}
declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_addp_v8i16:
+; CHECK-LABEL: test_addp_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp1 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: addp v0.8h, v0.8h, v1.8h
ret <8 x i16> %tmp1
}
declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>)
define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_addp_v2i32:
+; CHECK-LABEL: test_addp_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp1 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: addp v0.2s, v0.2s, v1.2s
ret <2 x i32> %tmp1
}
declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>)
define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_addp_v4i32:
+; CHECK-LABEL: test_addp_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp1 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: addp v0.4s, v0.4s, v1.4s
ret <4 x i32> %tmp1
}
@@ -60,9 +73,11 @@ define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_addp_v2i64:
+; CHECK-LABEL: test_addp_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%val = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: addp v0.2d, v0.2d, v1.2d
ret <2 x i64> %val
}
@@ -71,29 +86,38 @@ declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_faddp_v2f32:
+; CHECK-LABEL: test_faddp_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%val = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: faddp v0.2s, v0.2s, v1.2s
ret <2 x float> %val
}
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_faddp_v4f32:
+; CHECK-LABEL: test_faddp_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%val = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: faddp v0.4s, v0.4s, v1.4s
ret <4 x float> %val
}
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_faddp_v2f64:
+; CHECK-LABEL: test_faddp_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%val = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: faddp v0.2d, v0.2d, v1.2d
ret <2 x double> %val
}
define i32 @test_vaddv.v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddv.v2i32
-; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: test_vaddv.v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%1 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
ret i32 %1
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-sub.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-sub.ll
index 40836a73e0cab..796f687bcdd50 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-sub.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-sub.ll
@@ -1,229 +1,320 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-enable-simd-scalar| FileCheck %s
define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: add8xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = add <8 x i8> %A, %B;
ret <8 x i8> %tmp3
}
define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: add16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
%tmp3 = add <16 x i8> %A, %B;
ret <16 x i8> %tmp3
}
define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: add4xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp3 = add <4 x i16> %A, %B;
ret <4 x i16> %tmp3
}
define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: add8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = add <8 x i16> %A, %B;
ret <8 x i16> %tmp3
}
define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: add2xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = add <2 x i32> %A, %B;
ret <2 x i32> %tmp3
}
define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: add4x32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp3 = add <4 x i32> %A, %B;
ret <4 x i32> %tmp3
}
define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: add2xi64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp3 = add <2 x i64> %A, %B;
ret <2 x i64> %tmp3
}
define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: add2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = fadd <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: add4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp3 = fadd <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: add2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp3 = fadd <2 x double> %A, %B;
ret <2 x double> %tmp3
}
define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: sub8xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = sub <8 x i8> %A, %B;
ret <8 x i8> %tmp3
}
define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: sub16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
%tmp3 = sub <16 x i8> %A, %B;
ret <16 x i8> %tmp3
}
define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: sub4xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp3 = sub <4 x i16> %A, %B;
ret <4 x i16> %tmp3
}
define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: sub8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = sub <8 x i16> %A, %B;
ret <8 x i16> %tmp3
}
define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: sub2xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = sub <2 x i32> %A, %B;
ret <2 x i32> %tmp3
}
define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: sub4x32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp3 = sub <4 x i32> %A, %B;
ret <4 x i32> %tmp3
}
define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: sub2xi64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp3 = sub <2 x i64> %A, %B;
ret <2 x i64> %tmp3
}
define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: sub2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = fsub <2 x float> %A, %B;
ret <2 x float> %tmp3
}
define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: sub4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp3 = fsub <4 x float> %A, %B;
ret <4 x float> %tmp3
}
define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: sub2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp3 = fsub <2 x double> %A, %B;
ret <2 x double> %tmp3
}
define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vadd_f64
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vadd_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
%1 = fadd <1 x double> %a, %b
ret <1 x double> %1
}
define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmul_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmul_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
%1 = fmul <1 x double> %a, %b
ret <1 x double> %1
}
define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vdiv_f64
-; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vdiv_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: ret
%1 = fdiv <1 x double> %a, %b
ret <1 x double> %1
}
define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmla_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmla_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d1, d1, d2
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ret
%1 = fmul <1 x double> %b, %c
%2 = fadd <1 x double> %1, %a
ret <1 x double> %2
}
define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmls_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmls_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d1, d1, d2
+; CHECK-NEXT: fsub d0, d0, d1
+; CHECK-NEXT: ret
%1 = fmul <1 x double> %b, %c
%2 = fsub <1 x double> %a, %1
ret <1 x double> %2
}
define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfms_f64
-; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vfms_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmsub d0, d1, d2, d0
+; CHECK-NEXT: ret
%1 = fsub <1 x double> <double -0.000000e+00>, %b
%2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
ret <1 x double> %2
}
define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfma_f64
-; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vfma_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmadd d0, d1, d2, d0
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
ret <1 x double> %1
}
define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vsub_f64
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vsub_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub d0, d0, d1
+; CHECK-NEXT: ret
%1 = fsub <1 x double> %a, %b
ret <1 x double> %1
}
define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vabd_f64
-; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vabd_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabd d0, d0, d1
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
ret <1 x double> %1
}
define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmax_f64
-; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmax_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmax d0, d0, d1
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
ret <1 x double> %1
}
define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmin_f64
-; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmin_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmin d0, d0, d1
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
ret <1 x double> %1
}
define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmaxnm_f64
-; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vmaxnm_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmaxnm d0, d0, d1
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
ret <1 x double> %1
}
define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vminnm_f64
-; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vminnm_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fminnm d0, d0, d1
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
ret <1 x double> %1
}
define <1 x double> @test_vabs_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vabs_f64
-; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vabs_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
ret <1 x double> %1
}
define <1 x double> @test_vneg_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vneg_f64
-; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-LABEL: test_vneg_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fneg d0, d0
+; CHECK-NEXT: ret
%1 = fsub <1 x double> <double -0.000000e+00>, %a
ret <1 x double> %1
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
index 8e2d0352fddef..6327679756739 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
@@ -1,11 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has a separate copy due to intrinsics
define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) {
; CHECK-LABEL: copyTuple.QPair:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v3.4s, #2
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x0]
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x1]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, ptr %a)
%extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
@@ -16,10 +24,19 @@ entry:
define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QTriple:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1
+; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v31.16b
+; CHECK-NEXT: mov v3.16b, v0.16b
+; CHECK-NEXT: mov v4.16b, v1.16b
+; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x0]
+; CHECK-NEXT: mov v3.16b, v31.16b
+; CHECK-NEXT: mov v4.16b, v0.16b
+; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x1]
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
%extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
@@ -30,11 +47,22 @@ entry:
define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) {
; CHECK-LABEL: copyTuple.QQuad:
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1_q2
+; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: mov v3.16b, v31.16b
+; CHECK-NEXT: mov v4.16b, v0.16b
+; CHECK-NEXT: mov v5.16b, v1.16b
+; CHECK-NEXT: mov v6.16b, v2.16b
+; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0]
+; CHECK-NEXT: mov v4.16b, v31.16b
+; CHECK-NEXT: mov v5.16b, v0.16b
+; CHECK-NEXT: mov v6.16b, v0.16b
+; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1]
+; CHECK-NEXT: mov v0.16b, v3.16b
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
%extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
index bb67874b01651..f1458b76c525a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
@@ -1,79 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
define <16 x i8> @div16xi8(<16 x i8> %x) {
; CHECK-LABEL: div16xi8:
-; CHECK: movi [[DIVISOR:(v[0-9]+)]].16b, #41
-; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].8h, v0.16b, [[DIVISOR]].16b
-; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].8h, v0.8b, [[DIVISOR]].8b
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).16b]], [[SMULL]].16b, [[SMULL2]].16b
-; CHECK-NEXT: sshr [[SSHR:(v[0-9]+.16b)]], [[UZP2]], #2
-; CHECK-NEXT: usra v0.16b, [[SSHR]], #7
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.16b, #41
+; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
+; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: sshr v0.16b, v0.16b, #2
+; CHECK-NEXT: usra v0.16b, v0.16b, #7
+; CHECK-NEXT: ret
%div = sdiv <16 x i8> %x, <i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25>
ret <16 x i8> %div
}
define <8 x i16> @div8xi16(<8 x i16> %x) {
; CHECK-LABEL: div8xi16:
-; CHECK: mov [[TMP:(w[0-9]+)]], #40815
-; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].8h, [[TMP]]
-; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].4s, v0.8h, [[DIVISOR]].8h
-; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].4s, v0.4h, [[DIVISOR]].4h
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).8h]], [[SMULL]].8h, [[SMULL2]].8h
-; CHECK-NEXT: add [[ADD:(v[0-9]+).8h]], [[UZP2]], v0.8h
-; CHECK-NEXT: sshr [[SSHR:(v[0-9]+).8h]], [[ADD]], #12
-; CHECK-NEXT: usra v0.8h, [[SSHR]], #15
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #40815 // =0x9f6f
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: sshr v0.8h, v0.8h, #12
+; CHECK-NEXT: usra v0.8h, v0.8h, #15
+; CHECK-NEXT: ret
%div = sdiv <8 x i16> %x, <i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577>
ret <8 x i16> %div
}
define <4 x i32> @div32xi4(<4 x i32> %x) {
; CHECK-LABEL: div32xi4:
-; CHECK: mov [[TMP:(w[0-9]+)]], #7527
-; CHECK-NEXT: movk [[TMP]], #28805, lsl #16
-; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].4s, [[TMP]]
-; CHECK-NEXT: smull2 [[SMULL2:(v[0-9]+)]].2d, v0.4s, [[DIVISOR]].4s
-; CHECK-NEXT: smull [[SMULL:(v[0-9]+)]].2d, v0.2s, [[DIVISOR]].2s
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).4s]], [[SMULL]].4s, [[SMULL2]].4s
-; CHECK-NEXT: sshr [[SSHR:(v[0-9]+.4s)]], [[UZP2]], #22
-; CHECK-NEXT: usra v0.4s, [[UZP2]], #31
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #7527 // =0x1d67
+; CHECK-NEXT: movk w8, #28805, lsl #16
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: sshr v0.4s, v1.4s, #22
+; CHECK-NEXT: usra v0.4s, v1.4s, #31
+; CHECK-NEXT: ret
%div = sdiv <4 x i32> %x, <i32 9542677, i32 9542677, i32 9542677, i32 9542677>
ret <4 x i32> %div
}
define <16 x i8> @udiv16xi8(<16 x i8> %x) {
; CHECK-LABEL: udiv16xi8:
-; CHECK: movi [[DIVISOR:(v[0-9]+)]].16b, #121
-; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].8h, v0.16b, [[DIVISOR]].16b
-; CHECK-NEXT: umull [[UMULL:(v[0-9]+)]].8h, v0.8b, [[DIVISOR]].8b
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).16b]], [[UMULL]].16b, [[UMULL2]].16b
-; CHECK-NEXT: ushr v0.16b, [[UZP2]], #5
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.16b, #121
+; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
+; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ushr v0.16b, v0.16b, #5
+; CHECK-NEXT: ret
%div = udiv <16 x i8> %x, <i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68, i8 68>
ret <16 x i8> %div
}
define <8 x i16> @udiv8xi16(<8 x i16> %x) {
; CHECK-LABEL: udiv8xi16:
-; CHECK: mov [[TMP:(w[0-9]+)]], #16593
-; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].8h, [[TMP]]
-; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].4s, v0.8h, [[DIVISOR]].8h
-; CHECK-NEXT: umull [[UMULL:(v[0-9]+)]].4s, v0.4h, [[DIVISOR]].4h
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).8h]], [[UMULL]].8h, [[SMULL2]].8h
-; CHECK-NEXT: sub [[SUB:(v[0-9]+).8h]], v0.8h, [[UZP2]]
-; CHECK-NEXT: usra [[USRA:(v[0-9]+).8h]], [[SUB]], #1
-; CHECK-NEXT: ushr v0.8h, [[USRA]], #12
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #16593 // =0x40d1
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
+; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: usra v1.8h, v0.8h, #1
+; CHECK-NEXT: ushr v0.8h, v1.8h, #12
+; CHECK-NEXT: ret
%div = udiv <8 x i16> %x, <i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537>
ret <8 x i16> %div
}
define <4 x i32> @udiv32xi4(<4 x i32> %x) {
; CHECK-LABEL: udiv32xi4:
-; CHECK: mov [[TMP:(w[0-9]+)]], #16747
-; CHECK-NEXT: movk [[TMP]], #31439, lsl #16
-; CHECK-NEXT: dup [[DIVISOR:(v[0-9]+)]].4s, [[TMP]]
-; CHECK-NEXT: umull2 [[UMULL2:(v[0-9]+)]].2d, v0.4s, [[DIVISOR]].4s
-; CHECK-NEXT: umull [[UMULL:(v[0-9]+)]].2d, v0.2s, [[DIVISOR]].2s
-; CHECK-NEXT: uzp2 [[UZP2:(v[0-9]+).4s]], [[UMULL]].4s, [[SMULL2]].4s
-; CHECK-NEXT: ushr v0.4s, [[UZP2]], #22
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #16747 // =0x416b
+; CHECK-NEXT: movk w8, #31439, lsl #16
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ushr v0.4s, v0.4s, #22
+; CHECK-NEXT: ret
%div = udiv <4 x i32> %x, <i32 8743143, i32 8743143, i32 8743143, i32 8743143>
ret <4 x i32> %div
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
index 92ed23995098e..5d5b940174c4b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
@@ -1,16 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
- ; CHECK-LABEL: test_fmul_lane_ss2S
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_fmul_lane_ss2S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmul s0, s0, v1.s[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x float> %v, i32 1
%tmp2 = fmul float %a, %tmp1;
ret float %tmp2;
}
define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
- ; CHECK-LABEL: test_fmul_lane_ss2S_swap
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_fmul_lane_ss2S_swap:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmul s0, s0, v1.s[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x float> %v, i32 1
%tmp2 = fmul float %tmp1, %a;
ret float %tmp2;
@@ -18,16 +25,20 @@ define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
- ; CHECK-LABEL: test_fmul_lane_ss4S
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-LABEL: test_fmul_lane_ss4S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul s0, s0, v1.s[3]
+; CHECK-NEXT: ret
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = fmul float %a, %tmp1;
ret float %tmp2;
}
define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
- ; CHECK-LABEL: test_fmul_lane_ss4S_swap
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-LABEL: test_fmul_lane_ss4S_swap:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul s0, s0, v1.s[3]
+; CHECK-NEXT: ret
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = fmul float %tmp1, %a;
ret float %tmp2;
@@ -35,8 +46,10 @@ define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
- ; CHECK-LABEL: test_fmul_lane_ddD
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
+; CHECK-LABEL: test_fmul_lane_ddD:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
%tmp1 = extractelement <1 x double> %v, i32 0
%tmp2 = fmul double %a, %tmp1;
ret double %tmp2;
@@ -45,8 +58,10 @@ define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
- ; CHECK-LABEL: test_fmul_lane_dd2D
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_fmul_lane_dd2D:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, v1.d[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = fmul double %a, %tmp1;
ret double %tmp2;
@@ -54,8 +69,10 @@ define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
- ; CHECK-LABEL: test_fmul_lane_dd2D_swap
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_fmul_lane_dd2D_swap:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, v1.d[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = fmul double %tmp1, %a;
ret double %tmp2;
@@ -64,24 +81,31 @@ define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
declare float @llvm.aarch64.neon.fmulx.f32(float, float)
define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
- ; CHECK-LABEL: test_fmulx_lane_f32
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_fmulx_lane_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmulx s0, s0, v1.s[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x float> %v, i32 1
%tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1)
ret float %tmp2;
}
define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
- ; CHECK-LABEL: test_fmulx_laneq_f32
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-LABEL: test_fmulx_laneq_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx s0, s0, v1.s[3]
+; CHECK-NEXT: ret
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1)
ret float %tmp2;
}
define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
- ; CHECK-LABEL: test_fmulx_laneq_f32_swap
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-LABEL: test_fmulx_laneq_f32_swap:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx s0, s0, v1.s[3]
+; CHECK-NEXT: ret
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %tmp1, float %a)
ret float %tmp2;
@@ -90,16 +114,20 @@ define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
declare double @llvm.aarch64.neon.fmulx.f64(double, double)
define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
- ; CHECK-LABEL: test_fmulx_lane_f64
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
+; CHECK-LABEL: test_fmulx_lane_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx d0, d0, d1
+; CHECK-NEXT: ret
%tmp1 = extractelement <1 x double> %v, i32 0
%tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
ret double %tmp2;
}
define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
- ; CHECK-LABEL: test_fmulx_laneq_f64_0
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_fmulx_laneq_f64_0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx d0, d0, v1.d[0]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 0
%tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
ret double %tmp2;
@@ -107,16 +135,20 @@ define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
- ; CHECK-LABEL: test_fmulx_laneq_f64_1
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_fmulx_laneq_f64_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx d0, d0, v1.d[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
ret double %tmp2;
}
define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
- ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_fmulx_laneq_f64_1_swap:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmulx d0, d0, v1.d[1]
+; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %tmp1, double %a)
ret double %tmp2;
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
index 447fb6307f21e..c9a03c888d5b2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
@@ -1,266 +1,338 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
-; CHECK: test_vshr_n_s8
-; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_vshr_n_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.8b, v0.8b, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <8 x i8> %vshr_n
}
define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
-; CHECK: test_vshr_n_s16
-; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+; CHECK-LABEL: test_vshr_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.4h, v0.4h, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
ret <4 x i16> %vshr_n
}
define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
-; CHECK: test_vshr_n_s32
-; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+; CHECK-LABEL: test_vshr_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
ret <2 x i32> %vshr_n
}
define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_s8
-; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+; CHECK-LABEL: test_vshrq_n_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.16b, v0.16b, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %vshr_n
}
define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_s16
-; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrq_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.8h, v0.8h, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %vshr_n
}
define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_s32
-; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+; CHECK-LABEL: test_vshrq_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.4s, v0.4s, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %vshr_n
}
define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_s64
-; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+; CHECK-LABEL: test_vshrq_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2d, v0.2d, #3
+; CHECK-NEXT: ret
%vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
ret <2 x i64> %vshr_n
}
define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
-; CHECK: test_vshr_n_u8
-; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_vshr_n_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8b, v0.8b, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <8 x i8> %vshr_n
}
define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
-; CHECK: test_vshr_n_u16
-; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+; CHECK-LABEL: test_vshr_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.4h, v0.4h, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
ret <4 x i16> %vshr_n
}
define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
-; CHECK: test_vshr_n_u32
-; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+; CHECK-LABEL: test_vshr_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.2s, v0.2s, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
ret <2 x i32> %vshr_n
}
define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_u8
-; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+; CHECK-LABEL: test_vshrq_n_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.16b, v0.16b, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %vshr_n
}
define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_u16
-; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrq_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.8h, v0.8h, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %vshr_n
}
define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_u32
-; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+; CHECK-LABEL: test_vshrq_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.4s, v0.4s, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %vshr_n
}
define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_u64
-; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+; CHECK-LABEL: test_vshrq_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushr v0.2d, v0.2d, #3
+; CHECK-NEXT: ret
%vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
ret <2 x i64> %vshr_n
}
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_s8
-; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_vsra_n_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.8b, v1.8b, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%1 = add <8 x i8> %vsra_n, %a
ret <8 x i8> %1
}
define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_s16
-; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+; CHECK-LABEL: test_vsra_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.4h, v1.4h, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
%1 = add <4 x i16> %vsra_n, %a
ret <4 x i16> %1
}
define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_s32
-; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+; CHECK-LABEL: test_vsra_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.2s, v1.2s, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
%1 = add <2 x i32> %vsra_n, %a
ret <2 x i32> %1
}
define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_s8
-; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+; CHECK-LABEL: test_vsraq_n_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.16b, v1.16b, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%1 = add <16 x i8> %vsra_n, %a
ret <16 x i8> %1
}
define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_s16
-; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vsraq_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.8h, v1.8h, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%1 = add <8 x i16> %vsra_n, %a
ret <8 x i16> %1
}
define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_s32
-; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+; CHECK-LABEL: test_vsraq_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.4s, v1.4s, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %vsra_n, %a
ret <4 x i32> %1
}
define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_s64
-; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+; CHECK-LABEL: test_vsraq_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ssra v0.2d, v1.2d, #3
+; CHECK-NEXT: ret
%vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
%1 = add <2 x i64> %vsra_n, %a
ret <2 x i64> %1
}
define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_u8
-; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+; CHECK-LABEL: test_vsra_n_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.8b, v1.8b, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%1 = add <8 x i8> %vsra_n, %a
ret <8 x i8> %1
}
define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_u16
-; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+; CHECK-LABEL: test_vsra_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.4h, v1.4h, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
%1 = add <4 x i16> %vsra_n, %a
ret <4 x i16> %1
}
define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_u32
-; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+; CHECK-LABEL: test_vsra_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.2s, v1.2s, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
%1 = add <2 x i32> %vsra_n, %a
ret <2 x i32> %1
}
define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_u8
-; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+; CHECK-LABEL: test_vsraq_n_u8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.16b, v1.16b, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%1 = add <16 x i8> %vsra_n, %a
ret <16 x i8> %1
}
define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_u16
-; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vsraq_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.8h, v1.8h, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%1 = add <8 x i16> %vsra_n, %a
ret <8 x i16> %1
}
define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_u32
-; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+; CHECK-LABEL: test_vsraq_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.4s, v1.4s, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
%1 = add <4 x i32> %vsra_n, %a
ret <4 x i32> %1
}
define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_u64
-; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+; CHECK-LABEL: test_vsraq_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usra v0.2d, v1.2d, #3
+; CHECK-NEXT: ret
%vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
%1 = add <2 x i64> %vsra_n, %a
ret <2 x i64> %1
}
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_s16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrn_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.8b, v0.8h, #3
+; CHECK-NEXT: ret
%1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%vshrn_n = trunc <8 x i16> %1 to <8 x i8>
ret <8 x i8> %vshrn_n
}
define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_s32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vshrn_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.4h, v0.4s, #9
+; CHECK-NEXT: ret
%1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
%vshrn_n = trunc <4 x i32> %1 to <4 x i16>
ret <4 x i16> %vshrn_n
}
define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_s64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vshrn_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.2s, v0.2d, #19
+; CHECK-NEXT: ret
%1 = ashr <2 x i64> %a, <i64 19, i64 19>
%vshrn_n = trunc <2 x i64> %1 to <2 x i32>
ret <2 x i32> %vshrn_n
}
define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_u16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrn_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.8b, v0.8h, #3
+; CHECK-NEXT: ret
%1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%vshrn_n = trunc <8 x i16> %1 to <8 x i8>
ret <8 x i8> %vshrn_n
}
define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_u32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vshrn_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.4h, v0.4s, #9
+; CHECK-NEXT: ret
%1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
%vshrn_n = trunc <4 x i32> %1 to <4 x i16>
ret <4 x i16> %vshrn_n
}
define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_u64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vshrn_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.2s, v0.2d, #19
+; CHECK-NEXT: ret
%1 = lshr <2 x i64> %a, <i64 19, i64 19>
%vshrn_n = trunc <2 x i64> %1 to <2 x i32>
ret <2 x i32> %vshrn_n
}
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_s16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrn_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%vshrn_n = trunc <8 x i16> %1 to <8 x i8>
%2 = bitcast <8 x i8> %a to <1 x i64>
@@ -271,8 +343,11 @@ define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_s32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vshrn_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
%vshrn_n = trunc <4 x i32> %1 to <4 x i16>
%2 = bitcast <4 x i16> %a to <1 x i64>
@@ -283,8 +358,11 @@ define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_s64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vshrn_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%2 = ashr <2 x i64> %b, <i64 19, i64 19>
%vshrn_n = trunc <2 x i64> %2 to <2 x i32>
@@ -295,8 +373,11 @@ define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_u16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vshrn_high_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%vshrn_n = trunc <8 x i16> %1 to <8 x i8>
%2 = bitcast <8 x i8> %a to <1 x i64>
@@ -307,8 +388,11 @@ define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_u32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vshrn_high_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
%vshrn_n = trunc <4 x i32> %1 to <4 x i16>
%2 = bitcast <4 x i16> %a to <1 x i64>
@@ -319,8 +403,11 @@ define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_u64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vshrn_high_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: shrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%2 = lshr <2 x i64> %b, <i64 19, i64 19>
%vshrn_n = trunc <2 x i64> %2 to <2 x i32>
@@ -331,8 +418,11 @@ define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrun_high_n_s16
-; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqshrun_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqshrun to <1 x i64>
@@ -342,8 +432,11 @@ define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrun_high_n_s32
-; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqshrun_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqshrun to <1 x i64>
@@ -353,8 +446,11 @@ define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrun_high_n_s64
-; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqshrun_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqshrun to <1 x i64>
@@ -364,8 +460,11 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vrshrn_high_n_s16
-; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vrshrn_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vrshrn to <1 x i64>
@@ -375,8 +474,11 @@ define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vrshrn_high_n_s32
-; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vrshrn_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rshrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vrshrn to <1 x i64>
@@ -386,8 +488,11 @@ define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vrshrn_high_n_s64
-; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vrshrn_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vrshrn to <1 x i64>
@@ -397,8 +502,11 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrun_high_n_s16
-; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqrshrun_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
@@ -408,8 +516,11 @@ define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrun_high_n_s32
-; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqrshrun_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
@@ -419,8 +530,11 @@ define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrun_high_n_s64
-; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqrshrun_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrun2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
@@ -430,8 +544,11 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_s16
-; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqshrn_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqshrn to <1 x i64>
@@ -441,8 +558,11 @@ define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_s32
-; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqshrn_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqshrn to <1 x i64>
@@ -452,8 +572,11 @@ define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_s64
-; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqshrn_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqshrn to <1 x i64>
@@ -463,8 +586,11 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_u16
-; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqshrn_high_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqshrn to <1 x i64>
@@ -474,8 +600,11 @@ define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_u32
-; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqshrn_high_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqshrn to <1 x i64>
@@ -485,8 +614,11 @@ define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_u64
-; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqshrn_high_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqshrn to <1 x i64>
@@ -496,8 +628,11 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_s16
-; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqrshrn_high_n_s16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
@@ -507,8 +642,11 @@ define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_s32
-; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqrshrn_high_n_s32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
@@ -518,8 +656,11 @@ define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_s64
-; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqrshrn_high_n_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrshrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
@@ -529,8 +670,11 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
}
define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_u16
-; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+; CHECK-LABEL: test_vqrshrn_high_n_u16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT: ret
%vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
%1 = bitcast <8 x i8> %a to <1 x i64>
%2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
@@ -540,8 +684,11 @@ define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
}
define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_u32
-; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+; CHECK-LABEL: test_vqrshrn_high_n_u32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT: ret
%vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
%1 = bitcast <4 x i16> %a to <1 x i64>
%2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
@@ -551,8 +698,11 @@ define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
}
define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_u64
-; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+; CHECK-LABEL: test_vqrshrn_high_n_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT: ret
%1 = bitcast <2 x i32> %a to <1 x i64>
%vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
%2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
@@ -630,29 +780,37 @@ declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+; CHECK-LABEL: test_vcvt_n_s64_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs d0, d0, #64
+; CHECK-NEXT: ret
%1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
ret <1 x i64> %1
}
define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+; CHECK-LABEL: test_vcvt_n_u64_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu d0, d0, #64
+; CHECK-NEXT: ret
%1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
ret <1 x i64> %1
}
define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+; CHECK-LABEL: test_vcvt_n_f64_s64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf d0, d0, #64
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
ret <1 x double> %1
}
define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+; CHECK-LABEL: test_vcvt_n_f64_u64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf d0, d0, #64
+; CHECK-NEXT: ret
%1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
ret <1 x double> %1
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
index 87f3956eb20fa..96d5b5ef85415 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
@@ -1,8 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_high_s8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %shuffle.i
@@ -10,7 +14,10 @@ entry:
define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_high_s16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i16> %shuffle.i
@@ -18,7 +25,10 @@ entry:
define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vget_high_s32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
ret <2 x i32> %shuffle.i
@@ -26,7 +36,10 @@ entry:
define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_high_s64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
ret <1 x i64> %shuffle.i
@@ -34,7 +47,10 @@ entry:
define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_high_u8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %shuffle.i
@@ -42,7 +58,10 @@ entry:
define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_high_u16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i16> %shuffle.i
@@ -50,7 +69,10 @@ entry:
define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vget_high_u32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
ret <2 x i32> %shuffle.i
@@ -58,7 +80,10 @@ entry:
define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_high_u64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
ret <1 x i64> %shuffle.i
@@ -66,7 +91,10 @@ entry:
define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_high_p64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
ret <1 x i64> %shuffle.i
@@ -74,7 +102,10 @@ entry:
define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_high_f16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i16> %shuffle.i
@@ -82,7 +113,10 @@ entry:
define <2 x float> @test_vget_high_f32(<4 x float> %a) {
; CHECK-LABEL: test_vget_high_f32:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
ret <2 x float> %shuffle.i
@@ -90,7 +124,10 @@ entry:
define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_high_p8:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %shuffle.i
@@ -98,7 +135,10 @@ entry:
define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_high_p16:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i16> %shuffle.i
@@ -106,7 +146,10 @@ entry:
define <1 x double> @test_vget_high_f64(<2 x double> %a) {
; CHECK-LABEL: test_vget_high_f64:
-; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
ret <1 x double> %shuffle.i
@@ -114,7 +157,9 @@ entry:
define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_low_s8:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle.i
@@ -122,7 +167,9 @@ entry:
define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_low_s16:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle.i
@@ -130,7 +177,9 @@ entry:
define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vget_low_s32:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle.i
@@ -138,7 +187,9 @@ entry:
define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_low_s64:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
ret <1 x i64> %shuffle.i
@@ -146,7 +197,9 @@ entry:
define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_low_u8:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle.i
@@ -154,7 +207,9 @@ entry:
define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_low_u16:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle.i
@@ -162,7 +217,9 @@ entry:
define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vget_low_u32:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %shuffle.i
@@ -170,7 +227,9 @@ entry:
define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_low_u64:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
ret <1 x i64> %shuffle.i
@@ -178,7 +237,9 @@ entry:
define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
; CHECK-LABEL: test_vget_low_p64:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
ret <1 x i64> %shuffle.i
@@ -186,7 +247,9 @@ entry:
define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_low_f16:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle.i
@@ -194,7 +257,9 @@ entry:
define <2 x float> @test_vget_low_f32(<4 x float> %a) {
; CHECK-LABEL: test_vget_low_f32:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
ret <2 x float> %shuffle.i
@@ -202,7 +267,9 @@ entry:
define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
; CHECK-LABEL: test_vget_low_p8:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %shuffle.i
@@ -210,7 +277,9 @@ entry:
define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
; CHECK-LABEL: test_vget_low_p16:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %shuffle.i
@@ -218,7 +287,9 @@ entry:
define <1 x double> @test_vget_low_f64(<2 x double> %a) {
; CHECK-LABEL: test_vget_low_f64:
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
ret <1 x double> %shuffle.i
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
index c739e9dcd906d..18392a26d846d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
; This is the analogue of AArch64's file of the same name. It's mostly testing
@@ -6,8 +7,15 @@
define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_0:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: cset
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w8, ge
+; CHECK-NEXT: sbfx x0, x8, #0, #1
+; CHECK-NEXT: ret
%1 = icmp sge <1 x i64> %v1, %v2
%2 = extractelement <1 x i1> %1, i32 0
%vget_lane = sext i1 %2 to i64
@@ -16,7 +24,11 @@ define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_1:
-; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x0, x8, #0, #1
+; CHECK-NEXT: ret
%1 = fcmp oeq <1 x double> %v1, %v2
%2 = extractelement <1 x i1> %1, i32 0
%vget_lane = sext i1 %2 to i64
@@ -25,8 +37,10 @@ define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_0:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq d0, d0, d1
+; CHECK-NEXT: bic v0.8b, v2.8b, v0.8b
+; CHECK-NEXT: ret
%1 = icmp eq <1 x i64> %v1, %v2
%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
ret <1 x i64> %res
@@ -34,8 +48,10 @@ define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3
define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_1:
-; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmeq d0, d0, d1
+; CHECK-NEXT: bic v0.8b, v2.8b, v0.8b
+; CHECK-NEXT: ret
%1 = fcmp oeq <1 x double> %v1, %v2
%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
ret <1 x i64> %res
@@ -43,8 +59,10 @@ define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i6
define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
; CHECK-LABEL: test_select_v1i1_2:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq d0, d0, d1
+; CHECK-NEXT: bic v0.8b, v2.8b, v0.8b
+; CHECK-NEXT: ret
%1 = icmp eq <1 x i64> %v1, %v2
%res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
ret <1 x double> %res
@@ -55,10 +73,12 @@ define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x doubl
; scalar setcc. If anything, it exposes more ILP.
define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_3:
-; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
-; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, x1
+; CHECK-NEXT: fmov d2, x0
+; CHECK-NEXT: cmeq d1, d2, d1
+; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tst = icmp eq i64 %lhs, %rhs
%evil = insertelement <1 x i1> undef, i1 %tst, i32 0
%res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
@@ -67,7 +87,14 @@ define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_br_extr_cmp:
-; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: // %bb.0: // %common.ret
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
%1 = icmp eq <1 x i64> %v1, %v2
%2 = extractelement <1 x i1> %1, i32 0
br i1 %2, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
index 6fa70561f18e7..5a7cc89e1554e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
; FIXME: We should not generate ld/st for such register spill/fill, because the
@@ -6,9 +7,27 @@
; then we can delete it.
define i32 @spill.DPairReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.DPairReg:
-; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0]
+; CHECK-NEXT: cbz w1, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d }, [x8] // 32-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d }, [x8] // 32-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: ret
entry:
%vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -26,9 +45,27 @@ if.end:
define i16 @spill.DTripleReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.DTripleReg:
-; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld3 { v0.4h, v1.4h, v2.4h }, [x0]
+; CHECK-NEXT: cbz w1, .LBB1_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: umov w0, v0.h[1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d, v2.2d }, [x8] // 48-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d, v2.2d }, [x8] // 48-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: umov w0, v0.h[1]
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -46,9 +83,27 @@ if.end:
define i16 @spill.DQuadReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.DQuadReg:
-; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+; CHECK-NEXT: cbz w1, .LBB2_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x8] // 64-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x8] // 64-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -66,9 +121,27 @@ if.end:
define i32 @spill.QPairReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.QPairReg:
-; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0]
+; CHECK-NEXT: cbz w1, .LBB3_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d }, [x8] // 32-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d }, [x8] // 32-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: ret
entry:
%vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -86,9 +159,27 @@ if.end:
define float @spill.QTripleReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.QTripleReg:
-; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+; CHECK-NEXT: cbz w1, .LBB4_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d, v2.2d }, [x8] // 48-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d, v2.2d }, [x8] // 48-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: ret
entry:
%vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -106,9 +197,27 @@ if.end:
define i8 @spill.QQuadReg(ptr %arg1, i32 %arg2) {
; CHECK-LABEL: spill.QQuadReg:
-; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
-; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
-; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+; CHECK-NEXT: cbz w1, .LBB5_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: umov w0, v0.b[1]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_2: // %if.then
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x8] // 64-byte Folded Spill
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x8] // 64-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: umov w0, v0.b[1]
+; CHECK-NEXT: ret
entry:
%vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %arg1)
%cmp = icmp eq i32 %arg2, 0
@@ -139,6 +248,18 @@ declare void @foo()
; then we can delete it.
; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo
define <8 x i16> @test_2xFPR128Lo(i64 %got, ptr %ptr, <1 x i64> %a) {
+; CHECK-LABEL: test_2xFPR128Lo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: st2 { v0.d, v1.d }[0], [x1]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
tail call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, ptr %ptr)
tail call void @foo()
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
@@ -150,6 +271,19 @@ define <8 x i16> @test_2xFPR128Lo(i64 %got, ptr %ptr, <1 x i64> %a) {
; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo
define <8 x i16> @test_3xFPR128Lo(i64 %got, ptr %ptr, <1 x i64> %a) {
+; CHECK-LABEL: test_3xFPR128Lo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: st3 { v0.d, v1.d, v2.d }[0], [x1]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
tail call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, ptr %ptr)
tail call void @foo()
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
@@ -161,6 +295,20 @@ define <8 x i16> @test_3xFPR128Lo(i64 %got, ptr %ptr, <1 x i64> %a) {
; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo
define <8 x i16> @test_4xFPR128Lo(i64 %got, ptr %ptr, <1 x i64> %a) {
+; CHECK-LABEL: test_4xFPR128Lo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: mov v3.16b, v0.16b
+; CHECK-NEXT: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x1]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
tail call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, ptr %ptr)
tail call void @foo()
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
index 3e6d6418db7b4..a02a8a62645bb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -4,8 +4,8 @@
define void @test(ptr %p1, ptr %p2) {
; CHECK-LABEL: test:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3
-; CHECK-NEXT: mov w9, #1
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
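For reference, the FileCheck assertions in these files come from the in-tree updater named in the NOTE lines above. A minimal sketch of regenerating them locally, assuming an llc binary built at build/bin/llc and that --llc-binary points the script at that build (paths are illustrative, not part of the commit):

  # rerun the autogenerator on one of the modified tests
  python3 llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
      llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll

The script reads the RUN lines and any embedded UTC_ARGS (here "--version 2") and rewrites the CHECK blocks in place, which is how the regex-based checks above were replaced with exact-register assertions.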