[llvm] ac02168 - [ARM] Clean up neon_vabd.ll, vaba.ll and vabd.ll tests a bit. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 06:31:20 PDT 2024
Author: David Green
Date: 2024-06-07T14:31:15+01:00
New Revision: ac02168990aa8429898d0c59fec7a78526638c5c
URL: https://github.com/llvm/llvm-project/commit/ac02168990aa8429898d0c59fec7a78526638c5c
DIFF: https://github.com/llvm/llvm-project/commit/ac02168990aa8429898d0c59fec7a78526638c5c.diff
LOG: [ARM] Clean up neon_vabd.ll, vaba.ll and vabd.ll tests a bit. NFC
Change the target triple to armv7a-eabihf so the tests use the hard-float ABI, removing some unnecessary argument-marshalling instructions from the check lines.
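As background on why the triple change shrinks the checked output (a sketch, not part of the commit): with the old arm-eabi triple, llc uses the soft-float AAPCS, so 64- and 128-bit vector arguments arrive in r0-r3 or on the stack and results return in core registers, forcing vmov/vld1.64 glue around every NEON operation; armv7a-eabihf selects the hard-float AAPCS-VFP calling convention, where the same vectors arrive and return directly in d0/d1/q0/q1, and the armv7-a baseline returns with bx lr instead of mov pc, lr. A minimal hypothetical reproducer (the file name repro.ll and function name @f are illustrative, not from the patch):

  ; repro.ll - any function taking and returning a small vector shows the effect
  define <8 x i8> @f(<8 x i8> %a, <8 x i8> %b) {
    %r = add <8 x i8> %a, %b
    ret <8 x i8> %r
  }

  $ llc -mtriple=arm-eabi -mattr=+neon repro.ll -o -
  ; soft-float ABI: vmov d16, r2, r3 / vmov d17, r0, r1 / vadd.i8 / vmov r0, r1, d16 / mov pc, lr
  $ llc -mtriple=armv7a-eabihf -mattr=+neon repro.ll -o -
  ; hard-float ABI: vadd.i8 d0, d0, d1 / bx lr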
Added:
Modified:
llvm/test/CodeGen/ARM/neon_vabd.ll
llvm/test/CodeGen/ARM/vaba.ll
llvm/test/CodeGen/ARM/vabd.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll
index 907e11c0cf19d..8695c3e5f3db9 100644
--- a/llvm/test/CodeGen/ARM/neon_vabd.ll
+++ b/llvm/test/CodeGen/ARM/neon_vabd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
;
; SABD
@@ -8,11 +8,8 @@
define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8b:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.s8 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <8 x i8> %a to <8 x i16>
%b.sext = sext <8 x i8> %b to <8 x i16>
%sub = sub <8 x i16> %a.sext, %b.sext
@@ -24,14 +21,8 @@ define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_16b:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.sext = sext <16 x i8> %a to <16 x i16>
%b.sext = sext <16 x i8> %b to <16 x i16>
%sub = sub <16 x i16> %a.sext, %b.sext
@@ -43,11 +34,8 @@ define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4h:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.s16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <4 x i16> %a to <4 x i32>
%b.sext = sext <4 x i16> %b to <4 x i32>
%sub = sub <4 x i32> %a.sext, %b.sext
@@ -59,15 +47,12 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vshl.i16 d16, d16, #8
-; CHECK-NEXT: vshl.i16 d17, d17, #8
+; CHECK-NEXT: vshl.i16 d16, d1, #8
+; CHECK-NEXT: vshl.i16 d17, d0, #8
; CHECK-NEXT: vshr.s16 d16, d16, #8
; CHECK-NEXT: vshr.s16 d17, d17, #8
-; CHECK-NEXT: vabd.s16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 d0, d17, d16
+; CHECK-NEXT: bx lr
%a.sext = sext <4 x i8> %a to <4 x i16>
%b.sext = sext <4 x i8> %b to <4 x i16>
%sub = sub <4 x i16> %a.sext, %b.sext
@@ -78,14 +63,8 @@ define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_8h:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s16 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.sext = sext <8 x i16> %a to <8 x i32>
%b.sext = sext <8 x i16> %b to <8 x i32>
%sub = sub <8 x i32> %a.sext, %b.sext
@@ -97,12 +76,8 @@ define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.s8 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s8 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <8 x i8> %a to <8 x i16>
%b.sext = sext <8 x i8> %b to <8 x i16>
%sub = sub <8 x i16> %a.sext, %b.sext
@@ -113,11 +88,8 @@ define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.s32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <2 x i32> %a to <2 x i64>
%b.sext = sext <2 x i32> %b to <2 x i64>
%sub = sub <2 x i64> %a.sext, %b.sext
@@ -129,15 +101,12 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vshl.i32 d16, d16, #16
-; CHECK-NEXT: vshl.i32 d17, d17, #16
+; CHECK-NEXT: vshl.i32 d16, d1, #16
+; CHECK-NEXT: vshl.i32 d17, d0, #16
; CHECK-NEXT: vshr.s32 d16, d16, #16
; CHECK-NEXT: vshr.s32 d17, d17, #16
-; CHECK-NEXT: vabd.s32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 d0, d17, d16
+; CHECK-NEXT: bx lr
%a.sext = sext <2 x i16> %a to <2 x i32>
%b.sext = sext <2 x i16> %b to <2 x i32>
%sub = sub <2 x i32> %a.sext, %b.sext
@@ -148,14 +117,8 @@ define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s32 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.sext = sext <4 x i32> %a to <4 x i64>
%b.sext = sext <4 x i32> %b to <4 x i64>
%sub = sub <4 x i64> %a.sext, %b.sext
@@ -167,12 +130,8 @@ define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.s16 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s16 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <4 x i16> %a to <4 x i32>
%b.sext = sext <4 x i16> %b to <4 x i32>
%sub = sub <4 x i32> %a.sext, %b.sext
@@ -183,38 +142,35 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_2d:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: add r12, sp, #24
-; CHECK-NEXT: asr r6, r3, #31
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: vmov r12, lr, d17
-; CHECK-NEXT: vmov r7, r5, d16
-; CHECK-NEXT: subs r2, r2, r12
-; CHECK-NEXT: sbcs r3, r3, lr
-; CHECK-NEXT: sbcs r4, r6, lr, asr #31
-; CHECK-NEXT: sbc r6, r6, lr, asr #31
-; CHECK-NEXT: eor r2, r2, r6, asr #31
-; CHECK-NEXT: eor r3, r3, r6, asr #31
-; CHECK-NEXT: subs r2, r2, r6, asr #31
-; CHECK-NEXT: sbc r3, r3, r6, asr #31
-; CHECK-NEXT: subs r0, r0, r7
-; CHECK-NEXT: asr r6, r1, #31
-; CHECK-NEXT: sbcs r1, r1, r5
-; CHECK-NEXT: sbcs r7, r6, r5, asr #31
-; CHECK-NEXT: vmov.32 d17[0], r2
-; CHECK-NEXT: sbc r7, r6, r5, asr #31
-; CHECK-NEXT: eor r0, r0, r7, asr #31
-; CHECK-NEXT: subs r0, r0, r7, asr #31
-; CHECK-NEXT: vmov.32 d16[0], r0
-; CHECK-NEXT: eor r0, r1, r7, asr #31
-; CHECK-NEXT: sbc r0, r0, r7, asr #31
-; CHECK-NEXT: vmov.32 d17[1], r3
-; CHECK-NEXT: vmov.32 d16[1], r0
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: vmov lr, r1, d1
+; CHECK-NEXT: vmov r2, r3, d3
+; CHECK-NEXT: vmov r12, r0, d0
+; CHECK-NEXT: subs lr, lr, r2
+; CHECK-NEXT: asr r4, r1, #31
+; CHECK-NEXT: sbcs r1, r1, r3
+; CHECK-NEXT: sbcs r2, r4, r3, asr #31
+; CHECK-NEXT: vmov r2, r5, d2
+; CHECK-NEXT: sbc r3, r4, r3, asr #31
+; CHECK-NEXT: eor r4, lr, r3, asr #31
+; CHECK-NEXT: eor r1, r1, r3, asr #31
+; CHECK-NEXT: subs r4, r4, r3, asr #31
+; CHECK-NEXT: sbc lr, r1, r3, asr #31
+; CHECK-NEXT: asr r3, r0, #31
+; CHECK-NEXT: vmov.32 d1[0], r4
+; CHECK-NEXT: subs r2, r12, r2
+; CHECK-NEXT: sbcs r0, r0, r5
+; CHECK-NEXT: sbcs r1, r3, r5, asr #31
+; CHECK-NEXT: sbc r1, r3, r5, asr #31
+; CHECK-NEXT: eor r2, r2, r1, asr #31
+; CHECK-NEXT: eor r0, r0, r1, asr #31
+; CHECK-NEXT: subs r2, r2, r1, asr #31
+; CHECK-NEXT: sbc r0, r0, r1, asr #31
+; CHECK-NEXT: vmov.32 d0[0], r2
+; CHECK-NEXT: vmov.32 d1[1], lr
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: pop {r4, r5, r11, pc}
%a.sext = sext <2 x i64> %a to <2 x i128>
%b.sext = sext <2 x i64> %b to <2 x i128>
%sub = sub <2 x i128> %a.sext, %b.sext
@@ -226,12 +182,8 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.s32 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s32 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.sext = sext <2 x i32> %a to <2 x i64>
%b.sext = sext <2 x i32> %b to <2 x i64>
%sub = sub <2 x i64> %a.sext, %b.sext
@@ -246,11 +198,8 @@ define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8b:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.u8 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <8 x i8> %a to <8 x i16>
%b.zext = zext <8 x i8> %b to <8 x i16>
%sub = sub <8 x i16> %a.zext, %b.zext
@@ -262,14 +211,8 @@ define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_16b:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.zext = zext <16 x i8> %a to <16 x i16>
%b.zext = zext <16 x i8> %b to <16 x i16>
%sub = sub <16 x i16> %a.zext, %b.zext
@@ -281,11 +224,8 @@ define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4h:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.u16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u16 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <4 x i16> %a to <4 x i32>
%b.zext = zext <4 x i16> %b to <4 x i32>
%sub = sub <4 x i32> %a.zext, %b.zext
@@ -297,13 +237,10 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbic.i16 d16, #0xff00
-; CHECK-NEXT: vbic.i16 d17, #0xff00
-; CHECK-NEXT: vabd.u16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vbic.i16 d1, #0xff00
+; CHECK-NEXT: vbic.i16 d0, #0xff00
+; CHECK-NEXT: vabd.u16 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <4 x i8> %a to <4 x i16>
%b.zext = zext <4 x i8> %b to <4 x i16>
%sub = sub <4 x i16> %a.zext, %b.zext
@@ -314,14 +251,8 @@ define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_8h:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u16 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.zext = zext <8 x i16> %a to <8 x i32>
%b.zext = zext <8 x i16> %b to <8 x i32>
%sub = sub <8 x i32> %a.zext, %b.zext
@@ -333,12 +264,8 @@ define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.u8 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u8 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <8 x i8> %a to <8 x i16>
%b.zext = zext <8 x i8> %b to <8 x i16>
%sub = sub <8 x i16> %a.zext, %b.zext
@@ -349,11 +276,8 @@ define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabd.u32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u32 d0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <2 x i32> %a to <2 x i64>
%b.zext = zext <2 x i32> %b to <2 x i64>
%sub = sub <2 x i64> %a.zext, %b.zext
@@ -366,13 +290,10 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d16, #0xffff
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vand d17, d17, d16
-; CHECK-NEXT: vand d16, d18, d16
-; CHECK-NEXT: vabd.u32 d16, d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vand d17, d1, d16
+; CHECK-NEXT: vand d16, d0, d16
+; CHECK-NEXT: vabd.u32 d0, d16, d17
+; CHECK-NEXT: bx lr
%a.zext = zext <2 x i16> %a to <2 x i32>
%b.zext = zext <2 x i16> %b to <2 x i32>
%sub = sub <2 x i32> %a.zext, %b.zext
@@ -383,14 +304,8 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u32 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
%a.zext = zext <4 x i32> %a to <4 x i64>
%b.zext = zext <4 x i32> %b to <4 x i64>
%sub = sub <4 x i64> %a.zext, %b.zext
@@ -402,12 +317,8 @@ define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.u16 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u16 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <4 x i16> %a to <4 x i32>
%b.zext = zext <4 x i16> %b to <4 x i32>
%sub = sub <4 x i32> %a.zext, %b.zext
@@ -418,37 +329,34 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_2d:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: add r12, sp, #24
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: vmov r12, lr, d17
-; CHECK-NEXT: vmov r4, r7, d16
-; CHECK-NEXT: subs r2, r2, r12
-; CHECK-NEXT: sbcs r3, r3, lr
-; CHECK-NEXT: sbcs r5, r6, #0
-; CHECK-NEXT: sbc r5, r6, #0
-; CHECK-NEXT: eor r2, r2, r5, asr #31
-; CHECK-NEXT: eor r3, r3, r5, asr #31
-; CHECK-NEXT: subs r2, r2, r5, asr #31
-; CHECK-NEXT: sbc r3, r3, r5, asr #31
-; CHECK-NEXT: subs r0, r0, r4
-; CHECK-NEXT: sbcs r1, r1, r7
-; CHECK-NEXT: vmov.32 d17[0], r2
-; CHECK-NEXT: sbcs r7, r6, #0
-; CHECK-NEXT: sbc r7, r6, #0
-; CHECK-NEXT: eor r0, r0, r7, asr #31
-; CHECK-NEXT: subs r0, r0, r7, asr #31
-; CHECK-NEXT: vmov.32 d16[0], r0
-; CHECK-NEXT: eor r0, r1, r7, asr #31
-; CHECK-NEXT: sbc r0, r0, r7, asr #31
-; CHECK-NEXT: vmov.32 d17[1], r3
-; CHECK-NEXT: vmov.32 d16[1], r0
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: vmov r0, r12, d3
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: vmov lr, r6, d2
+; CHECK-NEXT: vmov r4, r5, d0
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbcs r2, r3, r12
+; CHECK-NEXT: sbcs r3, r1, #0
+; CHECK-NEXT: sbc r3, r1, #0
+; CHECK-NEXT: eor r0, r0, r3, asr #31
+; CHECK-NEXT: eor r2, r2, r3, asr #31
+; CHECK-NEXT: subs r0, r0, r3, asr #31
+; CHECK-NEXT: sbc r2, r2, r3, asr #31
+; CHECK-NEXT: subs r3, r4, lr
+; CHECK-NEXT: sbcs r6, r5, r6
+; CHECK-NEXT: vmov.32 d1[0], r0
+; CHECK-NEXT: sbcs r5, r1, #0
+; CHECK-NEXT: sbc r1, r1, #0
+; CHECK-NEXT: eor r3, r3, r1, asr #31
+; CHECK-NEXT: subs r0, r3, r1, asr #31
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: eor r0, r6, r1, asr #31
+; CHECK-NEXT: sbc r0, r0, r1, asr #31
+; CHECK-NEXT: vmov.32 d1[1], r2
+; CHECK-NEXT: vmov.32 d0[1], r0
+; CHECK-NEXT: pop {r4, r5, r6, pc}
%a.zext = zext <2 x i64> %a to <2 x i128>
%b.zext = zext <2 x i64> %b to <2 x i128>
%sub = sub <2 x i128> %a.zext, %b.zext
@@ -460,12 +368,8 @@ define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d16, r2, r3
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vabdl.u32 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u32 q0, d0, d1
+; CHECK-NEXT: bx lr
%a.zext = zext <2 x i32> %a to <2 x i64>
%b.zext = zext <2 x i32> %b to <2 x i64>
%sub = sub <2 x i64> %a.zext, %b.zext
@@ -476,15 +380,9 @@ define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vsub.i8 q8, q8, q9
-; CHECK-NEXT: vabs.s8 q8, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vsub.i8 q8, q0, q1
+; CHECK-NEXT: vabs.s8 q0, q8
+; CHECK-NEXT: bx lr
%sub = sub nuw <16 x i8> %a, %b
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
ret <16 x i8> %abs
@@ -493,15 +391,9 @@ define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vsub.i16 q8, q8, q9
-; CHECK-NEXT: vabs.s16 q8, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vsub.i16 q8, q0, q1
+; CHECK-NEXT: vabs.s16 q0, q8
+; CHECK-NEXT: bx lr
%sub = sub nuw <8 x i16> %a, %b
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
ret <8 x i16> %abs
@@ -510,15 +402,9 @@ define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vsub.i32 q8, q8, q9
-; CHECK-NEXT: vabs.s32 q8, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vsub.i32 q8, q0, q1
+; CHECK-NEXT: vabs.s32 q0, q8
+; CHECK-NEXT: bx lr
%sub = sub nuw <4 x i32> %a, %b
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
ret <4 x i32> %abs
@@ -527,17 +413,11 @@ define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: vshr.s64 q9, q8, #63
; CHECK-NEXT: veor q8, q8, q9
-; CHECK-NEXT: vsub.i64 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vsub.i64 q0, q8, q9
+; CHECK-NEXT: bx lr
%sub = sub nuw <2 x i64> %a, %b
%abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
ret <2 x i64> %abs
@@ -546,14 +426,8 @@ define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 q0, q0, q1
+; CHECK-NEXT: bx lr
%sub = sub nsw <16 x i8> %a, %b
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
ret <16 x i8> %abs
@@ -562,14 +436,8 @@ define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s16 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
%sub = sub nsw <8 x i16> %a, %b
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
ret <8 x i16> %abs
@@ -578,14 +446,8 @@ define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s32 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 q0, q0, q1
+; CHECK-NEXT: bx lr
%sub = sub nsw <4 x i32> %a, %b
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
ret <4 x i32> %abs
@@ -594,17 +456,11 @@ define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vsub.i64 q8, q8, q9
+; CHECK-NEXT: vsub.i64 q8, q0, q1
; CHECK-NEXT: vshr.s64 q9, q8, #63
; CHECK-NEXT: veor q8, q8, q9
-; CHECK-NEXT: vsub.i64 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vsub.i64 q0, q8, q9
+; CHECK-NEXT: bx lr
%sub = sub nsw <2 x i64> %a, %b
%abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
ret <2 x i64> %abs
@@ -613,14 +469,8 @@ define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
%sub = sub <16 x i8> %a, %b
@@ -630,14 +480,8 @@ define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s16 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
%b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
%sub = sub <8 x i16> %a, %b
@@ -647,14 +491,8 @@ define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.s32 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
%b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
%sub = sub <4 x i32> %a, %b
@@ -666,57 +504,54 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: add r6, sp, #24
-; CHECK-NEXT: mov r8, #0
-; CHECK-NEXT: vld1.64 {d18, d19}, [r6]
-; CHECK-NEXT: vmov r7, r12, d19
-; CHECK-NEXT: vmov r4, lr, d18
-; CHECK-NEXT: subs r5, r2, r7
-; CHECK-NEXT: sbcs r5, r3, r12
-; CHECK-NEXT: mov r6, r7
-; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movlt r5, #1
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r6, r2
-; CHECK-NEXT: mov r5, r12
-; CHECK-NEXT: vmov.32 d17[0], r6
-; CHECK-NEXT: movne r5, r3
-; CHECK-NEXT: mov r6, r4
-; CHECK-NEXT: vmov.32 d17[1], r5
-; CHECK-NEXT: subs r5, r4, r0
-; CHECK-NEXT: sbcs r5, lr, r1
+; CHECK-NEXT: vmov r1, r0, d3
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: vmov r4, lr, d1
+; CHECK-NEXT: vmov r6, r8, d0
+; CHECK-NEXT: subs r2, r4, r1
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: sbcs r2, lr, r0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mov r2, r1
+; CHECK-NEXT: movne r3, lr
+; CHECK-NEXT: movne r2, r4
+; CHECK-NEXT: vmov.32 d17[0], r2
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov r2, r3, d2
+; CHECK-NEXT: subs r5, r2, r6
+; CHECK-NEXT: sbcs r5, r3, r8
+; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: mov r5, #0
-; CHECK-NEXT: movlt r5, #1
+; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: movne r6, r0
-; CHECK-NEXT: vmov.32 d18[0], r6
-; CHECK-NEXT: subs r6, r7, r2
-; CHECK-NEXT: sbcs r6, r12, r3
-; CHECK-NEXT: mov r6, #0
-; CHECK-NEXT: movlt r6, #1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: movne r7, r2
-; CHECK-NEXT: subs r2, r0, r4
-; CHECK-NEXT: sbcs r2, r1, lr
-; CHECK-NEXT: vmov.32 d19[0], r7
-; CHECK-NEXT: movlt r8, #1
-; CHECK-NEXT: cmp r8, #0
-; CHECK-NEXT: movne r4, r0
-; CHECK-NEXT: mov r0, lr
-; CHECK-NEXT: vmov.32 d16[0], r4
-; CHECK-NEXT: movne r0, r1
-; CHECK-NEXT: cmp r6, #0
-; CHECK-NEXT: movne r12, r3
+; CHECK-NEXT: movne r7, r6
+; CHECK-NEXT: vmov.32 d18[0], r7
+; CHECK-NEXT: subs r7, r1, r4
+; CHECK-NEXT: sbcs r7, r0, lr
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r1, r4
+; CHECK-NEXT: vmov.32 d19[0], r1
+; CHECK-NEXT: subs r1, r6, r2
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r12, #1
+; CHECK-NEXT: cmp r12, #0
+; CHECK-NEXT: movne r2, r6
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: vmov.32 d16[0], r2
+; CHECK-NEXT: movne r1, r8
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r0, lr
; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: vmov.32 d16[1], r0
-; CHECK-NEXT: movne lr, r1
-; CHECK-NEXT: vmov.32 d19[1], r12
-; CHECK-NEXT: vmov.32 d18[1], lr
-; CHECK-NEXT: vsub.i64 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vmov.32 d16[1], r1
+; CHECK-NEXT: movne r3, r8
+; CHECK-NEXT: vmov.32 d19[1], r0
+; CHECK-NEXT: vmov.32 d18[1], r3
+; CHECK-NEXT: vsub.i64 q0, q9, q8
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
%a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
%b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
%sub = sub <2 x i64> %a, %b
@@ -726,14 +561,8 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
%sub = sub <16 x i8> %a, %b
@@ -743,14 +572,8 @@ define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u16 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u16 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
%b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
%sub = sub <8 x i16> %a, %b
@@ -760,14 +583,8 @@ define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u32 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u32 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
%b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
%sub = sub <4 x i32> %a, %b
@@ -777,18 +594,12 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vqsub.u64 q10, q8, q9
-; CHECK-NEXT: vqsub.u64 q9, q9, q8
-; CHECK-NEXT: vsub.i64 q10, q10, q8
-; CHECK-NEXT: vadd.i64 q8, q8, q9
-; CHECK-NEXT: vadd.i64 q8, q8, q10
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vqsub.u64 q8, q0, q1
+; CHECK-NEXT: vqsub.u64 q9, q1, q0
+; CHECK-NEXT: vsub.i64 q8, q8, q0
+; CHECK-NEXT: vadd.i64 q9, q0, q9
+; CHECK-NEXT: vadd.i64 q0, q9, q8
+; CHECK-NEXT: bx lr
%a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
%b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
%sub = sub <2 x i64> %a, %b
@@ -798,14 +609,8 @@ define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov d17, r2, r3
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vabd.u8 q8, q8, q9
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 q0, q0, q1
+; CHECK-NEXT: bx lr
%a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
%sub = sub <16 x i8> %a, %b
diff --git a/llvm/test/CodeGen/ARM/vaba.ll b/llvm/test/CodeGen/ARM/vaba.ll
index 14419a345d82b..090eaca96b26f 100644
--- a/llvm/test/CodeGen/ARM/vaba.ll
+++ b/llvm/test/CodeGen/ARM/vaba.ll
@@ -1,15 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.s8 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.s8 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -23,10 +22,9 @@ define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.s16 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.s16 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -40,10 +38,9 @@ define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.s32 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.s32 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -57,10 +54,9 @@ define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.u8 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.u8 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -74,10 +70,9 @@ define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.u16 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.u16 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -91,10 +86,9 @@ define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d18, [r0]
-; CHECK-NEXT: vaba.u32 d18, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vldr d0, [r0]
+; CHECK-NEXT: vaba.u32 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -108,11 +102,9 @@ define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.s8 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.s8 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -126,11 +118,9 @@ define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.s16 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.s16 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -144,11 +134,9 @@ define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.s32 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.s32 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -162,11 +150,9 @@ define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.u8 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.u8 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -180,11 +166,9 @@ define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.u16 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.u16 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -198,11 +182,9 @@ define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
-; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
-; CHECK-NEXT: vaba.u32 q10, q9, q8
-; CHECK-NEXT: vmov r0, r1, d20
-; CHECK-NEXT: vmov r2, r3, d21
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vaba.u32 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -232,11 +214,9 @@ define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.s8 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.s8 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -251,11 +231,9 @@ define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.s16 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.s16 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -270,11 +248,9 @@ define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.s32 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.s32 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -289,11 +265,9 @@ define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.u8 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.u8 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -308,11 +282,9 @@ define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.u16 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.u16 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -327,11 +299,9 @@ define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabal.u32 q9, d17, d16
-; CHECK-NEXT: vmov r0, r1, d18
-; CHECK-NEXT: vmov r2, r3, d19
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
+; CHECK-NEXT: vabal.u32 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
diff --git a/llvm/test/CodeGen/ARM/vabd.ll b/llvm/test/CodeGen/ARM/vabd.ll
index 4184e9275a25b..398f7963535cd 100644
--- a/llvm/test/CodeGen/ARM/vabd.ll
+++ b/llvm/test/CodeGen/ARM/vabd.ll
@@ -1,14 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.s8 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -20,9 +19,8 @@ define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.s16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -34,9 +32,8 @@ define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.s32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -48,9 +45,8 @@ define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.u8 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -62,9 +58,8 @@ define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.u16 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u16 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -76,9 +71,8 @@ define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.u32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u32 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -90,9 +84,8 @@ define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabd.f32 d16, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.f32 d0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x float>, ptr %A
%tmp2 = load <2 x float>, ptr %B
%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
@@ -104,10 +97,8 @@ define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.s8 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s8 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -119,10 +110,8 @@ define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.s16 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s16 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -134,10 +123,8 @@ define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.s32 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.s32 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -149,10 +136,8 @@ define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.u8 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u8 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -164,10 +149,8 @@ define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.u16 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u16 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -179,10 +162,8 @@ define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.u32 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.u32 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -194,10 +175,8 @@ define <4 x float> @vabdQf32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vabd.f32 q8, q9, q8
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabd.f32 q0, q9, q8
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x float>, ptr %A
%tmp2 = load <4 x float>, ptr %B
%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
@@ -229,10 +208,8 @@ define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.s8 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s8 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -245,10 +222,8 @@ define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.s16 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s16 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -261,10 +236,8 @@ define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.s32 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.s32 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -277,10 +250,8 @@ define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.u8 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u8 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -293,10 +264,8 @@ define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.u16 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u16 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -309,10 +278,8 @@ define <2 x i64> @vabdlu32(ptr %A, ptr %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
-; CHECK-NEXT: vabdl.u32 q8, d17, d16
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vabdl.u32 q0, d17, d16
+; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)