[llvm] d81d608 - [AArch64] Add ABD combine tests. NFC
Author: David Green
Date: 2023-02-04T19:18:50Z
New Revision: d81d60876ac6586a79e567062f5d1cc3ac68772b
URL: https://github.com/llvm/llvm-project/commit/d81d60876ac6586a79e567062f5d1cc3ac68772b
DIFF: https://github.com/llvm/llvm-project/commit/d81d60876ac6586a79e567062f5d1cc3ac68772b.diff
LOG: [AArch64] Add ABD combine tests. NFC
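
The tests added here exercise the widen/subtract/abs/narrow idiom that the
backend can fold to a single absolute-difference instruction (uabd/sabd on
AArch64, vabd on MVE), as well as the @llvm.aarch64.neon.uabd/sabd intrinsics
under constant, zero, undef, commuted and reassociated inputs. A minimal
sketch of the unsigned form, distilled from the tests in this patch (the
function name abd_sketch is illustrative):

  define <8 x i16> @abd_sketch(<8 x i16> %a, <8 x i16> %b) {
    ; Widen both operands so the subtraction computes the exact
    ; (possibly negative) difference, take its absolute value, then
    ; narrow back to the original element type. Per the CHECK lines
    ; below, this whole sequence lowers to: uabd v0.8h, v0.8h, v1.8h
    %wa = zext <8 x i16> %a to <8 x i32>
    %wb = zext <8 x i16> %b to <8 x i32>
    %d = sub <8 x i32> %wa, %wb
    %ad = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %d, i1 false)
    %r = trunc <8 x i32> %ad to <8 x i16>
    ret <8 x i16> %r
  }
  declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

The signed variants are identical except that the operands are sign-extended
and the expected lowering is sabd.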
Added:
llvm/test/CodeGen/AArch64/abd-combine.ll
Modified:
llvm/test/CodeGen/Thumb2/mve-vabdus.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
new file mode 100644
index 0000000000000..e2ed700bf3636
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -0,0 +1,461 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_base:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+ %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_lhs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: usubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT: usubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT: neg v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_both() {
+; CHECK-LABEL: abdu_const_both:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #2
+; CHECK-NEXT: ret
+ %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_bothhigh() {
+; CHECK-LABEL: abdu_const_bothhigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
+ %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+ %zextsrc2 = zext <8 x i16> undef to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_i_base:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #1
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_lhs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #1
+; CHECK-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_both() {
+; CHECK-LABEL: abdu_i_const_both:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: movi v1.8h, #3
+; CHECK-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_bothhigh() {
+; CHECK-LABEL: abdu_i_const_bothhigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT: mvni v1.8h, #1
+; CHECK-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_onehigh() {
+; CHECK-LABEL: abdu_i_const_onehigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32766
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_oneneg() {
+; CHECK-LABEL: abdu_i_const_oneneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32766
+; CHECK-NEXT: mvni v1.8h, #1
+; CHECK-NEXT: dup v0.8h, w8
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #3
+; CHECK-NEXT: movi v2.8h, #1
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: uabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+ %r1 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+ %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+
+
+
+
+define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_base:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+ %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_lhs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: ssubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT: ssubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT: neg v1.4s, v2.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+ %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_both() {
+; CHECK-LABEL: abds_const_both:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #2
+; CHECK-NEXT: ret
+ %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_bothhigh() {
+; CHECK-LABEL: abds_const_bothhigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
+ %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abds_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: abs v0.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+ %zextsrc2 = sext <8 x i16> undef to <8 x i32>
+ %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+ %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+ %result = trunc <8 x i32> %abs to <8 x i16>
+ ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_i_base:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #1
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_lhs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #1
+; CHECK-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_both() {
+; CHECK-LABEL: abds_i_const_both:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: movi v1.8h, #3
+; CHECK-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_bothhigh() {
+; CHECK-LABEL: abds_i_const_bothhigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32766
+; CHECK-NEXT: mvni v1.8h, #128, lsl #8
+; CHECK-NEXT: dup v0.8h, w8
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_onehigh() {
+; CHECK-LABEL: abds_i_const_onehigh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32766
+; CHECK-NEXT: movi v0.8h, #1
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_oneneg() {
+; CHECK-LABEL: abds_i_const_oneneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32766
+; CHECK-NEXT: mvni v1.8h, #1
+; CHECK-NEXT: dup v0.8h, w8
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #3
+; CHECK-NEXT: movi v2.8h, #1
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: sabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT: ret
+ %r1 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+ %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %result
+}
+
+
+declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
index b832d52711e97..87e0997d43f45 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
@@ -613,3 +613,53 @@ vector.body: ; preds = %vector.body, %entry
for.cond.cleanup: ; preds = %vector.body
ret void
}
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_commutative(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_commutative:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vabd.u32 q2, q1, q0
+; CHECK-NEXT: vabd.u32 q0, q0, q1
+; CHECK-NEXT: vadd.i32 q0, q0, q2
+; CHECK-NEXT: bx lr
+ %azextsrc1 = zext <4 x i32> %src1 to <4 x i64>
+ %azextsrc2 = zext <4 x i32> %src2 to <4 x i64>
+ %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2
+ %aadd2 = sub <4 x i64> zeroinitializer, %aadd1
+ %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+ %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2
+ %aresult = trunc <4 x i64> %as to <4 x i32>
+ %bzextsrc1 = zext <4 x i32> %src2 to <4 x i64>
+ %bzextsrc2 = zext <4 x i32> %src1 to <4 x i64>
+ %badd1 = sub <4 x i64> %bzextsrc1, %bzextsrc2
+ %badd2 = sub <4 x i64> zeroinitializer, %badd1
+ %bc = icmp sge <4 x i64> %badd1, zeroinitializer
+ %bs = select <4 x i1> %bc, <4 x i64> %badd1, <4 x i64> %badd2
+ %bresult = trunc <4 x i64> %bs to <4 x i32>
+ %r = add <4 x i32> %aresult, %bresult
+ ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_shuffle(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_shuffle:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.f32 s8, s7
+; CHECK-NEXT: vmov.f32 s9, s6
+; CHECK-NEXT: vmov.f32 s10, s5
+; CHECK-NEXT: vmov.f32 s11, s4
+; CHECK-NEXT: vmov.f32 s4, s3
+; CHECK-NEXT: vmov.f32 s5, s2
+; CHECK-NEXT: vmov.f32 s6, s1
+; CHECK-NEXT: vmov.f32 s7, s0
+; CHECK-NEXT: vabd.u32 q0, q1, q2
+; CHECK-NEXT: bx lr
+ %s1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %s2 = shufflevector <4 x i32> %src2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %azextsrc1 = zext <4 x i32> %s1 to <4 x i64>
+ %azextsrc2 = zext <4 x i32> %s2 to <4 x i64>
+ %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2
+ %aadd2 = sub <4 x i64> zeroinitializer, %aadd1
+ %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+ %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2
+ %aresult = trunc <4 x i64> %as to <4 x i32>
+ ret <4 x i32> %aresult
+}