[llvm] d81d608 - [AArch64] Add ABD combine tests. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 4 11:18:55 PST 2023


Author: David Green
Date: 2023-02-04T19:18:50Z
New Revision: d81d60876ac6586a79e567062f5d1cc3ac68772b

URL: https://github.com/llvm/llvm-project/commit/d81d60876ac6586a79e567062f5d1cc3ac68772b
DIFF: https://github.com/llvm/llvm-project/commit/d81d60876ac6586a79e567062f5d1cc3ac68772b.diff

LOG: [AArch64] Add ABD combine tests. NFC

Added: 
    llvm/test/CodeGen/AArch64/abd-combine.ll

Modified: 
    llvm/test/CodeGen/Thumb2/mve-vabdus.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
new file mode 100644
index 0000000000000..e2ed700bf3636
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -0,0 +1,461 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> ; baseline unsigned abd pattern: widen both inputs so the i32 subtraction cannot wrap
+  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; the whole zext/sub/abs/trunc chain is expected to select as one uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; RHS is a splat of 1 already in the wide type (no zext on it)
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output keeps the widened ushll/sub/abs/uzp1 sequence rather than a single uabd
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    usubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT:    usubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1 ; same as abdu_const but with the splat-1 constant on the LHS of the subtraction
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output uses widening usubw/usubw2 plus abs, not a single uabd
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1 ; LHS is all zeros, so this computes abs(0 - zext(x))
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output still negates/subtracts from zero in the wide type before taking abs
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_both() {
+; CHECK-LABEL: abdu_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #2
+; CHECK-NEXT:    ret
+  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; both operands are constant splats: |3 - 1| = 2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; expected output is the fully folded splat of 2
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_const_bothhigh() {
+; CHECK-LABEL: abdu_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32> ; constants with the top i16 bit set; zext keeps them as 65534 and 65535
+  %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; |65534 - 65535| = 1, expected to fold to a splat of 1
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %zextsrc2 = zext <8 x i16> undef to <8 x i32> ; second operand of the abd pattern is undef
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; expected output is a bare ret with no arithmetic left
+  ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_i_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2) ; baseline for the intrinsic form: a direct call is expected to select one uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; intrinsic with a splat-1 RHS; expected output materialises the constant with movi and emits uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1) ; splat-1 constant as the first operand; expected output keeps the constant on the LHS of uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1) ; %t is unused; with it declared first the expected output reads %src1 from v1 and zeroes v0 for the constant LHS
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_both() {
+; CHECK-LABEL: abdu_i_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; both operands constant (|3 - 1|); expected output still materialises both splats and emits uabd, i.e. no constant folding
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_bothhigh() {
+; CHECK-LABEL: abdu_i_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>) ; both constants have the top i16 bit set (65534 vs 65535); expected output materialises both and emits uabd, not a folded constant
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_onehigh() {
+; CHECK-LABEL: abdu_i_const_onehigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; one large constant (32766) against a small one (1); expected output builds 32766 via mov+dup and emits uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_const_oneneg() {
+; CHECK-LABEL: abdu_i_const_oneneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>) ; 65534 is the i16 bit pattern of -2, paired with positive 32766; expected output materialises both and emits uabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1) ; zero LHS with an unused vector %t declared first; expected output zeroes v0 and still emits uabd against v1
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1) ; undef LHS; expected output still emits uabd, with v0 left as whatever it held on entry
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abdu_i_reassoc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    movi v2.8h, #1
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ret
+  %r1 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) ; inner abd against splat 3
+  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; outer abd against splat 1; expected output keeps both uabd instructions with the constants uncombined
+  ret <8 x i16> %result
+}
+
+
+
+
+
+define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> ; NOTE: named zextsrc* but these are sign extensions (signed variant of abdu_base)
+  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; the whole sext/sub/abs/trunc chain is expected to select as one sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> ; NOTE: named zextsrc1 but this is a sign extension
+  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; RHS is a splat of 1 already in the wide type (no sext on it)
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output keeps the widened sshll/sub/abs/uzp1 sequence rather than a single sabd
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ssubw2 v2.4s, v1.4s, v0.8h
+; CHECK-NEXT:    ssubw v0.4s, v1.4s, v0.4h
+; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> ; NOTE: named zextsrc1 but this is a sign extension
+  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1 ; same as abds_const but with the splat-1 constant on the LHS of the subtraction
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output uses widening ssubw/ssubw2 plus abs, not a single sabd
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> ; NOTE: named zextsrc1 but this is a sign extension
+  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1 ; LHS is all zeros, so this computes abs(0 - sext(x))
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; expected output still negates/subtracts from zero in the wide type before taking abs
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_both() {
+; CHECK-LABEL: abds_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #2
+; CHECK-NEXT:    ret
+  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; both operands are constant splats: |3 - 1| = 2 (body is identical to abdu_const_both; there is no extension here)
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; expected output is the fully folded splat of 2
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_const_bothhigh() {
+; CHECK-LABEL: abds_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32> ; NOTE: sign extension despite the name; as signed i16, 65534 and 65535 are -2 and -1
+  %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
+  %result = trunc <8 x i32> %abs to <8 x i16> ; |-2 - (-1)| = 1, expected to fold to a splat of 1
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_undef(<8 x i16> %src1) {
+; CHECK-LABEL: abds_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    abs v1.4s, v1.4s
+; CHECK-NEXT:    abs v0.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32> ; NOTE: named zextsrc* but these are sign extensions
+  %zextsrc2 = sext <8 x i16> undef to <8 x i32> ; second operand of the abd pattern is undef
+  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0) ; unlike abdu_undef, the expected output still sign-extends %src1 and takes abs; only the subtraction disappears
+  %result = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %result
+}
+
+
+
+define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_i_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2) ; baseline for the signed intrinsic form: a direct call is expected to select one sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; intrinsic with a splat-1 RHS; expected output materialises the constant with movi and emits sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1) ; splat-1 constant as the first operand; expected output keeps the constant on the LHS of sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_const_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1) ; zero LHS; expected output zeroes v1 and emits sabd with %src1 still in v0
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_both() {
+; CHECK-LABEL: abds_i_const_both:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; both operands constant (|3 - 1|); expected output still materialises both splats and emits sabd, i.e. no constant folding
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_bothhigh() {
+; CHECK-LABEL: abds_i_const_bothhigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>) ; 32766 and 32767 are the two largest positive i16 values; expected output materialises both and emits sabd, not a folded constant
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_onehigh() {
+; CHECK-LABEL: abds_i_const_onehigh:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    movi v0.8h, #1
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; one large positive constant (32766) against a small one (1); expected output builds 32766 via mov+dup and emits sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_const_oneneg() {
+; CHECK-LABEL: abds_i_const_oneneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32766
+; CHECK-NEXT:    mvni v1.8h, #1
+; CHECK-NEXT:    dup v0.8h, w8
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>) ; 65534 is -2 as a signed i16, so the operands have opposite signs; expected output materialises both and emits sabd
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1) ; zero LHS with an unused vector %t declared first; expected output zeroes v0 and still emits sabd against v1
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
+; CHECK-LABEL: abds_i_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1) ; undef LHS; expected output still emits sabd, with v0 left as whatever it held on entry
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
+; CHECK-LABEL: abds_i_reassoc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #3
+; CHECK-NEXT:    movi v2.8h, #1
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v2.8h
+; CHECK-NEXT:    ret
+  %r1 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) ; inner abd against splat 3
+  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; outer abd against splat 1; expected output keeps both sabd instructions with the constants uncombined
+  ret <8 x i16> %result
+}
+
+
+declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
index b832d52711e97..87e0997d43f45 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vabdus.ll
@@ -613,3 +613,53 @@ vector.body:                                      ; preds = %vector.body, %entry
 for.cond.cleanup:                                 ; preds = %vector.body
   ret void
 }
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_commutative(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_commutative:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vabd.u32 q2, q1, q0
+; CHECK-NEXT:    vabd.u32 q0, q0, q1
+; CHECK-NEXT:    vadd.i32 q0, q0, q2
+; CHECK-NEXT:    bx lr
+  %azextsrc1 = zext <4 x i32> %src1 to <4 x i64> ; first copy: unsigned abd of (src1, src2) computed in i64
+  %azextsrc2 = zext <4 x i32> %src2 to <4 x i64>
+  %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2 ; named add* but holds the difference
+  %aadd2 = sub <4 x i64> zeroinitializer, %aadd1 ; negated difference
+  %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+  %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2 ; select(x >= 0, x, -x): absolute value written as compare+select instead of llvm.abs
+  %aresult = trunc <4 x i64> %as to <4 x i32>
+  %bzextsrc1 = zext <4 x i32> %src2 to <4 x i64> ; second copy: same computation with src1 and src2 swapped
+  %bzextsrc2 = zext <4 x i32> %src1 to <4 x i64>
+  %badd1 = sub <4 x i64> %bzextsrc1, %bzextsrc2
+  %badd2 = sub <4 x i64> zeroinitializer, %badd1
+  %bc = icmp sge <4 x i64> %badd1, zeroinitializer
+  %bs = select <4 x i1> %bc, <4 x i64> %badd1, <4 x i64> %badd2
+  %bresult = trunc <4 x i64> %bs to <4 x i32>
+  %r = add <4 x i32> %aresult, %bresult ; expected output keeps two vabd.u32 with swapped operands and adds them (the two are not merged)
+  ret <4 x i32> %r
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vabd_v4u32_shuffle(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vabd_v4u32_shuffle:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.f32 s8, s7
+; CHECK-NEXT:    vmov.f32 s9, s6
+; CHECK-NEXT:    vmov.f32 s10, s5
+; CHECK-NEXT:    vmov.f32 s11, s4
+; CHECK-NEXT:    vmov.f32 s4, s3
+; CHECK-NEXT:    vmov.f32 s5, s2
+; CHECK-NEXT:    vmov.f32 s6, s1
+; CHECK-NEXT:    vmov.f32 s7, s0
+; CHECK-NEXT:    vabd.u32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+  %s1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; both inputs are lane-reversed (mask 3,2,1,0) before the abd pattern
+  %s2 = shufflevector <4 x i32> %src2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %azextsrc1 = zext <4 x i32> %s1 to <4 x i64>
+  %azextsrc2 = zext <4 x i32> %s2 to <4 x i64>
+  %aadd1 = sub <4 x i64> %azextsrc1, %azextsrc2 ; named add* but holds the difference
+  %aadd2 = sub <4 x i64> zeroinitializer, %aadd1
+  %ac = icmp sge <4 x i64> %aadd1, zeroinitializer
+  %as = select <4 x i1> %ac, <4 x i64> %aadd1, <4 x i64> %aadd2 ; select(x >= 0, x, -x): absolute value of the widened difference
+  %aresult = trunc <4 x i64> %as to <4 x i32> ; expected output reverses each input with four vmov.f32 and then emits one vabd.u32 (shuffles stay on the inputs)
+  ret <4 x i32> %aresult
+}


        


More information about the llvm-commits mailing list