[llvm] [AArch64] Add @llvm.experimental.vector.match (PR #101974)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 23 01:53:52 PDT 2024
================
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+
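+; The @llvm.experimental.vector.match intrinsic sets each active result lane
+; to true iff the corresponding element of %op1 equals any element of the
+; fixed-length needle vector %op2; lanes inactive in %mask yield false.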
+define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umov w8, v1.b[0]
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
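+; Needles smaller than a 128-bit segment are expanded elementwise: each
+; needle element is broadcast and compared against %op1, the compare
+; results are OR'd together, and the combined result is AND'd with the mask.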
+define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z2.b, w9
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
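+; Wider expansions need an extra predicate register, so p4 is spilled and
+; reloaded around the compare/OR sequence below.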
+define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umov w8, v1.h[1]
+; CHECK-NEXT: umov w9, v1.h[0]
+; CHECK-NEXT: umov w10, v1.h[2]
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: mov z3.b, w9
+; CHECK-NEXT: umov w8, v1.h[3]
+; CHECK-NEXT: mov z1.b, w10
+; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: mov p2.b, p3/m, p3.b
+; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
+define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umov w8, v1.b[1]
+; CHECK-NEXT: umov w9, v1.b[0]
+; CHECK-NEXT: umov w10, v1.b[2]
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: mov z3.b, w9
+; CHECK-NEXT: umov w8, v1.b[3]
+; CHECK-NEXT: mov z4.b, w10
+; CHECK-NEXT: umov w9, v1.b[4]
+; CHECK-NEXT: umov w10, v1.b[7]
+; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: umov w8, v1.b[5]
+; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z4.b
+; CHECK-NEXT: mov z3.b, w9
+; CHECK-NEXT: umov w9, v1.b[6]
+; CHECK-NEXT: mov p2.b, p3/m, p3.b
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
+; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z3.b
+; CHECK-NEXT: mov z2.b, w9
+; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: mov z1.b, w10
+; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
+; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: sel p2.b, p2, p2.b, p3.b
+; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
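+; A 16-byte needle fills a whole 128-bit segment, so it can be splatted
+; across the vector (mov z1.q, q1) and handled by a single SVE2 MATCH.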
+define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: mov z1.q, q1
+; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
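+; For fixed-length results, sub-segment needles lower to NEON
+; dup/cmeq/orr chains, with a final AND applying the mask.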
+define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
+; CHECK-LABEL: match_v16i8_v1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: dup v1.16b, v1.b[0]
+; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask)
+ ret <16 x i1> %r
+}
+
+define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
+; CHECK-LABEL: match_v16i8_v2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: dup v3.16b, v1.b[4]
+; CHECK-NEXT: dup v1.16b, v1.b[0]
+; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
+; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask)
+ ret <16 x i1> %r
+}
+
+define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
+; CHECK-LABEL: match_v16i8_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: dup v3.16b, v1.b[2]
+; CHECK-NEXT: dup v4.16b, v1.b[0]
+; CHECK-NEXT: dup v5.16b, v1.b[4]
+; CHECK-NEXT: dup v1.16b, v1.b[6]
+; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
+; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
+; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
+; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v1.16b, v4.16b, v3.16b
+; CHECK-NEXT: orr v0.16b, v5.16b, v0.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask)
+ ret <16 x i1> %r
+}
+
+define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask) #0 {
+; CHECK-LABEL: match_v16i8_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: dup v3.16b, v1.b[1]
+; CHECK-NEXT: dup v4.16b, v1.b[0]
+; CHECK-NEXT: dup v5.16b, v1.b[2]
+; CHECK-NEXT: dup v6.16b, v1.b[3]
+; CHECK-NEXT: dup v7.16b, v1.b[4]
+; CHECK-NEXT: dup v16.16b, v1.b[5]
+; CHECK-NEXT: dup v17.16b, v1.b[6]
+; CHECK-NEXT: dup v1.16b, v1.b[7]
+; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
+; CHECK-NEXT: cmeq v4.16b, v0.16b, v4.16b
+; CHECK-NEXT: cmeq v5.16b, v0.16b, v5.16b
+; CHECK-NEXT: cmeq v6.16b, v0.16b, v6.16b
+; CHECK-NEXT: cmeq v7.16b, v0.16b, v7.16b
+; CHECK-NEXT: cmeq v16.16b, v0.16b, v16.16b
+; CHECK-NEXT: orr v3.16b, v4.16b, v3.16b
+; CHECK-NEXT: orr v4.16b, v5.16b, v6.16b
+; CHECK-NEXT: orr v5.16b, v7.16b, v16.16b
+; CHECK-NEXT: cmeq v6.16b, v0.16b, v17.16b
+; CHECK-NEXT: cmeq v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v3.16b, v3.16b, v4.16b
+; CHECK-NEXT: orr v4.16b, v5.16b, v6.16b
+; CHECK-NEXT: orr v3.16b, v3.16b, v4.16b
+; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
+ ret <16 x i1> %r
+}
+
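+; A fixed 16-byte needle still uses SVE2 MATCH: the operands are widened
+; into SVE registers, the i1 mask becomes a predicate via CMPNE, and the
+; resulting predicate is materialised back into a byte vector.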
+define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 {
+; CHECK-LABEL: match_v16i8_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
+ ret <16 x i1> %r
+}
+
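+; The segment-splat + MATCH lowering applies to 16-bit elements as well.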
+define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv8i16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: mov z1.q, q1
+; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 8 x i1> @llvm.experimental.vector.match(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask)
+ ret <vscale x 8 x i1> %r
+}
+
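+; The fixed-length i16 case additionally extends the <8 x i1> mask to
+; halfword lanes (ushll) and narrows the result back to bytes (xtn).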
+define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 {
+; CHECK-LABEL: match_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: ret
+ %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask)
+ ret <8 x i1> %r
+}
+
----------------
rj-jesus wrote:
Thanks, done.
https://github.com/llvm/llvm-project/pull/101974