[llvm] [AArch64] Add @llvm.experimental.vector.match (PR #101974)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 07:59:07 PDT 2024
================
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
+
+define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v1i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umov w8, v1.b[0]
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
+define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z2.b, w9
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: sel p1.b, p1, p1.b, p2.b
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
+define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
+; CHECK-LABEL: match_nxv16i8_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: umov w8, v1.h[1]
+; CHECK-NEXT: umov w9, v1.h[0]
+; CHECK-NEXT: umov w10, v1.h[2]
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: mov z3.b, w9
+; CHECK-NEXT: umov w8, v1.h[3]
+; CHECK-NEXT: mov z1.b, w10
+; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: cmpeq p3.b, p1/z, z0.b, z3.b
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: cmpeq p4.b, p1/z, z0.b, z1.b
+; CHECK-NEXT: cmpeq p1.b, p1/z, z0.b, z2.b
+; CHECK-NEXT: mov p2.b, p3/m, p3.b
+; CHECK-NEXT: sel p2.b, p2, p2.b, p4.b
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
+; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %r = tail call <vscale x 16 x i1> @llvm.experimental.vector.match(<vscale x 16 x i8> %op1, <4 x i8> %op2, <vscale x 16 x i1> %mask)
+ ret <vscale x 16 x i1> %r
+}
+
+define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
----------------
david-arm wrote:
Yeah, I think the support you've added so far is good enough, and I do appreciate you making the effort to handle the 64-bit cases!
https://github.com/llvm/llvm-project/pull/101974
More information about the llvm-commits mailing list