[llvm] e299a08 - [AArch64][SVE] SVE2 intrinsics for character match & histogram generation

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 03:19:00 PST 2020


Author: Kerry McLaughlin
Date: 2020-02-10T11:08:00Z
New Revision: e299a0814956f2391b4fbc540cc8e6dd2e15d58f

URL: https://github.com/llvm/llvm-project/commit/e299a0814956f2391b4fbc540cc8e6dd2e15d58f
DIFF: https://github.com/llvm/llvm-project/commit/e299a0814956f2391b4fbc540cc8e6dd2e15d58f.diff

LOG: [AArch64][SVE] SVE2 intrinsics for character match & histogram generation

Summary:
Implements the following intrinsics:
 - @llvm.aarch64.sve.histcnt
 - @llvm.aarch64.sve.histseg
 - @llvm.aarch64.sve.match
 - @llvm.aarch64.sve.nmatch

Reviewers: c-rhodes, sdesmalen, dancgr, efriedma, rengolin

Reviewed By: c-rhodes

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D74117

Added: 
    llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 15a1d3931f81..524d5790ab4d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1795,6 +1795,20 @@ def int_aarch64_sve_fmlslt_lane   : SVE2_3VectorArgIndexed_Long_Intrinsic;
 
 def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;
 
+//
+// SVE2 - Vector histogram count
+//
+
+def int_aarch64_sve_histcnt : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_histseg : AdvSIMD_2VectorArg_Intrinsic;
+
+//
+// SVE2 - Character match
+//
+
+def int_aarch64_sve_match   : AdvSIMD_SVE_Compare_Intrinsic;
+def int_aarch64_sve_nmatch  : AdvSIMD_SVE_Compare_Intrinsic;
+
 //
 // SVE2 - Unary narrowing operations
 //

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7c9159219964..b8fb037f20fc 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1667,8 +1667,8 @@ let Predicates = [HasSVE2] in {
   defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
 
   // SVE2 character match
-  defm MATCH_PPzZZ  : sve2_char_match<0b0, "match">;
-  defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
+  defm MATCH_PPzZZ  : sve2_char_match<0b0, "match",  int_aarch64_sve_match>;
+  defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>;
 
   // SVE2 bitwise exclusive-or interleaved
   defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
@@ -1686,10 +1686,10 @@ let Predicates = [HasSVE2] in {
   defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
 
   // SVE2 histogram generation (segment)
-  def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
+  def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>;
 
   // SVE2 histogram generation (vector)
-  defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+  defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>;
 
   // SVE2 floating-point base 2 logarithm as integer
   defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;

diff  --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8457e76c3f79..5ed3956a7451 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -6828,20 +6828,23 @@ class sve2_char_match<bit sz, bit opc, string asm,
   let Defs = [NZCV];
 }
 
-multiclass sve2_char_match<bit opc, string asm> {
+multiclass sve2_char_match<bit opc, string asm, SDPatternOperator op> {
   def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
   def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+
+  def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_3_Op_Pat<nxv8i1,  op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
 }
 
 //===----------------------------------------------------------------------===//
 // SVE2 Histogram Computation - Segment Group
 //===----------------------------------------------------------------------===//
 
-class sve2_hist_gen_segment<string asm>
+class sve2_hist_gen_segment<string asm, SDPatternOperator op>
 : I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
   asm, "\t$Zd, $Zn, $Zm",
   "",
-  []>, Sched<[]> {
+  [(set nxv16i8:$Zd, (op nxv16i8:$Zn, nxv16i8:$Zm))]>, Sched<[]> {
   bits<5> Zd;
   bits<5> Zn;
   bits<5> Zm;
@@ -6875,9 +6878,12 @@ class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve2_hist_gen_vector<string asm> {
+multiclass sve2_hist_gen_vector<string asm, SDPatternOperator op> {
   def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
   def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
new file mode 100644
index 000000000000..3c755a3ea59f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-character-match.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; MATCH
+;
+
+define <vscale x 16 x i1> @match_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: match_i8:
+; CHECK: match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @match_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: match_i16:
+; CHECK: match p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i1> %out
+}
+
+;
+; NMATCH
+;
+
+define <vscale x 16 x i1> @nmatch_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: nmatch_i8:
+; CHECK: nmatch p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @nmatch_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: nmatch_i16:
+; CHECK: nmatch p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.nmatch.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.match.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.nmatch.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
new file mode 100644
index 000000000000..97ac6035c4eb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-vec-hist-count.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; HISTCNT
+;
+
+define <vscale x 4 x i32> @histcnt_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: histcnt_i32:
+; CHECK: histcnt z0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.histcnt.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @histcnt_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: histcnt_i64:
+; CHECK: histcnt z0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.histcnt.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; HISTSEG
+;
+
+define <vscale x 16 x i8> @histseg(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: histseg:
+; CHECK: histseg z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.histseg.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.histcnt.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.histcnt.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.histseg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)


        


More information about the llvm-commits mailing list