[llvm] b572d9b - [llvm][sve] Intrinsics for SVE sudot and usdot instructions.

Mon May 18 15:04:27 PDT 2020

Author: Francesco Petrogalli
Date: 2020-05-18T22:02:19Z
New Revision: b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd

URL: https://github.com/llvm/llvm-project/commit/b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd
DIFF: https://github.com/llvm/llvm-project/commit/b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd.diff

LOG: [llvm][sve] Intrinsics for SVE sudot and usdot instructions.

Summary:
This patch adds IR intrinsics for the mnemonics USDOT and SUDOT of the
8.6 extension of Armv8-a.

Reviewers: sdesmalen, efriedma, david-arm

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79876

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 0be3a7f3593d..28a5a16c5a66 100644

--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2267,6 +2267,10 @@ def int_aarch64_sve_ummla : SVE_MatMul_Intrinsic;
 def int_aarch64_sve_smmla : SVE_MatMul_Intrinsic;
 def int_aarch64_sve_usmmla : SVE_MatMul_Intrinsic;
 
+def int_aarch64_sve_usdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_usdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
 //
 // SVE ACLE: 7.4/5. FP64/FP32 matrix multiply extensions
 //

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 81721926d9dd..7f1599b9838d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1857,9 +1857,9 @@ let Predicates = [HasSVE, HasMatMulInt8] in {
   defm  SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
   defm  UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
   defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
-  def USDOT_ZZZ  : sve_int_dot_mixed<"usdot">;
-  def USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot">;
-  def SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot">;
+  defm USDOT_ZZZ  : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
+  defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
+  defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
 }
 
 let Predicates = [HasSVE, HasMatMulFP32] in {

diff  --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 992542c0b75c..25702e15ab5a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7574,12 +7574,18 @@ class sve_int_dot_mixed<string asm>
   let ElementSize = ZPR32.ElementSize;
 }
 
+multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
+  def NAME : sve_int_dot_mixed<asm>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op , nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Dot Product Mixed Sign - Indexed Group
 //===----------------------------------------------------------------------===//
 
 class sve_int_dot_mixed_indexed<bit U, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS:$idx),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS32b:$idx),
     asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -7598,6 +7604,12 @@ class sve_int_dot_mixed_indexed<bit U, string asm>
   let ElementSize = ZPR32.ElementSize;
 }
 
+multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
+  def NAME : sve_int_dot_mixed_indexed<U, asm>;
+
+  def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Matrix Multiply Accumulate Group
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
index c295aee43975..6febb71e7db0 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
@@ -27,7 +27,93 @@ entry:
   ret <vscale x 4 x i32> %val
 }
 
+define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot:
+; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_0:
+; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[0]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_1:
+; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[1]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_2:
+; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[2]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_3:
+; CHECK-NEXT:  usdot   z0.s, z1.b, z2.b[3]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_0:
+; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[0]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_1:
+; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[1]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_2:
+; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[2]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_3:
+; CHECK-NEXT:  sudot   z0.s, z1.b, z2.b[3]
+; CHECK-NEXT : ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+  ret <vscale x 4 x i32> %val
+}
+
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+