[llvm] b572d9b - [llvm][sve] Intrinsics for SVE sudot and usdot instructions.
Francesco Petrogalli via llvm-commits
llvm-commits at lists.llvm.org
Mon May 18 15:04:27 PDT 2020
Author: Francesco Petrogalli
Date: 2020-05-18T22:02:19Z
New Revision: b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd
URL: https://github.com/llvm/llvm-project/commit/b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd
DIFF: https://github.com/llvm/llvm-project/commit/b572d9b1a73fad9b6fd3b465aaba48a7c8d8c0bd.diff
LOG: [llvm][sve] Intrinsics for SVE sudot and usdot instructions.
Summary:
This patch adds IR intrinsics for the mnemonics USDOT and SUDOT of the
8.6 extension of Armv8-a.
Reviewers: sdesmalen, efriedma, david-arm
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79876
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 0be3a7f3593d..28a5a16c5a66 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2267,6 +2267,10 @@ def int_aarch64_sve_ummla : SVE_MatMul_Intrinsic;
def int_aarch64_sve_smmla : SVE_MatMul_Intrinsic;
def int_aarch64_sve_usmmla : SVE_MatMul_Intrinsic;
+def int_aarch64_sve_usdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_usdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
//
// SVE ACLE: 7.4/5. FP64/FP32 matrix multiply extensions
//
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 81721926d9dd..7f1599b9838d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1857,9 +1857,9 @@ let Predicates = [HasSVE, HasMatMulInt8] in {
defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
- def USDOT_ZZZ : sve_int_dot_mixed<"usdot">;
- def USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot">;
- def SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot">;
+ defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
+ defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
+ defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
}
let Predicates = [HasSVE, HasMatMulFP32] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 992542c0b75c..25702e15ab5a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7574,12 +7574,18 @@ class sve_int_dot_mixed<string asm>
let ElementSize = ZPR32.ElementSize;
}
+multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
+ def NAME : sve_int_dot_mixed<asm>;
+ // NOTE(review): SVE_3_Op_Pat is defined outside this hunk; presumably it selects `op` (nxv4i32 result; nxv4i32, nxv16i8, nxv16i8 operands) to the instruction defined above - confirm.
+ def : SVE_3_Op_Pat<nxv4i32, op , nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Integer Dot Product Mixed Sign - Indexed Group
//===----------------------------------------------------------------------===//
class sve_int_dot_mixed_indexed<bit U, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS:$idx),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS32b:$idx),
asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
@@ -7598,6 +7604,12 @@ class sve_int_dot_mixed_indexed<bit U, string asm>
let ElementSize = ZPR32.ElementSize;
}
+multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
+ def NAME : sve_int_dot_mixed_indexed<U, asm>;
+ // NOTE(review): SVE_4_Op_Imm_Pat is defined outside this hunk; presumably it selects `op` with an i32 immediate lane index (VectorIndexS32b_timm) to the instruction defined above - confirm.
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Floating Point Matrix Multiply Accumulate Group
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
index c295aee43975..6febb71e7db0 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
@@ -27,7 +27,93 @@ entry:
ret <vscale x 4 x i32> %val
}
+define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_0:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_1:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_2:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_3:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_0:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_1:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_2:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+ ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_3:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT: ret
+ %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+ ret <vscale x 4 x i32> %val
+}
+
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+
More information about the llvm-commits
mailing list