[clang] [llvm] [AArch64] Add intrinsics for SME FP8 FDOT single and multi instructions (PR #119845)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 01:44:25 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Jonathan Thackray (jthackray)
<details>
<summary>Changes</summary>
Add support for the following SME 8-bit floating-point (FP8) dot-product intrinsics:
* svdot_single_za16_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpm);
* svdot_single_za16_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpm);
* svdot_single_za32_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpm);
* svdot_single_za32_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpm);
* svdot_za16_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpm);
* svdot_za16_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm);
* svdot_za32_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpm);
* svdot_za32_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm);
---
Patch is 38.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119845.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/arm_sme.td (+12)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c (+167-7)
- (modified) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c (+16)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+36)
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+8-9)
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+64)
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll (+112)
``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 0fae70866cd55e..e7625d8d3e0b8a 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -748,11 +748,23 @@ let SMETargetGuard = "sme2" in {
let SMETargetGuard = "sme-f8f32" in {
def SVDOT_LANE_FP8_ZA32_VG1x2 : Inst<"svdot_lane_za32[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_FP8_ZA32_VG1x4 : Inst<"svdot_lane_za32[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
+
+ def SVDOT_SINGLE_FP8_ZA32_VG1x2 : Inst<"svdot[_single]_za32[_mf8]_vg1x2_fpm", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+ def SVDOT_SINGLE_FP8_ZA32_VG1x4 : Inst<"svdot[_single]_za32[_mf8]_vg1x4_fpm", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+
+ def SVDOT_MULTI_FP8_ZA32_VG1x2 : Inst<"svdot_za32[_mf8]_vg1x2_fpm", "vm22>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+ def SVDOT_MULTI_FP8_ZA32_VG1x4 : Inst<"svdot_za32[_mf8]_vg1x4_fpm", "vm44>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
}
let SMETargetGuard = "sme-f8f16" in {
def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+
+ def SVDOT_SINGLE_FP8_ZA16_VG1x2 : Inst<"svdot[_single]_za16[_mf8]_vg1x2_fpm", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+ def SVDOT_SINGLE_FP8_ZA16_VG1x4 : Inst<"svdot[_single]_za16[_mf8]_vg1x4_fpm", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+
+ def SVDOT_MULTI_FP8_ZA16_VG1x2 : Inst<"svdot_za16[_mf8]_vg1x2_fpm", "vm22>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
+ def SVDOT_MULTI_FP8_ZA16_VG1x4 : Inst<"svdot_za16[_mf8]_vg1x4_fpm", "vm44>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c
index 74d18c32d5b3ab..a151d162e01085 100644
--- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c
@@ -1,18 +1,18 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// REQUIRES: aarch64-registered-target
-#include <arm_sme.h>
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
#include <arm_sme.h>
#ifdef SVE_OVERLOADED_FORMS
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
#else
-#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
#endif
// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x2(
@@ -32,7 +32,7 @@
void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr);
+ SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm,,)(slice, zn, zm, 3, fpmr);
}
// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x4(
@@ -52,7 +52,7 @@ void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr);
+ SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm,,)(slice, zn, zm, 3, fpmr);
}
// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2(
@@ -72,7 +72,7 @@ void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr);
+ SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm,,)(slice, zn, zm, 3, fpmr);
}
// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x4(
@@ -92,5 +92,165 @@ void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
void test_svdot_lane_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr);
+ SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm,,)(slice, zn, zm, 3, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_single_za32_f8_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za32_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_single_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,_single,_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_single_za32_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za32_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_single_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
+ svmfloat8_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,_single,_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_multi_za32_f8_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za32_f8_vg1x2j13svmfloat8x2_tS_m(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_multi_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8x2_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,,_za32,_mf8,_vg1x2_fpm) (slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_multi_za32_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za32_f8_vg1x4j13svmfloat8x4_tS_m(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_multi_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
+ svmfloat8x4_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,,_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_single_za16_f8_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_single_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,_single,_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_single_za16_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za16_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_single_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
+ svmfloat8_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,_single,_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_multi_za16_f8_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za16_f8_vg1x2j13svmfloat8x2_tS_m(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svdot_multi_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8x2_t zm, fpm_t fpmr)
+ __arm_streaming __arm_inout("za") {
+ SVE_ACLE_FUNC(svdot,,_za16,_mf8,_vg1x2_fpm) (slice, zn, zm, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svdot_multi_za16_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za16_f8_vg1x4j13svmfloat8x4_tS_m(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119845
More information about the llvm-commits
mailing list