[clang] [llvm] Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (PR #91597)
Momchil Velikov via cfe-commits
cfe-commits at lists.llvm.org
Thu May 9 07:03:30 PDT 2024
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91597
Reverts llvm/llvm-project#88266 due to test failures
error: 'expected-error' diagnostics seen but not expected:
(frontend): '-fsyntax-only' action ignored; '-emit-llvm' action specified previously
>From 0f71196108d1c3c1bb44305a3a8392f406ae71e9 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 9 May 2024 15:01:53 +0100
Subject: [PATCH] =?UTF-8?q?Revert=20"[AArch64]=20Add=20intrinsics=20for=20?=
=?UTF-8?q?multi-vector=20to=20ZA=20array=20vector=20accumula=E2=80=A6"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit e88ba6d975d887ca001cae30bfa0c53d91165148.
---
clang/include/clang/Basic/arm_sme.td | 10 -
.../acle_sme2_add_sub_za16.c | 193 ------------------
.../acle_sme2_add_sub_za16.c | 29 ---
llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +-
.../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +-
.../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 --------------
6 files changed, 9 insertions(+), 389 deletions(-)
delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
delete mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
delete mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 000bd97a4b25d..1ac6d5170ea28 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,16 +298,6 @@ multiclass ZAAddSub<string n_suffix> {
def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>;
def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>;
}
-
- let TargetGuard = "sme-f16f16|sme-f8f16" in {
- def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
- def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
- }
-
- let TargetGuard = "sme2,b16b16" in {
- def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
- def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
- }
}
defm SVADD : ZAAddSub<"add">;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
deleted file mode 100644
index d98427fac610b..0000000000000
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
+++ /dev/null
@@ -1,193 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
-
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sme.h>
-
-#ifdef SVE_OVERLOADED_FORMS
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
-#else
-#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
-#endif
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x4_f16j13svfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svadd_za16,_f16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x2_f16j13svfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x4_f16j13svfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svsub_za16,_f16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x2_bf16j14svbfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svadd_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x4_bf16j14svbfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svadd_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svadd_za16,_bf16,_vg1x4)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x2_bf16j14svbfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svsub_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x2)(slice, zn);
-}
-
-// CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_bf16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-NEXT: ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x4_bf16j14svbfloat16x4_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x bfloat> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] {
-// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-CXX-NEXT: ret void
-//
-void test_svsub_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") {
- SVE_ACLE_FUNC(svsub_za16,_bf16,_vg1x4)(slice, zn);
-}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
deleted file mode 100644
index 0eeeac5a50469..0000000000000
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify -emit-llvm %s
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_sme.h>
-
-void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4,
- svbfloat16x2_t bzn2, svbfloat16x4_t bzn4) __arm_streaming __arm_inout("za") {
- // expected-error at +1 {{'svadd_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}}
- svadd_za16_f16_vg1x2(slice, zn2);
- // expected-error at +1 {{'svadd_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
- svadd_za16_f16_vg1x4(slice, zn4);
- // expected-error at +1 {{'svsub_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}}
- svsub_za16_f16_vg1x2(slice, zn2);
- // expected-error at +1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
- svsub_za16_f16_vg1x4(slice, zn4);
-
- // expected-error at +1 {{'svadd_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
- svadd_za16_bf16_vg1x2(slice, bzn2);
- // expected-error at +1 {{'svadd_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
- svadd_za16_bf16_vg1x4(slice, bzn4);
- // expected-error at +1 {{'svsub_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
- svsub_za16_bf16_vg1x2(slice, bzn2);
- // expected-error at +1 {{'svsub_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
- svsub_za16_bf16_vg1x4(slice, bzn4);
-}
-
-
-
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 04571faf13067..e31e00a9c76f3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3481,7 +3481,7 @@ let TargetPrefix = "aarch64" in {
// Multi-vector add/sub and accumulate into ZA
//
foreach intr = ["add", "sub"] in {
- foreach za = ["za16","za32", "za64"] in {
+ foreach za = ["za32", "za64"] in {
def int_aarch64_sme_ # intr # _ # za # _vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_ # intr # _ # za # _vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 5102c602d54e1..574178c8d5244 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -793,10 +793,10 @@ defm LUTI4_S_4ZTZI : sme2p1_luti4_vector_vg4_index<"luti4">;
}
let Predicates = [HasSMEF16F16orSMEF8F16] in {
-defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x2>;
-defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x4>;
-defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x2>;
-defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x4>;
+defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
+defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
}
let Predicates = [HasSMEF16F16] in {
@@ -822,10 +822,10 @@ defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
}
let Predicates = [HasSME2, HasB16B16] in {
-defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x2>;
-defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x4>;
-defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x2>;
-defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x4>;
+defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16>;
defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16>;
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
deleted file mode 100644
index e7a6c0d6c549b..0000000000000
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
+++ /dev/null
@@ -1,148 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
-
-target triple = "aarch64-linux"
-
-define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #0 {
-; CHECK-LABEL: add_f16_vg1x2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 %slice.7, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
- ret void
-}
-
-define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
-; CHECK-LABEL: add_f16_vg1x4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: ret
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3) #1 {
- call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3);
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 %slice.7, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3);
- ret void
-}
-
-define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #1 {
-; CHECK-LABEL: sub_f16_vg1x2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 %slice.7, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
- ret void
-}
-
-define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
-; CHECK-LABEL: sub_f16_vg1x4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: ret
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3) #0 {
- call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3);
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 %slice.7, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
- <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3);
- ret void
-}
-
-define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
-; CHECK-LABEL: add_bf16_vg1x2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)
- ret void
-}
-
-define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
-; CHECK-LABEL: add_bf16_vg1x4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: ret
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3) #2 {
- call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3);
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3);
- ret void
-}
-
-define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
-; CHECK-LABEL: sub_bf16_vg1x2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h }
-; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)
- ret void
-}
-
-define void @sub_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
-; CHECK-LABEL: sub_bf16_vg1x4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h }
-; CHECK-NEXT: ret
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3) #2 {
- call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3);
- %slice.7 = add i32 %slice, 7
- call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 %slice.7, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
- <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3);
- ret void
-}
-
-attributes #0 = { nounwind "target-features"="+sme-f16f16" }
-attributes #1 = { nounwind "target-features"="+sme-f8f16" }
-attributes #2 = { nounwind "target-features"="+sme2,+bf16,+b16b16" }
More information about the cfe-commits
mailing list