[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)

Kerry McLaughlin via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 24 09:34:27 PST 2023


https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/71176

>From c975abe9015d5c9f5f7c7388101900cbcf738ab6 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 2 Nov 2023 17:02:32 +0000
Subject: [PATCH 1/3] [Clang][SME2] Add outer product and accumulate/subtract
 builtins

Adds the following SME2 builtins:
 - svmop(a|s)_za32,
 - svbmop(a|s)_za32

See https://github.com/ARM-software/acle/pull/217
---
 clang/include/clang/Basic/arm_sme.td          |  16 ++
 .../aarch64-sme2-intrinsics/acle_sme2_mop.c   | 170 ++++++++++++++++++
 .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp |  18 ++
 3 files changed, 204 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
 create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index b5655afdf419ecf..1f88b39468105f2 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,3 +298,19 @@ multiclass ZAAddSub<string n_suffix> {
 
 defm SVADD : ZAAddSub<"add">;
 defm SVSUB : ZAAddSub<"sub">;
+
+//
+// Outer produce and accumulate/subtract
+//
+
+let TargetGuard = "sme2" in {
+  def SVSMOPA  : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+  def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+
+  def SVSMOPS  : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+  def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+
+  def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+
+  def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
new file mode 100644
index 000000000000000..bb804a523c449d3
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// MOPA
+
+// CHECK-LABEL: @test_svmopa_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+  SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmopa_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svmopa_u16u10__SVBool_tS_u12__SVUint16_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+  SVE_ACLE_FUNC(svmopa_za32,_u16,_m,)(3, pn, pm, zn, zm);
+}
+
+// MOPS
+
+// CHECK-LABEL: @test_svmops_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svmops_s16u10__SVBool_tS_u11__SVInt16_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+  SVE_ACLE_FUNC(svmops_za32,_s16,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svmops_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z15test_svmops_u16u10__SVBool_tS_u12__SVUint16_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+  SVE_ACLE_FUNC(svmops_za32,_u16,_m,)(3, pn, pm, zn, zm);
+}
+
+// BMOPA
+
+// CHECK-LABEL: @test_svbmopa_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmopa_u16u10__SVBool_tS_u12__SVUint32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmopa_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
+  SVE_ACLE_FUNC(svbmopa_za32,_u32,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svbmopa_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmopa_s16u10__SVBool_tS_u11__SVInt32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmopa_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) {
+  SVE_ACLE_FUNC(svbmopa_za32,_s32,_m,)(3, pn, pm, zn, zm);
+}
+
+// BMOPS
+
+// CHECK-LABEL: @test_svbmops_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmops_u16u10__SVBool_tS_u12__SVUint32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmops_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
+  SVE_ACLE_FUNC(svbmops_za32,_u32,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svbmops_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmops_s16u10__SVBool_tS_u11__SVInt32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmops_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) {
+  SVE_ACLE_FUNC(svbmops_za32,_s32,_m,)(3, pn, pm, zn, zm);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
new file mode 100644
index 000000000000000..83a70aca4dcbb59
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN:    -target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) {
+  // Test Tile Range
+  svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svmopa_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  svmops_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svmops_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  svbmopa_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}

>From 57b3fd3b0ea209afd5e2d82a88d821dfc469e771 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 13 Nov 2023 10:23:55 +0000
Subject: [PATCH 2/3] - Add arm_streaming & arm_shared_za attributes to the
 tests

---
 .../aarch64-sme2-intrinsics/acle_sme2_mop.c      | 16 ++++++++--------
 .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp    |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
index bb804a523c449d3..10bdffd71a010ed 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
@@ -33,7 +33,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -51,7 +51,7 @@ void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svmopa_za32,_u16,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -71,7 +71,7 @@ void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svmops_za32,_s16,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -89,7 +89,7 @@ void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umops.za32.nxv8i16(i32 3, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svmops_za32,_u16,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -109,7 +109,7 @@ void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svbmopa_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
+void test_svbmopa_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svbmopa_za32,_u32,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -127,7 +127,7 @@ void test_svbmopa_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svbmopa_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) {
+void test_svbmopa_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svbmopa_za32,_s32,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -147,7 +147,7 @@ void test_svbmopa_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svbmops_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
+void test_svbmops_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svbmops_za32,_u32,_m,)(3, pn, pm, zn, zm);
 }
 
@@ -165,6 +165,6 @@ void test_svbmops_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svbmops_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) {
+void test_svbmops_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svbmops_za32,_s32,_m,)(3, pn, pm, zn, zm);
 }
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 83a70aca4dcbb59..656a82ecea14012 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -5,7 +5,7 @@
 
 #include <arm_sme_draft_spec_subject_to_change.h>
 
-void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) {
+void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za {
   // Test Tile Range
   svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
   svmopa_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}

>From 04988b1bae74c54a771ca9ac657bf882d30ee2cc Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 24 Nov 2023 17:08:21 +0000
Subject: [PATCH 3/3] - Fixed typo in arm_sme.td comment - Fixed bmopa/s test
 names and split into their own file - Added missing tests to
 acle_sme2_imm.cpp

---
 clang/include/clang/Basic/arm_sme.td          |  2 +-
 .../aarch64-sme2-intrinsics/acle_sme2_bmop.c  | 94 +++++++++++++++++++
 .../aarch64-sme2-intrinsics/acle_sme2_mop.c   | 76 ---------------
 .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp |  3 +
 4 files changed, 98 insertions(+), 77 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 1f88b39468105f2..d55deeaa40bbcd5 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -300,7 +300,7 @@ defm SVADD : ZAAddSub<"add">;
 defm SVSUB : ZAAddSub<"sub">;
 
 //
-// Outer produce and accumulate/subtract
+// Outer product and accumulate/subtract
 //
 
 let TargetGuard = "sme2" in {
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c
new file mode 100644
index 000000000000000..67a330625884f87
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c
@@ -0,0 +1,94 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// BMOPA
+
+// CHECK-LABEL: @test_svbmopa_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmopa_u32u10__SVBool_tS_u12__SVUint32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmopa_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
+  SVE_ACLE_FUNC(svbmopa_za32,_u32,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svbmopa_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmopa_s32u10__SVBool_tS_u11__SVInt32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmopa_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
+  SVE_ACLE_FUNC(svbmopa_za32,_s32,_m,)(3, pn, pm, zn, zm);
+}
+
+// BMOPS
+
+// CHECK-LABEL: @test_svbmops_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmops_u32u10__SVBool_tS_u12__SVUint32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmops_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
+  SVE_ACLE_FUNC(svbmops_za32,_u32,_m,)(3, pn, pm, zn, zm);
+}
+
+// CHECK-LABEL: @test_svbmops_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svbmops_s32u10__SVBool_tS_u11__SVInt32_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svbmops_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
+  SVE_ACLE_FUNC(svbmops_za32,_s32,_m,)(3, pn, pm, zn, zm);
+}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
index 10bdffd71a010ed..eef99eb1b75a374 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c
@@ -92,79 +92,3 @@ void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm
 void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svmops_za32,_u16,_m,)(3, pn, pm, zn, zm);
 }
-
-// BMOPA
-
-// CHECK-LABEL: @test_svbmopa_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svbmopa_u16u10__SVBool_tS_u12__SVUint32_tS0_(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svbmopa_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svbmopa_za32,_u32,_m,)(3, pn, pm, zn, zm);
-}
-
-// CHECK-LABEL: @test_svbmopa_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svbmopa_s16u10__SVBool_tS_u11__SVInt32_tS0_(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svbmopa_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svbmopa_za32,_s32,_m,)(3, pn, pm, zn, zm);
-}
-
-// BMOPS
-
-// CHECK-LABEL: @test_svbmops_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svbmops_u16u10__SVBool_tS_u12__SVUint32_tS0_(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svbmops_u16(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svbmops_za32,_u32,_m,)(3, pn, pm, zn, zm);
-}
-
-// CHECK-LABEL: @test_svbmops_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    ret void
-//
-// CPP-CHECK-LABEL: @_Z16test_svbmops_s16u10__SVBool_tS_u11__SVInt32_tS0_(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PN:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PM:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    ret void
-//
-void test_svbmops_s16(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svbmops_za32,_s32,_m,)(3, pn, pm, zn, zm);
-}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 656a82ecea14012..4c35a238d9f9e2c 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -14,5 +14,8 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t
   svmops_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 
   svbmopa_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svbmopa_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  svbmops_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
   svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }



More information about the cfe-commits mailing list