[clang] 9f6250f - [Clang][AArch64][SME] Add vector load/store (ld1/st1) intrinsics
Bryan Chan via cfe-commits
cfe-commits at lists.llvm.org
Sun May 28 18:01:13 PDT 2023
Author: Bryan Chan
Date: 2023-05-28T21:08:13-04:00
New Revision: 9f6250f591057e68c0bda564716b6918b8e39a84
URL: https://github.com/llvm/llvm-project/commit/9f6250f591057e68c0bda564716b6918b8e39a84
DIFF: https://github.com/llvm/llvm-project/commit/9f6250f591057e68c0bda564716b6918b8e39a84.diff
LOG: [Clang][AArch64][SME] Add vector load/store (ld1/st1) intrinsics
This patch adds support for the following SME ACLE intrinsics (as defined
in https://arm-software.github.io/acle/main/acle.html):
- svld1_hor_za8 // also for _za16, _za32, _za64 and _za128
- svld1_hor_vnum_za8 // also for _za16, _za32, _za64 and _za128
- svld1_ver_za8 // also for _za16, _za32, _za64 and _za128
- svld1_ver_vnum_za8 // also for _za16, _za32, _za64 and _za128
- svst1_hor_za8 // also for _za16, _za32, _za64 and _za128
- svst1_hor_vnum_za8 // also for _za16, _za32, _za64 and _za128
- svst1_ver_za8 // also for _za16, _za32, _za64 and _za128
- svst1_ver_vnum_za8 // also for _za16, _za32, _za64 and _za128
SveEmitter.cpp is extended to generate arm_sme.h (currently named
arm_sme_draft_spec_subject_to_change.h) and other SME definitions from
arm_sme.td, which is modeled after arm_sve.td. Common TableGen definitions
are moved into arm_sve_sme_incl.td.
Co-authored-by: Sagar Kulkarni <sagar.kulkarni1 at huawei.com>
Reviewed By: sdesmalen, kmclaughlin
Differential Revision: https://reviews.llvm.org/D127910
Added:
clang/include/clang/Basic/BuiltinsSME.def
clang/include/clang/Basic/arm_sme.td
clang/include/clang/Basic/arm_sve_sme_incl.td
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
Modified:
clang/include/clang/Basic/BuiltinsAArch64.def
clang/include/clang/Basic/BuiltinsARM.def
clang/include/clang/Basic/BuiltinsNEON.def
clang/include/clang/Basic/CMakeLists.txt
clang/include/clang/Basic/TargetBuiltins.h
clang/include/clang/Basic/arm_sve.td
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Headers/CMakeLists.txt
clang/lib/Sema/SemaChecking.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/utils/TableGen/SveEmitter.cpp
clang/utils/TableGen/TableGen.cpp
clang/utils/TableGen/TableGenBackends.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index e6672a1702e94..33c8ecbb1c454 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -269,4 +269,5 @@ TARGET_HEADER_BUILTIN(__readx18qword, "ULLiULi", "nh", INTRIN_H, ALL_MS_LANGUAGE
#undef BUILTIN
#undef LANGBUILTIN
+#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def
index eabf830b359ca..ae65b7760d4d9 100644
--- a/clang/include/clang/Basic/BuiltinsARM.def
+++ b/clang/include/clang/Basic/BuiltinsARM.def
@@ -343,4 +343,5 @@ TARGET_HEADER_BUILTIN(_InterlockedDecrement64_rel, "LLiLLiD*", "nh", INTRIN_H, A
#undef BUILTIN
#undef LANGBUILTIN
+#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsNEON.def b/clang/include/clang/Basic/BuiltinsNEON.def
index b8eb5a7b6173b..9627005ba9824 100644
--- a/clang/include/clang/Basic/BuiltinsNEON.def
+++ b/clang/include/clang/Basic/BuiltinsNEON.def
@@ -19,3 +19,4 @@
#undef GET_NEON_BUILTINS
#undef BUILTIN
+#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsSME.def b/clang/include/clang/Basic/BuiltinsSME.def
new file mode 100644
index 0000000000000..180ee20295ccd
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsSME.def
@@ -0,0 +1,21 @@
+//===--- BuiltinsSME.def - SME Builtin function database --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SME-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
+//
+//===----------------------------------------------------------------------===//
+
+// The format of this database matches clang/Basic/Builtins.def.
+
+#define GET_SME_BUILTINS
+#include "clang/Basic/arm_sme_builtins.inc"
+#undef GET_SME_BUILTINS
+
+#undef BUILTIN
+#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 53a713b13ea39..67153da383f5a 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -72,6 +72,15 @@ clang_tablegen(arm_sve_typeflags.inc -gen-arm-sve-typeflags
clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks
SOURCE arm_sve.td
TARGET ClangARMSveSemaRangeChecks)
+clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltins)
+clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltinCG)
+clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeSemaRangeChecks)
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
SOURCE arm_cde.td
TARGET ClangARMCdeBuiltinsDef)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 8fd792054c269..3597d28a8def2 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -48,11 +48,22 @@ namespace clang {
enum {
LastNEONBuiltin = NEON::FirstTSBuiltin - 1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
#include "clang/Basic/BuiltinsSVE.def"
FirstTSBuiltin,
};
}
+ namespace SME {
+ enum {
+ LastSVEBuiltin = SVE::FirstTSBuiltin - 1,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
+#include "clang/Basic/BuiltinsSME.def"
+ FirstTSBuiltin,
+ };
+ }
+
/// AArch64 builtins
namespace AArch64 {
enum {
@@ -60,6 +71,8 @@ namespace clang {
LastNEONBuiltin = NEON::FirstTSBuiltin - 1,
FirstSVEBuiltin = NEON::FirstTSBuiltin,
LastSVEBuiltin = SVE::FirstTSBuiltin - 1,
+ FirstSMEBuiltin = SVE::FirstTSBuiltin,
+ LastSMEBuiltin = SME::FirstTSBuiltin - 1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "clang/Basic/BuiltinsAArch64.def"
LastTSBuiltin
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
new file mode 100644
index 0000000000000..4b174412aaffe
--- /dev/null
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -0,0 +1,74 @@
+//===--- arm_sme.td - ARM SME compiler interface ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TableGen definitions from which the ARM SME header
+// file will be generated. See:
+//
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_sve_sme_incl.td"
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads
+
+multiclass ZALoad<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
+ let TargetGuard = "sme" in {
+ def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimiPQ", t,
+ [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA],
+ MemEltTyDefault, i_prefix # "_horiz", ch>;
+
+ def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimiPQl", t,
+ [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA],
+ MemEltTyDefault, i_prefix # "_horiz", ch>;
+
+ def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimiPQ", t,
+ [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA],
+ MemEltTyDefault, i_prefix # "_vert", ch>;
+
+ def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimiPQl", t,
+ [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA],
+ MemEltTyDefault, i_prefix # "_vert", ch>;
+ }
+}
+
+defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Stores
+
+multiclass ZAStore<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
+ let TargetGuard = "sme" in {
+ def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimiP%", t,
+ [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA],
+ MemEltTyDefault, i_prefix # "_horiz", ch>;
+
+ def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimiP%l", t,
+ [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA],
+ MemEltTyDefault, i_prefix # "_horiz", ch>;
+
+ def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimiP%", t,
+ [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA],
+ MemEltTyDefault, i_prefix # "_vert", ch>;
+
+ def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimiP%l", t,
+ [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA],
+ MemEltTyDefault, i_prefix # "_vert", ch>;
+ }
+}
+
+defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
+defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
+defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
+defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index bd2db7ef17be8..aa7c0553671a5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -13,253 +13,7 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Instruction definitions
-//===----------------------------------------------------------------------===//
-// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
-// a sequence of typespecs.
-//
-// The name is the base name of the intrinsic, for example "svld1". This is
-// then mangled by the tblgen backend to add type information ("svld1_s16").
-//
-// A typespec is a sequence of uppercase characters (modifiers) followed by one
-// lowercase character. A typespec encodes a particular "base type" of the
-// intrinsic.
-//
-// An example typespec is "Us" - unsigned short - svuint16_t. The available
-// typespec codes are given below.
-//
-// The string given to an Inst class is a sequence of typespecs. The intrinsic
-// is instantiated for every typespec in the sequence. For example "sdUsUd".
-//
-// The prototype is a string that defines the return type of the intrinsic
-// and the type of each argument. The return type and every argument gets a
-// "modifier" that can change in some way the "base type" of the intrinsic.
-//
-// The modifier 'd' means "default" and does not modify the base type in any
-// way. The available modifiers are given below.
-//
-// Typespecs
-// ---------
-// c: char
-// s: short
-// i: int
-// l: long
-// f: float
-// h: half-float
-// d: double
-// b: bfloat
-
-// Typespec modifiers
-// ------------------
-// P: boolean
-// U: unsigned
-
-// Prototype modifiers
-// -------------------
-// prototype: return (arg, arg, ...)
-//
-// 2,3,4: array of default vectors
-// v: void
-// x: vector of signed integers
-// u: vector of unsigned integers
-// d: default
-// c: const pointer type
-// P: predicate type
-// s: scalar of element type
-// a: scalar of element type (splat to vector type)
-// R: scalar of 1/2 width element type (splat to vector type)
-// r: scalar of 1/4 width element type (splat to vector type)
-// @: unsigned scalar of 1/4 width element type (splat to vector type)
-// e: 1/2 width unsigned elements, 2x element count
-// b: 1/4 width unsigned elements, 4x element count
-// h: 1/2 width elements, 2x element count
-// q: 1/4 width elements, 4x element count
-// o: 4x width elements, 1/4 element count
-//
-// w: vector of element type promoted to 64bits, vector maintains
-// signedness of its element type.
-// f: element type promoted to uint64_t (splat to vector type)
-// j: element type promoted to 64bits (splat to vector type)
-// K: element type bitcast to a signed integer (splat to vector type)
-// L: element type bitcast to an unsigned integer (splat to vector type)
-//
-// i: constant uint64_t
-// k: int32_t
-// l: int64_t
-// m: uint32_t
-// n: uint64_t
-
-// t: svint32_t
-// z: svuint32_t
-// g: svuint64_t
-// O: svfloat16_t
-// M: svfloat32_t
-// N: svfloat64_t
-
-// J: Prefetch type (sv_prfop)
-// A: pointer to int8_t
-// B: pointer to int16_t
-// C: pointer to int32_t
-// D: pointer to int64_t
-
-// E: pointer to uint8_t
-// F: pointer to uint16_t
-// G: pointer to uint32_t
-// H: pointer to uint64_t
-
-// Q: const pointer to void
-
-// S: const pointer to int8_t
-// T: const pointer to int16_t
-// U: const pointer to int32_t
-// V: const pointer to int64_t
-//
-// W: const pointer to uint8_t
-// X: const pointer to uint16_t
-// Y: const pointer to uint32_t
-// Z: const pointer to uint64_t
-
-class MergeType<int val, string suffix=""> {
- int Value = val;
- string Suffix = suffix;
-}
-def MergeNone : MergeType<0>;
-def MergeAny : MergeType<1, "_x">;
-def MergeOp1 : MergeType<2, "_m">;
-def MergeZero : MergeType<3, "_z">;
-def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit
-def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument.
-
-class EltType<int val> {
- int Value = val;
-}
-def EltTyInvalid : EltType<0>;
-def EltTyInt8 : EltType<1>;
-def EltTyInt16 : EltType<2>;
-def EltTyInt32 : EltType<3>;
-def EltTyInt64 : EltType<4>;
-def EltTyFloat16 : EltType<5>;
-def EltTyFloat32 : EltType<6>;
-def EltTyFloat64 : EltType<7>;
-def EltTyBool8 : EltType<8>;
-def EltTyBool16 : EltType<9>;
-def EltTyBool32 : EltType<10>;
-def EltTyBool64 : EltType<11>;
-def EltTyBFloat16 : EltType<12>;
-
-class MemEltType<int val> {
- int Value = val;
-}
-def MemEltTyDefault : MemEltType<0>;
-def MemEltTyInt8 : MemEltType<1>;
-def MemEltTyInt16 : MemEltType<2>;
-def MemEltTyInt32 : MemEltType<3>;
-def MemEltTyInt64 : MemEltType<4>;
-
-class FlagType<int val> {
- int Value = val;
-}
-
-// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h
-// and include/clang/Basic/TargetBuiltins.h
-def NoFlags : FlagType<0x00000000>;
-def FirstEltType : FlagType<0x00000001>;
-// : :
-// : :
-def EltTypeMask : FlagType<0x0000000f>;
-def FirstMemEltType : FlagType<0x00000010>;
-// : :
-// : :
-def MemEltTypeMask : FlagType<0x00000070>;
-def FirstMergeTypeMask : FlagType<0x00000080>;
-// : :
-// : :
-def MergeTypeMask : FlagType<0x00000380>;
-def FirstSplatOperand : FlagType<0x00000400>;
-// : :
-// These flags are used to specify which scalar operand
-// needs to be duplicated/splatted into a vector.
-// : :
-def SplatOperandMask : FlagType<0x00001C00>;
-def IsLoad : FlagType<0x00002000>;
-def IsStore : FlagType<0x00004000>;
-def IsGatherLoad : FlagType<0x00008000>;
-def IsScatterStore : FlagType<0x00010000>;
-def IsStructLoad : FlagType<0x00020000>;
-def IsStructStore : FlagType<0x00040000>;
-def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default
-def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
-def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
-def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
-def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
-def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
-def IsByteIndexed : FlagType<0x01000000>;
-def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
-def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
-def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
-def IsGatherPrefetch : FlagType<0x10000000>;
-def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
-def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
-def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
-def IsTupleCreate : FlagType<0x100000000>;
-def IsTupleGet : FlagType<0x200000000>;
-def IsTupleSet : FlagType<0x400000000>;
-def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
-def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
-
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
- int Value = val;
-}
-def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16 : ImmCheckType<1>; // 1..16
-def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7 : ImmCheckType<6>; // 0..7
-def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13 : ImmCheckType<12>; // 0..13
-def ImmCheck0_1 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3 : ImmCheckType<15>; // 0..3
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
- int Arg = arg;
- int EltSizeArg = eltSizeArg;
- ImmCheckType Kind = kind;
-}
-
-class Inst<string n, string p, string t, MergeType mt, string i,
- list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
- string Name = n;
- string Prototype = p;
- string Types = t;
- string TargetGuard = "sve";
- int Merge = mt.Value;
- string MergeSuffix = mt.Suffix;
- string LLVMIntrinsic = i;
- list<FlagType> Flags = ft;
- list<ImmCheck> ImmChecks = ch;
- int MemEltType = met.Value;
-}
-
-// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8")
-class SInst<string n, string p, string t, MergeType mt, string i = "",
- list<FlagType> ft = [], list<ImmCheck> ch = []>
- : Inst<n, p, t, mt, i, ft, ch, MemEltTyDefault> {
-}
-
-// MInst: Instructions which access memory
-class MInst<string n, string p, string t, list<FlagType> f,
- MemEltType met = MemEltTyDefault, string i = "">
- : Inst<n, p, t, MergeNone, i, f, [], met> {
-}
+include "arm_sve_sme_incl.td"
////////////////////////////////////////////////////////////////////////////////
// Loads
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
new file mode 100644
index 0000000000000..f68140d386473
--- /dev/null
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -0,0 +1,274 @@
+//===--- arm_sve_sme_incl.td - ARM SVE/SME compiler interface -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common properites of TableGen definitions use for both
+// SVE and SME intrinsics.
+//
+// https://developer.arm.com/architectures/system-architectures/software-standards/acle
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions
+//===----------------------------------------------------------------------===//
+// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and
+// a sequence of typespecs.
+//
+// The name is the base name of the intrinsic, for example "svld1". This is
+// then mangled by the tblgen backend to add type information ("svld1_s16").
+//
+// A typespec is a sequence of uppercase characters (modifiers) followed by one
+// lowercase character. A typespec encodes a particular "base type" of the
+// intrinsic.
+//
+// An example typespec is "Us" - unsigned short - svuint16_t. The available
+// typespec codes are given below.
+//
+// The string given to an Inst class is a sequence of typespecs. The intrinsic
+// is instantiated for every typespec in the sequence. For example "sdUsUd".
+//
+// The prototype is a string that defines the return type of the intrinsic
+// and the type of each argument. The return type and every argument gets a
+// "modifier" that can change in some way the "base type" of the intrinsic.
+//
+// The modifier 'd' means "default" and does not modify the base type in any
+// way. The available modifiers are given below.
+//
+// Typespecs
+// ---------
+// c: char
+// s: short
+// i: int
+// l: long
+// q: int128_t
+// f: float
+// h: half-float
+// d: double
+// b: bfloat
+
+// Typespec modifiers
+// ------------------
+// P: boolean
+// U: unsigned
+
+// Prototype modifiers
+// -------------------
+// prototype: return (arg, arg, ...)
+//
+// 2,3,4: array of default vectors
+// v: void
+// x: vector of signed integers
+// u: vector of unsigned integers
+// d: default
+// c: const pointer type
+// P: predicate type
+// s: scalar of element type
+// a: scalar of element type (splat to vector type)
+// R: scalar of 1/2 width element type (splat to vector type)
+// r: scalar of 1/4 width element type (splat to vector type)
+// @: unsigned scalar of 1/4 width element type (splat to vector type)
+// e: 1/2 width unsigned elements, 2x element count
+// b: 1/4 width unsigned elements, 4x element count
+// h: 1/2 width elements, 2x element count
+// q: 1/4 width elements, 4x element count
+// o: 4x width elements, 1/4 element count
+//
+// w: vector of element type promoted to 64bits, vector maintains
+// signedness of its element type.
+// f: element type promoted to uint64_t (splat to vector type)
+// j: element type promoted to 64bits (splat to vector type)
+// K: element type bitcast to a signed integer (splat to vector type)
+// L: element type bitcast to an unsigned integer (splat to vector type)
+//
+// i: constant uint64_t
+// k: int32_t
+// l: int64_t
+// m: uint32_t
+// n: uint64_t
+
+// t: svint32_t
+// z: svuint32_t
+// g: svuint64_t
+// O: svfloat16_t
+// M: svfloat32_t
+// N: svfloat64_t
+
+// J: Prefetch type (sv_prfop)
+
+// %: pointer to void
+
+// A: pointer to int8_t
+// B: pointer to int16_t
+// C: pointer to int32_t
+// D: pointer to int64_t
+
+// E: pointer to uint8_t
+// F: pointer to uint16_t
+// G: pointer to uint32_t
+// H: pointer to uint64_t
+
+// Q: const pointer to void
+
+// S: const pointer to int8_t
+// T: const pointer to int16_t
+// U: const pointer to int32_t
+// V: const pointer to int64_t
+//
+// W: const pointer to uint8_t
+// X: const pointer to uint16_t
+// Y: const pointer to uint32_t
+// Z: const pointer to uint64_t
+
+class MergeType<int val, string suffix=""> {
+ int Value = val;
+ string Suffix = suffix;
+}
+def MergeNone : MergeType<0>;
+def MergeAny : MergeType<1, "_x">;
+def MergeOp1 : MergeType<2, "_m">;
+def MergeZero : MergeType<3, "_z">;
+def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit
+def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument.
+
+class EltType<int val> {
+ int Value = val;
+}
+def EltTyInvalid : EltType<0>;
+def EltTyInt8 : EltType<1>;
+def EltTyInt16 : EltType<2>;
+def EltTyInt32 : EltType<3>;
+def EltTyInt64 : EltType<4>;
+def EltTyInt128 : EltType<5>;
+def EltTyFloat16 : EltType<6>;
+def EltTyFloat32 : EltType<7>;
+def EltTyFloat64 : EltType<8>;
+def EltTyBool8 : EltType<9>;
+def EltTyBool16 : EltType<10>;
+def EltTyBool32 : EltType<11>;
+def EltTyBool64 : EltType<12>;
+def EltTyBFloat16 : EltType<13>;
+
+class MemEltType<int val> {
+ int Value = val;
+}
+def MemEltTyDefault : MemEltType<0>;
+def MemEltTyInt8 : MemEltType<1>;
+def MemEltTyInt16 : MemEltType<2>;
+def MemEltTyInt32 : MemEltType<3>;
+def MemEltTyInt64 : MemEltType<4>;
+
+class FlagType<int val> {
+ int Value = val;
+}
+
+// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h
+// and include/clang/Basic/TargetBuiltins.h
+def NoFlags : FlagType<0x00000000>;
+def FirstEltType : FlagType<0x00000001>;
+// : :
+// : :
+def EltTypeMask : FlagType<0x0000000f>;
+def FirstMemEltType : FlagType<0x00000010>;
+// : :
+// : :
+def MemEltTypeMask : FlagType<0x00000070>;
+def FirstMergeTypeMask : FlagType<0x00000080>;
+// : :
+// : :
+def MergeTypeMask : FlagType<0x00000380>;
+def FirstSplatOperand : FlagType<0x00000400>;
+// : :
+// These flags are used to specify which scalar operand
+// needs to be duplicated/splatted into a vector.
+// : :
+def SplatOperandMask : FlagType<0x00001C00>;
+def IsLoad : FlagType<0x00002000>;
+def IsStore : FlagType<0x00004000>;
+def IsGatherLoad : FlagType<0x00008000>;
+def IsScatterStore : FlagType<0x00010000>;
+def IsStructLoad : FlagType<0x00020000>;
+def IsStructStore : FlagType<0x00040000>;
+def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default
+def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
+def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
+def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
+def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
+def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
+def IsByteIndexed : FlagType<0x01000000>;
+def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
+def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
+def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
+def IsGatherPrefetch : FlagType<0x10000000>;
+def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
+def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
+def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
+def IsTupleCreate : FlagType<0x100000000>;
+def IsTupleGet : FlagType<0x200000000>;
+def IsTupleSet : FlagType<0x400000000>;
+def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
+def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
+def IsStreaming : FlagType<0x2000000000>;
+def IsStreamingCompatible : FlagType<0x4000000000>;
+def IsSharedZA : FlagType<0x8000000000>;
+def IsPreservesZA : FlagType<0x10000000000>;
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+class ImmCheckType<int val> {
+ int Value = val;
+}
+def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16 : ImmCheckType<1>; // 1..16
+def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7 : ImmCheckType<6>; // 0..7
+def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
+def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
+def ImmCheck0_13 : ImmCheckType<12>; // 0..13
+def ImmCheck0_1 : ImmCheckType<13>; // 0..1
+def ImmCheck0_2 : ImmCheckType<14>; // 0..2
+def ImmCheck0_3 : ImmCheckType<15>; // 0..3
+def ImmCheck0_0 : ImmCheckType<16>; // 0..0
+def ImmCheck0_15 : ImmCheckType<17>; // 0..15
+
+class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+ int Arg = arg;
+ int EltSizeArg = eltSizeArg;
+ ImmCheckType Kind = kind;
+}
+
+class Inst<string n, string p, string t, MergeType mt, string i,
+ list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+ string Name = n;
+ string Prototype = p;
+ string Types = t;
+ string TargetGuard = "sve";
+ int Merge = mt.Value;
+ string MergeSuffix = mt.Suffix;
+ string LLVMIntrinsic = i;
+ list<FlagType> Flags = ft;
+ list<ImmCheck> ImmChecks = ch;
+ int MemEltType = met.Value;
+}
+
+// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8")
+class SInst<string n, string p, string t, MergeType mt, string i = "",
+ list<FlagType> ft = [], list<ImmCheck> ch = []>
+ : Inst<n, p, t, mt, i, ft, ch, MemEltTyDefault> {
+}
+
+// MInst: Instructions which access memory
+class MInst<string n, string p, string t, list<FlagType> f,
+ MemEltType met = MemEltTyDefault, string i = "",
+ list<ImmCheck> ch = []>
+ : Inst<n, p, t, MergeNone, i, f, ch, met> {
+}
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index ea9995fbe82ee..6de1728b1e50c 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -37,6 +37,12 @@ static constexpr Builtin::Info BuiltinInfo[] = {
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsSVE.def"
+#define BUILTIN(ID, TYPE, ATTRS) \
+ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
+ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+#include "clang/Basic/BuiltinsSME.def"
+
#define BUILTIN(ID, TYPE, ATTRS) \
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \
@@ -772,16 +778,19 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
if (Feature == "+sme") {
HasSME = true;
HasBFloat16 = true;
+ HasFullFP16 = true;
}
if (Feature == "+sme-f64f64") {
HasSME = true;
HasSMEF64F64 = true;
HasBFloat16 = true;
+ HasFullFP16 = true;
}
if (Feature == "+sme-i16i64") {
HasSME = true;
HasSMEI16I64 = true;
HasBFloat16 = true;
+ HasFullFP16 = true;
}
if (Feature == "+sb")
HasSB = true;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c4f1a436fef1b..a0bb35d098a59 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6759,11 +6759,29 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#undef SVEMAP1
#undef SVEMAP2
+#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
+ { \
+ #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
+ TypeModifier \
+ }
+
+#define SMEMAP2(NameBase, TypeModifier) \
+ { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
+static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
+#define GET_SME_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sme_builtin_cg.inc"
+#undef GET_SME_LLVM_INTRINSIC_MAP
+};
+
+#undef SMEMAP1
+#undef SMEMAP2
+
static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
+static bool AArch64SMEIntrinsicsProvenSorted = false;
static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
@@ -8912,6 +8930,8 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
return Builder.getInt32Ty();
case SVETypeFlags::EltTyInt64:
return Builder.getInt64Ty();
+ case SVETypeFlags::EltTyInt128:
+ return Builder.getInt128Ty();
case SVETypeFlags::EltTyFloat16:
return Builder.getHalfTy();
@@ -9030,6 +9050,7 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
switch (VTy->getMinNumElements()) {
default:
llvm_unreachable("unsupported element count!");
+ case 1:
case 2:
case 4:
case 8:
@@ -9391,6 +9412,41 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
return Store;
}
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+ llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
+ return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
+Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned IntID) {
+ Ops[3] = EmitSVEPredicateCast(
+ Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
+
+ SmallVector<Value *> NewOps;
+ NewOps.push_back(Ops[3]);
+
+ llvm::Value *BasePtr = Ops[4];
+
+ // If the intrinsic contains the vnum parameter, multiply it with the vector
+ // size in bytes.
+ if (Ops.size() == 6) {
+ Function *StreamingVectorLength =
+ CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+ llvm::Value *StreamingVectorLengthCall =
+ Builder.CreateCall(StreamingVectorLength);
+ llvm::Value *Mulvl =
+ Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
+ // The type of the ptr parameter is void *, so use Int8Ty here.
+ BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
+ }
+ NewOps.push_back(BasePtr);
+ NewOps.push_back(Ops[0]);
+ NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
+ Function *F = CGM.getIntrinsic(IntID);
+ return Builder.CreateCall(F, NewOps);
+}
+
// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
@@ -9817,6 +9873,43 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return nullptr;
}
+Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ // Find out if any arguments are required to be integer constant expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+ llvm::SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ if ((ICEArguments & (1 << i)) == 0)
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ else {
+ // If this is required to be a constant, constant fold it so that we know
+ // that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(i)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+
+ // Immediates for SVE llvm intrinsics are always 32bit. We can safely
+ // truncate because the immediate has been range checked and no valid
+ // immediate requires more than a handful of bits.
+ *Result = Result->extOrTrunc(32);
+ Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+ }
+ }
+
+ auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
+ AArch64SMEIntrinsicsProvenSorted);
+ SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ if (TypeFlags.isLoad() || TypeFlags.isStore())
+ return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+
+ /// Should not happen
+ return nullptr;
+}
+
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
@@ -9824,6 +9917,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID <= clang::AArch64::LastSVEBuiltin)
return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
+ if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
+ BuiltinID <= clang::AArch64::LastSMEBuiltin)
+ return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
+
unsigned HintID = static_cast<unsigned>(-1);
switch (BuiltinID) {
default: break;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e9ad5e7551e3e..0f720596165ff 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4245,6 +4245,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
+ llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base);
llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
@@ -4259,6 +4260,11 @@ class CodeGenFunction : public CodeGenTypeCache {
unsigned IntID);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
+ llvm::SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm::Triple::ArchType Arch);
llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 33168d09f0f39..d41443f0b285c 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -340,6 +340,8 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
# Generate arm_sve.h
clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h)
+ # Generate arm_sme_draft_spec_subject_to_change.h
+ clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme_draft_spec_subject_to_change.h)
# Generate arm_bf16.h
clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h)
# Generate arm_mve.h
@@ -360,6 +362,7 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
list(APPEND aarch64_only_generated_files
"${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h"
+ "${CMAKE_CURRENT_BINARY_DIR}/arm_sme_draft_spec_subject_to_change.h"
"${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h"
)
endif()
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c02f4f5a5269b..d94e1d0beeaef 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2879,6 +2879,9 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
#define GET_SVE_IMMEDIATE_CHECK
#include "clang/Basic/arm_sve_sema_rangechecks.inc"
#undef GET_SVE_IMMEDIATE_CHECK
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
}
// Perform all the immediate checks for this builtin call.
@@ -2984,6 +2987,14 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
HasError = true;
break;
+ case SVETypeFlags::ImmCheck0_0:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_15:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
+ HasError = true;
+ break;
}
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 562fba190f1a4..4da5a370f949f 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5772,6 +5772,14 @@ static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID,
BuiltinID <= AArch64::LastSVEBuiltin;
}
+static bool ArmSmeAliasValid(ASTContext &Context, unsigned BuiltinID,
+ StringRef AliasName) {
+ if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+ BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
+ return BuiltinID >= AArch64::FirstSMEBuiltin &&
+ BuiltinID <= AArch64::LastSMEBuiltin;
+}
+
static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.isArgIdent(0)) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
@@ -5784,7 +5792,8 @@ static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
StringRef AliasName = cast<FunctionDecl>(D)->getIdentifier()->getName();
bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
- if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
+ if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName) &&
+ !ArmSmeAliasValid(S.Context, BuiltinID, AliasName)) ||
(!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
new file mode 100644
index 0000000000000..c309bde627f7d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
@@ -0,0 +1,148 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef DISABLE_SME_ATTRIBUTES
+#define ARM_STREAMING_ATTR
+#else
+#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
+#endif
+
+// CHECK-C-LABEL: @test_svld1_hor_za8(
+// CHECK-CXX-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za8(0, slice_base, 0, pg, ptr);
+ svld1_hor_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za16(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za16ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za16(0, slice_base, 0, pg, ptr);
+ svld1_hor_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za32(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za32ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za32(0, slice_base, 0, pg, ptr);
+ svld1_hor_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za64(
+// CHECK-CXX-LABEL: @_Z19test_svld1_hor_za64ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za64(0, slice_base, 0, pg, ptr);
+ svld1_hor_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_za128(
+// CHECK-CXX-LABEL: @_Z20test_svld1_hor_za128ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_hor_za128(0, slice_base, 0, pg, ptr);
+ svld1_hor_za128(15, slice_base, 0, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za8(
+// CHECK-CXX-LABEL: @_Z18test_svld1_ver_za8ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za8(0, slice_base, 0, pg, ptr);
+ svld1_ver_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za16(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za16ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za16(0, slice_base, 0, pg, ptr);
+ svld1_ver_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za32(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za32ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za32(0, slice_base, 0, pg, ptr);
+ svld1_ver_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za64(
+// CHECK-CXX-LABEL: @_Z19test_svld1_ver_za64ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za64(0, slice_base, 0, pg, ptr);
+ svld1_ver_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_za128(
+// CHECK-CXX-LABEL: @_Z20test_svld1_ver_za128ju10__SVBool_tPKv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+ svld1_ver_za128(0, slice_base, 0, pg, ptr);
+ svld1_ver_za128(15, slice_base, 0, pg, ptr);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
new file mode 100644
index 0000000000000..6c80ef55f8189
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
@@ -0,0 +1,178 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef DISABLE_SME_ATTRIBUTES
+#define ARM_STREAMING_ATTR
+#else
+#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
+#endif
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_hor_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_hor_za8(
+// CHECK-CXX-LABEL: @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svld1_ver_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+ svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
new file mode 100644
index 0000000000000..067745f7d4a05
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
@@ -0,0 +1,148 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef DISABLE_SME_ATTRIBUTES
+#define ARM_STREAMING_ATTR
+#else
+#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
+#endif
+
+// CHECK-C-LABEL: @test_svst1_hor_za8(
+// CHECK-CXX-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za8(0, slice_base, 0, pg, ptr);
+ svst1_hor_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za16(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za16ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za16(0, slice_base, 0, pg, ptr);
+ svst1_hor_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za32(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za32ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za32(0, slice_base, 0, pg, ptr);
+ svst1_hor_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za64(
+// CHECK-CXX-LABEL: @_Z19test_svst1_hor_za64ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za64(0, slice_base, 0, pg, ptr);
+ svst1_hor_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_za128(
+// CHECK-CXX-LABEL: @_Z20test_svst1_hor_za128ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_hor_za128(0, slice_base, 0, pg, ptr);
+ svst1_hor_za128(15, slice_base, 0, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za8(
+// CHECK-CXX-LABEL: @_Z18test_svst1_ver_za8ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[PTR]], i32 0, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za8(0, slice_base, 0, pg, ptr);
+ svst1_ver_za8(0, slice_base, 15, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za16(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za16ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 1, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za16(0, slice_base, 0, pg, ptr);
+ svst1_ver_za16(1, slice_base, 7, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za32(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za32ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 3, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za32(0, slice_base, 0, pg, ptr);
+ svst1_ver_za32(3, slice_base, 3, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za64(
+// CHECK-CXX-LABEL: @_Z19test_svst1_ver_za64ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE1:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 7, i32 [[TILESLICE1]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za64(0, slice_base, 0, pg, ptr);
+ svst1_ver_za64(7, slice_base, 1, pg, ptr);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_za128(
+// CHECK-CXX-LABEL: @_Z20test_svst1_ver_za128ju10__SVBool_tPv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR:%.*]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+ svst1_ver_za128(0, slice_base, 0, pg, ptr);
+ svst1_ver_za128(15, slice_base, 0, pg, ptr);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
new file mode 100644
index 0000000000000..4af93ac38dcca
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -0,0 +1,178 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef DISABLE_SME_ATTRIBUTES
+#define ARM_STREAMING_ATTR
+#else
+#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
+#endif
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY:ptr|i8\*]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_hor_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svst1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za8(
+// CHECK-CXX-LABEL: @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG:%.*]], [[PTRTY]] [[TMP1]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], [[PTRTY]] [[TMP1]], i32 0, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za16(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 7
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 1, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za32(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 3, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za64(
+// CHECK-CXX-LABEL: @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: [[TILESLICE2:%.*]] = add i32 [[SLICE_BASE]], 1
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 7, i32 [[TILESLICE2]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum);
+}
+
+// CHECK-C-LABEL: @test_svst1_ver_vnum_za128(
+// CHECK-CXX-LABEL: @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPvl(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, [[PTRTY]] [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 0, i32 [[SLICE_BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], [[PTRTY]] [[TMP2]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-NEXT: ret void
+//
+ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+ svst1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum);
+ svst1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum);
+}
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
new file mode 100644
index 0000000000000..ccb0fec8d4c0f
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
@@ -0,0 +1,131 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fsyntax-only -verify -verify-ignore-unexpected=error %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef DISABLE_SME_ATTRIBUTES
+#define ARM_STREAMING_ATTR
+#else
+#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
+#endif
+
+ARM_STREAMING_ATTR
+void test_range_0_0(svbool_t pg, void *ptr) {
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_za8,,,)(1, -1, 15, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_za128,,,)(0, -1, -1, pg, ptr);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_za128,,,)(15, -1, 1, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(-1, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(1, -1, 15, pg, ptr, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(0, -1, -1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 1 is outside the valid range [0, 0]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(15, -1, 1, pg, ptr, 1);
+}
+
+ARM_STREAMING_ATTR
+void test_range_0_1(svbool_t pg, void *ptr) {
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_za16,,,)(2, -1, 7, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_za64,,,)(0, -1, -1, pg, ptr);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(7, -1, 2, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(-1, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(2, -1, 7, pg, ptr, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(0, -1, -1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 2 is outside the valid range [0, 1]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(7, -1, 2, pg, ptr, 1);
+}
+
+ARM_STREAMING_ATTR
+void test_range_0_3(svbool_t pg, void *ptr) {
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_za32,,,)(4, -1, 3, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_za32,,,)(0, -1, -1, pg, ptr);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_za32,,,)(3, -1, 4, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(-1, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(4, -1, 3, pg, ptr, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(0, -1, -1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 4 is outside the valid range [0, 3]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(3, -1, 4, pg, ptr, 1);
+}
+
+ARM_STREAMING_ATTR
+void test_range_0_7(svbool_t pg, void *ptr) {
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(8, -1, 1, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_za16,,,)(0, -1, -1, pg, ptr);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_za16,,,)(1, -1, 8, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(-1, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(8, -1, 1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(0, -1, -1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 8 is outside the valid range [0, 7]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(1, -1, 8, pg, ptr, 1);
+}
+
+ARM_STREAMING_ATTR
+void test_range_0_15(svbool_t pg, void *ptr) {
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_za128,,,)(16, -1, 0, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(0, -1, -1, pg, ptr);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_za8,,,)(0, -1, 16, pg, ptr);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(-1, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, -1, 0, pg, ptr, 1);
+ // expected-error at +1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(0, -1, -1, pg, ptr, 1);
+ // expected-error at +1 {{argument value 16 is outside the valid range [0, 15]}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(0, -1, 16, pg, ptr, 1);
+}
+
+ARM_STREAMING_ATTR
+void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
+ SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}}
+ SVE_ACLE_FUNC(svld1_ver_za16,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svld1_ver_za16' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_ver_za64,,,)(0, u64, u64, pg, ptr); // expected-error {{argument to 'svst1_ver_za64' must be a constant integer}}
+ SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}}
+ SVE_ACLE_FUNC(svld1_ver_vnum_za16,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svld1_ver_vnum_za16' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}}
+ SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svst1_ver_vnum_za64' must be a constant integer}}
+}
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
new file mode 100644
index 0000000000000..b384244ac6c6a
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fsyntax-only -verify -emit-llvm -o - %s
+// REQUIRES: aarch64-registered-target
+
+// Test that functions with the correct target attributes can use the correct SME intrinsics.
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+__attribute__((target("sme")))
+void test_sme(svbool_t pg, void *ptr) {
+ svld1_hor_za8(0, 0, 0, pg, ptr);
+}
+
+__attribute__((target("arch=armv8-a+sme")))
+void test_arch_sme(svbool_t pg, void *ptr) {
+ svld1_hor_vnum_za32(0, 0, 0, pg, ptr, 0);
+}
+
+__attribute__((target("+sme")))
+void test_plus_sme(svbool_t pg, void *ptr) {
+ svst1_ver_za16(0, 0, 0, pg, ptr);
+}
+
+void undefined(svbool_t pg, void *ptr) {
+ svst1_ver_vnum_za64(0, 0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}}
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index d7f1e5af4db26..55a298eebfcf9 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -228,7 +228,7 @@ class Intrinsic {
}
/// Emits the intrinsic declaration to the ostream.
- void emitIntrinsic(raw_ostream &OS) const;
+ void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const;
private:
std::string getMergeSuffix() const { return MergeSuffix; }
@@ -347,8 +347,21 @@ class SVEEmitter {
/// Create the SVETypeFlags used in CGBuiltins
void createTypeFlags(raw_ostream &o);
+ /// Emit arm_sme.h.
+ void createSMEHeader(raw_ostream &o);
+
+ /// Emit all the SME __builtin prototypes and code needed by Sema.
+ void createSMEBuiltins(raw_ostream &o);
+
+ /// Emit all the information needed to map builtin -> LLVM IR intrinsic.
+ void createSMECodeGenMap(raw_ostream &o);
+
+ /// Emit all the range checks for the immediates.
+ void createSMERangeChecks(raw_ostream &o);
+
/// Create intrinsic and add it to \p Out
- void createIntrinsic(Record *R, SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
+ void createIntrinsic(Record *R,
+ SmallVectorImpl<std::unique_ptr<Intrinsic>> &Out);
};
} // end anonymous namespace
@@ -481,6 +494,9 @@ void SVEType::applyTypespec() {
case 'l':
ElementBitwidth = 64;
break;
+ case 'q':
+ ElementBitwidth = 128;
+ break;
case 'h':
Float = true;
ElementBitwidth = 16;
@@ -758,6 +774,11 @@ void SVEType::applyModifier(char Mod) {
NumVectors = 0;
Signed = true;
break;
+ case '%':
+ Pointer = true;
+ Void = true;
+ NumVectors = 0;
+ break;
case 'A':
Pointer = true;
ElementBitwidth = Bitwidth = 8;
@@ -919,15 +940,29 @@ std::string Intrinsic::mangleName(ClassKind LocalCK) const {
getMergeSuffix();
}
-void Intrinsic::emitIntrinsic(raw_ostream &OS) const {
+void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const {
bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1;
std::string FullName = mangleName(ClassS);
std::string ProtoName = mangleName(getClassKind());
+ std::string SMEAttrs = "";
+
+ if (Flags & Emitter.getEnumValueForFlag("IsStreaming"))
+ SMEAttrs += ", arm_streaming";
+ if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible"))
+ SMEAttrs += ", arm_streaming_compatible";
+ if (Flags & Emitter.getEnumValueForFlag("IsSharedZA"))
+ SMEAttrs += ", arm_shared_za";
+ if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA"))
+ SMEAttrs += ", arm_preserves_za";
OS << (IsOverloaded ? "__aio " : "__ai ")
<< "__attribute__((__clang_arm_builtin_alias("
- << "__builtin_sve_" << FullName << ")))\n";
+ << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_")
+ << FullName << ")";
+ if (!SMEAttrs.empty())
+ OS << SMEAttrs;
+ OS << "))\n";
OS << getTypes()[0].str() << " " << ProtoName << "(";
for (unsigned I = 0; I < getTypes().size() - 1; ++I) {
@@ -984,6 +1019,8 @@ uint64_t SVEEmitter::encodeTypeFlags(const SVEType &T) {
return encodeEltType("EltTyInt32");
case 64:
return encodeEltType("EltTyInt64");
+ case 128:
+ return encodeEltType("EltTyInt128");
default:
llvm_unreachable("Unhandled integer element bitwidth!");
}
@@ -1228,7 +1265,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
// Actually emit the intrinsic declarations.
for (auto &I : Defs)
- I->emitIntrinsic(OS);
+ I->emitIntrinsic(OS, *this);
OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n";
OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n";
@@ -1377,6 +1414,165 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createSMEHeader(raw_ostream &OS) {
+ OS << "/*===---- arm_sme_draft_spec_subject_to_change.h - ARM SME intrinsics "
+ "------===\n"
+ " *\n"
+ " *\n"
+ " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+ "Exceptions.\n"
+ " * See https://llvm.org/LICENSE.txt for license information.\n"
+ " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ " *\n"
+ " *===-----------------------------------------------------------------"
+ "------===\n"
+ " */\n\n";
+
+ OS << "#ifndef __ARM_SME_H\n";
+ OS << "#define __ARM_SME_H\n\n";
+
+ OS << "#if !defined(__LITTLE_ENDIAN__)\n";
+ OS << "#error \"Big endian is currently not supported for arm_sme_draft_spec_subject_to_change.h\"\n";
+ OS << "#endif\n";
+
+ OS << "#include <arm_sve.h> \n\n";
+
+ OS << "/* Function attributes */\n";
+ OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
+ "__nodebug__))\n\n";
+
+ OS << "#ifdef __cplusplus\n";
+ OS << "extern \"C\" {\n";
+ OS << "#endif\n\n";
+
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // Sort intrinsics in header file by following order/priority similar to SVE:
+ // - Architectural guard
+ // - Class (is intrinsic overloaded or not)
+ // - Intrinsic name
+ std::stable_sort(Defs.begin(), Defs.end(),
+ [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) {
+ return std::make_tuple(I->getGuard(),
+ (unsigned)I->getClassKind(),
+ I->getName());
+ };
+ return ToTuple(A) < ToTuple(B);
+ });
+
+ // Actually emit the intrinsic declaration.
+ for (auto &I : Defs) {
+ I->emitIntrinsic(OS, *this);
+ }
+
+ OS << "#ifdef __cplusplus\n";
+ OS << "} // extern \"C\"\n";
+ OS << "#endif\n\n";
+ OS << "#undef __ai\n\n";
+ OS << "#endif /* __ARM_SME_H */\n";
+}
+
+void SVEEmitter::createSMEBuiltins(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV) {
+ createIntrinsic(R, Defs);
+ }
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_BUILTINS\n";
+ for (auto &Def : Defs) {
+ // Only create BUILTINs for non-overloaded intrinsics, as overloaded
+ // declarations only live in the header file.
+ if (Def->getClassKind() != ClassG)
+ OS << "TARGET_BUILTIN(__builtin_sme_" << Def->getMangledName() << ", \""
+ << Def->getBuiltinTypeStr() << "\", \"n\", \"" << Def->getGuard()
+ << "\")\n";
+ }
+
+ OS << "#endif\n\n";
+}
+
+void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV) {
+ createIntrinsic(R, Defs);
+ }
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ OS << "#ifdef GET_SME_LLVM_INTRINSIC_MAP\n";
+ for (auto &Def : Defs) {
+ // Builtins only exist for non-overloaded intrinsics, overloaded
+ // declarations only live in the header file.
+ if (Def->getClassKind() == ClassG)
+ continue;
+
+ uint64_t Flags = Def->getFlags();
+ auto FlagString = std::to_string(Flags);
+
+ std::string LLVMName = Def->getLLVMName();
+ std::string Builtin = Def->getMangledName();
+ if (!LLVMName.empty())
+ OS << "SMEMAP1(" << Builtin << ", " << LLVMName << ", " << FlagString
+ << "),\n";
+ else
+ OS << "SMEMAP2(" << Builtin << ", " << FlagString << "),\n";
+ }
+ OS << "#endif\n\n";
+}
+
+void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV) {
+ createIntrinsic(R, Defs);
+ }
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+
+ OS << "#ifdef GET_SME_IMMEDIATE_CHECK\n";
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
+ Def->getImmChecks().empty())
+ continue;
+
+ OS << "case SME::BI__builtin_sme_" << Def->getMangledName() << ":\n";
+ for (auto &Check : Def->getImmChecks())
+ OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", "
+ << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n";
+ OS << " break;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
namespace clang {
void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createHeader(OS);
@@ -1398,4 +1594,19 @@ void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createTypeFlags(OS);
}
+void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMEHeader(OS);
+}
+
+void EmitSmeBuiltins(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMEBuiltins(OS);
+}
+
+void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMECodeGenMap(OS);
+}
+
+void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createSMERangeChecks(OS);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 2fc6639d832f0..1ce8d6b046118 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -82,6 +82,10 @@ enum ActionType {
GenArmSveBuiltinCG,
GenArmSveTypeFlags,
GenArmSveRangeChecks,
+ GenArmSmeHeader,
+ GenArmSmeBuiltins,
+ GenArmSmeBuiltinCG,
+ GenArmSmeRangeChecks,
GenArmCdeHeader,
GenArmCdeBuiltinDef,
GenArmCdeBuiltinSema,
@@ -226,6 +230,14 @@ cl::opt<ActionType> Action(
"Generate arm_sve_typeflags.inc for clang"),
clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks",
"Generate arm_sve_sema_rangechecks.inc for clang"),
+ clEnumValN(GenArmSmeHeader, "gen-arm-sme-header",
+ "Generate arm_sme.h for clang"),
+ clEnumValN(GenArmSmeBuiltins, "gen-arm-sme-builtins",
+ "Generate arm_sme_builtins.inc for clang"),
+ clEnumValN(GenArmSmeBuiltinCG, "gen-arm-sme-builtin-codegen",
+ "Generate arm_sme_builtin_cg_map.inc for clang"),
+ clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks",
+ "Generate arm_sme_sema_rangechecks.inc for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
@@ -454,6 +466,18 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSveRangeChecks:
EmitSveRangeChecks(Records, OS);
break;
+ case GenArmSmeHeader:
+ EmitSmeHeader(Records, OS);
+ break;
+ case GenArmSmeBuiltins:
+ EmitSmeBuiltins(Records, OS);
+ break;
+ case GenArmSmeBuiltinCG:
+ EmitSmeBuiltinCG(Records, OS);
+ break;
+ case GenArmSmeRangeChecks:
+ EmitSmeRangeChecks(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index f7df3dc0ebb67..9474c47a340ac 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -101,6 +101,11 @@ void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
More information about the cfe-commits
mailing list