[clang] [AArch64][Clang] Update untyped sme intrinsics with fp8 variants (PR #124543)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 27 05:50:28 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (Lukacma)
<details>
<summary>Changes</summary>
This patch adds fp8 variants to the following untyped SME intrinsics based on [ACLE](https://github.com/ARM-software/acle/blob/main/main/acle.md):
```
svint8_t svread_hor_za8[_s8]_m(svint8_t zd, svbool_t pg,
uint64_t tile, uint32_t slice)
svint8_t svread_hor_za128[_s8]_m(svint8_t zd, svbool_t pg,
uint64_t tile, uint32_t slice)
void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
svint8_t zn)
void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg,
svint8_t zn)
svint8_t svluti2_lane_zt_s8(uint64_t zt, svuint8_t zn, uint64_t imm_idx)
svint8x2_t svluti2_lane_zt_s8_x2(uint64_t zt, svuint8_t zn,
svint8x4_t svluti2_lane_zt_s8_x4(uint64_t zt, svuint8_t zn,
svint8_t svluti4_lane_zt_s8(uint64_t zt, svuint8_t zn, uint64_t imm_idx)
svint16x4_t svluti4_lane_zt_s16_x4(uint64_t zt, svuint8_t zn,
svint8x2_t svread_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
svint8x4_t svread_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
svint8x2_t svread_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
svint8x4_t svread_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
svint8x2_t svread_za8_s8_vg1x2(uint32_t slice)
svint8x4_t svread_za8_s8_vg1x4(uint32_t slice)
void svwrite_hor_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
void svwrite_hor_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
void svwrite_ver_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
void svwrite_ver_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
void svwrite_za8[_s8]_vg1x2(uint32_t slice, svint8x2_t zn)
void svwrite_za8[_s8]_vg1x4(uint32_t slice, svint8x4_t zn)
svuint8x2_t svsel[_u8_x2](svcount_t png, svuint8x2_t zn, svuint8x2_t zm)
svuint8x4_t svsel[_u8_x4](svcount_t png, svuint8x4_t zn, svuint8x4_t zm)
svint8x2_t svzip[_s8_x2](svint8x2_t zn) __arm_streaming;
svint8x4_t svzip[_s8_x4](svint8x4_t zn) __arm_streaming;
svint8x2_t svzipq[_s8_x2](svint8x2_t zn) __arm_streaming;
svint8x4_t svzipq[_s8_x4](svint8x4_t zn) __arm_streaming;
svint8x2_t svuzp[_s8_x2](svint8x2_t zn) __arm_streaming;
svint8x4_t svuzp[_s8_x4](svint8x4_t zn) __arm_streaming;
svint8x2_t svuzpq[_s8_x2](svint8x2_t zn) __arm_streaming;
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
svuint8_t svrevd[_u8]_m(svuint8_t zd, svbool_t pg, svuint8_t zn);
svuint8_t svrevd[_u8]_z(svbool_t pg, svuint8_t zn);
svuint8_t svrevd[_u8]_x(svbool_t pg, svuint8_t zn);
```
---
Patch is 120.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124543.diff
19 Files Affected:
- (modified) clang/include/clang/Basic/arm_sme.td (+16-16)
- (modified) clang/include/clang/Basic/arm_sve.td (+12-12)
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c (+134)
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_write.c (+135)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c (+13)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c (+14)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c (+14)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c (+14)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c (+14)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c (+83-1)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c (+13)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c (+57-43)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c (+28)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c (+28)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c (+28)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c (+28)
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c (+74-4)
- (modified) clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c (+136)
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c (+42)
``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 891ed9874bb3d0..51cab572ce357d 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -110,11 +110,11 @@ multiclass ZARead<string n_suffix, string t, string i_prefix, list<ImmCheck> ch>
}
}
-defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>;
+defm SVREAD_ZA8 : ZARead<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>;
defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>]>;
defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>]>;
-defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>;
+defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>;
////////////////////////////////////////////////////////////////////////////////
// Write horizontal/vertical ZA slices
@@ -131,11 +131,11 @@ multiclass ZAWrite<string n_suffix, string t, string i_prefix, list<ImmCheck> ch
}
}
-defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVWRITE_ZA8 : ZAWrite<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>;
+defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>;
////////////////////////////////////////////////////////////////////////////////
// SME - Zero
@@ -350,7 +350,7 @@ multiclass ZAWrite_VG<string n, string t, string i, list<ImmCheck> checks> {
}
let SMETargetGuard = "sme2" in {
- defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
+ defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
@@ -366,7 +366,7 @@ multiclass ZARead_VG<string n, string t, string i, list<ImmCheck> checks> {
}
let SMETargetGuard = "sme2" in {
- defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
+ defm SVREAD_ZA8 : ZARead_VG<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>;
@@ -722,7 +722,7 @@ def IN_STREAMING_MODE : Inst<"__arm_in_streaming_mode", "sv", "Pc", MergeNone,
// lookup table expand four contiguous registers
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>;
}
@@ -730,16 +730,16 @@ let SMETargetGuard = "sme2" in {
// lookup table expand one register
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
- def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
}
//
// lookup table expand two contiguous registers
//
let SMETargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
- def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
}
//
@@ -811,12 +811,12 @@ multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, li
}
}
-defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
@@ -834,15 +834,15 @@ multiclass ZAReadzSingle<string n_suffix, string t, string i_prefix, list<ImmChe
}
}
-defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>;
-defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
+defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlmbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>;
multiclass ZAReadzArray<string vg_num>{
let SMETargetGuard = "sme2p1" in {
- def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
+ def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUcm", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index e7001bac450e89..12cca992a21ab7 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2104,7 +2104,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>;
-defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
+defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlmbhfd", "aarch64_sve_revd">;
}
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2223,8 +2223,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
// 2-way and 4-way selects
- def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>;
- def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>;
+ def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>;
+ def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>;
// SRSHL / URSHL
def SVSRSHL_SINGLE_X2 : SInst<"svrshl[_single_{d}_x2]", "22d", "csil", MergeNone, "aarch64_sve_srshl_single_x2", [IsStreaming], []>;
@@ -2402,15 +2402,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
//
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
- def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>;
- def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>;
- def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>;
- def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>;
-
- def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>;
- def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>;
- def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>;
- def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>;
+ def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>;
+ def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>;
+ def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>;
+ def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>;
+
+ def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>;
+ def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>;
+ def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>;
+ def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>;
}
//
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
index 508fad09ea715b..0605f9eef036fd 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c
@@ -318,6 +318,41 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_hor_za8_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za8_mf8_1u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za8_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ uint32_t slice = slice_base + 15;
+ return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice);
+}
+
// CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za16_f16(
// CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
@@ -754,6 +789,38 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za128_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za128_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z27test_svread_hor_za128_mf8_1u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_hor_za128_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 15, slice_base);
+}
+
// CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za128_f16(
// CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
@@ -1202,6 +1269,41 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-C-NEXT: entry:
+// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_ver_za8_mf8u13__SVMfloat8_tu10__SVBool_tj(
+// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT: entry:
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svread_ver_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") {
+ return SME_ACLE_FUNC(svread_ver_za8, _mf8, _m)(zd, pg, 0, slice_base);
+}
+
+// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8_1(
+// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124543
More information about the cfe-commits
mailing list