[clang] [llvm] [AARCH64] Add intrinsic support for new s/udot intrinsics (PR #189424)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 09:03:53 PDT 2026
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/189424
>From 4eef064266de835a8ff7079c4059db5cc5b38af1 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 30 Mar 2026 16:23:08 +0000
Subject: [PATCH 1/2] [AARCH64] Add intrinsic support for new s/udot intrinsics
---
clang/include/clang/Basic/arm_sve.td | 8 +
.../sve2p3-intrinsics/acle_sve2p3_dot.c | 84 +++++++++
...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 58 +++++++
...sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c | 160 ++++++++++++++++++
.../acle_sve2p3_imm.cpp | 14 ++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 12 ++
.../CodeGen/AArch64/sve2p3-intrinsics-dots.ll | 46 +++++
8 files changed, 386 insertions(+), 4 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
create mode 100644 clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..336c83bfbcdf5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2476,3 +2476,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2p2" in {
def FMUL_X2 : SInst<"svmul[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_fmul_x2", [IsStreaming], []>;
def FMUL_X4 : SInst<"svmul[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_fmul_x4", [IsStreaming], []>;
}
+
+let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
+ def SVDOT_X2_SH : SInst<"svdot[_{d}_{2}]", "ddhh", "s", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>;
+ def SVDOT_X2_UH : SInst<"svdot[_{d}_{2}]", "ddhh", "Us", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>;
+
+ def SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
+ def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
new file mode 100644
index 0000000000000..e32ec95f4b6c8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svdot_s16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_s16_x2u11__SVInt16_tu10__SVInt8_tS0_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
+{
+ return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_u16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_u16_x2u12__SVUint16_tu11__SVUint8_tS0_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
+{
+ return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_lane_s16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_s16_x2u11__SVInt16_tu10__SVInt8_tS0_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
+{
+ return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7);
+}
+
+// CHECK-LABEL: @test_svdot_lane_u16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_u16_x2u12__SVUint16_tu11__SVUint8_tS0_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
+{
+ return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7);
+}
diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
new file mode 100644
index 0000000000000..40750dbbb86c8
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -0,0 +1,58 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
+
+void test(void) {
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+
+ svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+ svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+
+ svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+ svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+
+ svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+ svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+ svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+ svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+ svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
new file mode 100644
index 0000000000000..9c31ebde4f7f8
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
@@ -0,0 +1,160 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve-aes2 -verify=guard
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +ssve-aes -target-feature +sve -target-feature +sve-aes2 -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,sve-aes2" streaming_guard="sme,sve-aes2,ssve-aes" flags="feature-dependent"
+
+void test(void) {
+ svuint8_t svuint8_t_val;
+ svuint8x2_t svuint8x2_t_val;
+ svuint8x4_t svuint8x4_t_val;
+ svuint64_t svuint64_t_val;
+ svuint64x2_t svuint64x2_t_val;
+ uint64_t uint64_t_val;
+
+ svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ svpmull_pair(svuint64_t_val, svuint64_t_val);
+ svpmull_pair(svuint64_t_val, uint64_t_val);
+ svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+ svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+ svuint8_t svuint8_t_val;
+ svuint8x2_t svuint8x2_t_val;
+ svuint8x4_t svuint8x4_t_val;
+ svuint64_t svuint64_t_val;
+ svuint64x2_t svuint64x2_t_val;
+ uint64_t uint64_t_val;
+
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair(svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair(svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+ svuint8_t svuint8_t_val;
+ svuint8x2_t svuint8x2_t_val;
+ svuint8x4_t svuint8x4_t_val;
+ svuint64_t svuint64_t_val;
+ svuint64x2_t svuint64x2_t_val;
+ uint64_t uint64_t_val;
+
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair(svuint64_t_val, svuint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair(svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+ // guard-error@+1 {{builtin can only be called from a non-streaming function}}
+ svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
new file mode 100644
index 0000000000000..e0004effa48da
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
@@ -0,0 +1,14 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s
+
+#include <arm_sve.h>
+
+void test_svdot_lane_x2_imm_0_7(svint16_t s16, svuint16_t u16, svint8_t s8,
+ svuint8_t u8) {
+ svdot_lane_s16_s8(s16, s8, s8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ svdot_lane_u16_u8(u16, u8, u8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+
+ svdot_lane_s16_s8(s16, s8, s8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ svdot_lane_u16_u8(u16, u8, u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c5a3bd504adf9..1255fbe73a5b7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4804,8 +4804,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
// SVE2 integer dot product
- def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;
- def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>;
+ defm SDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
+ defm UDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
def : Pat<(nxv8i16 (partial_reduce_umla nxv8i16:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
(UDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>;
@@ -4813,8 +4813,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
(SDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>;
// SVE2 integer indexed dot product
- def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">;
- def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
+ defm SDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>;
+ defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>;
// SVE2 fp convert, narrow and interleave to integer, rounding toward zero
defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8a3f52090ab4c..e411c221fe7f5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3821,6 +3821,12 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri
def : SVE_3_Op_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
}
+multiclass sve2p3_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intrinsic> {
+ def NAME : sve_intx_dot<0b01, 0b00000, u, mnemonic, ZPR16, ZPR8>;
+
+ def : SVE_3_Op_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Integer Dot Product Group - Indexed Group
//===----------------------------------------------------------------------===//
@@ -10015,6 +10021,12 @@ multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intr
def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
+multiclass sve2p3_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intrinsic> {
+ def NAME : sve_intx_dot_by_indexed_elem_x<u, mnemonic>;
+
+ def : SVE_4_Op_Imm_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
+}
+
class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatternOperator op>
: I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
"", [(set pnrty:$PNd, (op))]>, Sched<[]> {
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll
new file mode 100644
index 0000000000000..4636ffb122d6b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -force-streaming < %s | FileCheck %s
+
+define <vscale x 8 x i16> @sdot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sdot_x2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sdot z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @udot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: udot_x2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: udot z0.h, z1.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @sdot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sdot_lane_x2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sdot z0.h, z1.b, z2.b[7]
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 7)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @udot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: udot_lane_x2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: udot z0.h, z1.b, z2.b[7]
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 7)
+ ret <vscale x 8 x i16> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32)
>From eb9d580c6b394c9d8895f35c791996d3064ad6da Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 1 Apr 2026 16:03:15 +0000
Subject: [PATCH 2/2] Fix tests
---
.../AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
index e32ec95f4b6c8..8ad4fec2aae52 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
@@ -8,9 +8,7 @@
#include <arm_sve.h>
-#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
-#define ATTR __arm_streaming_compatible
-#elif defined(__ARM_FEATURE_SME)
+#if defined(__ARM_FEATURE_SME)
#define ATTR __arm_streaming
#else
#define ATTR
@@ -18,9 +16,9 @@
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
#else
-#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
#endif
// CHECK-LABEL: @test_svdot_s16_x2(
@@ -35,7 +33,7 @@
//
svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
{
- return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3);
+ return SVE_ACLE_FUNC(svdot,_s16_s8)(op1, op2, op3);
}
// CHECK-LABEL: @test_svdot_u16_x2(
@@ -50,7 +48,7 @@ svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
//
svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
{
- return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3);
+ return SVE_ACLE_FUNC(svdot,_u16_u8)(op1, op2, op3);
}
// CHECK-LABEL: @test_svdot_lane_s16_x2(
@@ -65,7 +63,7 @@ svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
//
svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
{
- return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7);
+ return SVE_ACLE_FUNC(svdot_lane,_s16_s8)(op1, op2, op3, 7);
}
// CHECK-LABEL: @test_svdot_lane_u16_x2(
@@ -80,5 +78,5 @@ svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
//
svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
{
- return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7);
+ return SVE_ACLE_FUNC(svdot_lane,_u16_u8)(op1, op2, op3, 7);
}
More information about the llvm-commits
mailing list