[clang] [llvm] [AArch64] Add 9.7 CVT data processing intrinsics (PR #186807)
Martin Wehking via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 18 07:24:57 PDT 2026
https://github.com/MartinWehking updated https://github.com/llvm/llvm-project/pull/186807
>From 759eecd18b5ba07c1d1c7fc151cf2c824baffb1b Mon Sep 17 00:00:00 2001
From: Martin Wehking <martin.wehking at arm.com>
Date: Mon, 16 Mar 2026 11:01:10 +0000
Subject: [PATCH 1/4] [AArch64] Add 9.7 data processing intrinsics
Add Clang/LLVM intrinsics for svcvt_{s,u}N_x2 (fcvtzsn/fcvtzun), svcvtt
(scvtflt/ucvtflt) and svcvtb (scvtfb/ucvtfb).
The Clang intrinsics are guarded by the sve2p3 and sme2p3 feature flags.
ACLE Patch:
https://github.com/ARM-software/acle/pull/428
---
clang/include/clang/Basic/arm_sve.td | 27 ++
.../acle_sve2_fp_int_cvtn_x2.c | 105 ++++++++
.../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 189 +++++++++++++
llvm/include/llvm/IR/IntrinsicsAArch64.td | 33 +++
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 15 +-
.../AArch64/sve2p3-intrinsics-fp-converts.ll | 255 ++++++++++++++++++
.../sve2p3-intrinsics-fp-converts_x2.ll | 157 +++++++++++
8 files changed, 785 insertions(+), 8 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..852cc60c6e0b3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -997,6 +997,33 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a
def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>;
}
+
+let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
+def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+
+def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+
+def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+}
+
////////////////////////////////////////////////////////////////////////////////
// Permutations and selection
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
new file mode 100644
index 0000000000000..a4a7c58e1ced9
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
@@ -0,0 +1,105 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+// CHECK-LABEL: @test_svcvt_s8_f16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
+ return svcvt_s8_f16_x2(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_s16_f32_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
+ return svcvt_s16_f32_x2(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_s32_f64_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f64_x213svfloat64x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
+ return svcvt_s32_f64_x2(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_u8_f16_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
+ return svcvt_u8_f16_x2(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_u16_f32_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
+ return svcvt_u16_f32_x2(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_u32_f64_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f64_x213svfloat64x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
+ return svcvt_u32_f64_x2(zn);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
new file mode 100644
index 0000000000000..6b7252e045e33
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
@@ -0,0 +1,189 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
+// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+//
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+// CHECK-LABEL: @test_svcvtb_f16_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
+ return svcvtb_f16_s8(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f32_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
+ return svcvtb_f32_s16(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f64_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
+ return svcvtb_f64_s32(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f16_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
+ return svcvtb_f16_u8(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f32_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
+ return svcvtb_f32_u16(zn);
+}
+
+// CHECK-LABEL: @test_svcvtb_f64_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
+ return svcvtb_f64_u32(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f16_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
+ return svcvtt_f16_s8(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f32_s16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
+ return svcvtt_f32_s16(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f64_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
+ return svcvtt_f64_s32(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f16_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
+ return svcvtt_f16_u8(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f32_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
+ return svcvtt_f32_u16(zn);
+}
+
+// CHECK-LABEL: @test_svcvt_f64_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR {
+ return svcvtt_f64_u32(zn);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 75929cbc222ad..d9f7314740953 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1051,6 +1051,7 @@ def llvm_nxv4i1_ty : LLVMType<nxv4i1>;
def llvm_nxv8i1_ty : LLVMType<nxv8i1>;
def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
+def llvm_nxv8i16_ty : LLVMType<nxv8i16>;
def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
@@ -2610,6 +2611,29 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
+//
+// SVE2 - Multi-vector narrowing convert to floating point
+//
+
+class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN>
+ : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>;
+
+def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
+def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
+def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
+def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
+def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
+def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
+def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
+def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
+def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
//
// SVE2 - Floating-point integer binary logarithm
//
@@ -3526,6 +3550,10 @@ let TargetPrefix = "aarch64" in {
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
+ class SVE2_CVT_VG2_Single_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, LLVMMatchType<1>],
+ [IntrNoMem]>;
//
// Multi-vector fused multiply-add/subtract
//
@@ -4053,6 +4081,11 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
+// SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point
+
+def int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic;
+def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic;
+
//
// FP8 Intrinsics
//
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 926593022b537..72a6f3bd49abe 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4776,14 +4776,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
// SVE2 fp convert, narrow and interleave to integer, rounding toward zero
- defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
- defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>;
+ defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn>;
+ defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun>;
// SVE2 signed/unsigned integer convert to floating-point
- defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>;
- defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>;
- defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>;
- defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>;
+ defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">;
+ defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, "int_aarch64_sve_scvtflt">;
+ defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, "int_aarch64_sve_ucvtfb">;
+ defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, "int_aarch64_sve_ucvtflt">;
// SVE2 saturating shift right narrow by immediate and interleave
defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8a3f52090ab4c..0958b3b665e32 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -11423,10 +11423,17 @@ class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, RegisterOperand ZSrcOp,
let Inst{4-0} = Zd;
}
-multiclass sve2_fp_to_int_downcvt<string asm, bit U> {
+multiclass sve2_fp_to_int_downcvt<string asm, bit U, SDPatternOperator op> {
def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>;
def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>;
def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>;
+
+ def : Pat<(nxv16i8 (op nxv8f16:$Zn1, nxv8f16:$Zn2)),
+ (!cast<Instruction>(NAME # _HtoB) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>;
+ def : Pat<(nxv8i16 (op nxv4f32:$Zn1, nxv4f32:$Zn2)),
+ (!cast<Instruction>(NAME # _StoH) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>;
+ def : Pat<(nxv4i32 (op nxv2f64:$Zn1, nxv2f64:$Zn2)),
+ (!cast<Instruction>(NAME # _DtoS) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>;
}
//===----------------------------------------------------------------------===//
@@ -11446,8 +11453,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC,
let Inst{4-0} = Zd;
}
-multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> {
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> {
def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>;
def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>;
def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>;
+
+ def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
+ def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), nxv8i16, !cast<Instruction>(NAME # _HtoS)>;
+ def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), nxv4i32, !cast<Instruction>(NAME # _StoD)>;
}
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
new file mode 100644
index 0000000000000..46778fc14b81f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR
+;
+; SVCVTB (SCVTFB / UCVTFB)
+;
+
+define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) {
+; SVE2P3-LABEL: scvtfb_f16_i8:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtf z0.h, z0.b
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtfb_f16_i8:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtf z0.h, z0.b
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtfb_f16_i8:
+; STR: // %bb.0:
+; STR-NEXT: scvtf z0.h, z0.b
+; STR-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) {
+; SVE2P3-LABEL: scvtfb_f32_i16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtf z0.s, z0.h
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtfb_f32_i16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtf z0.s, z0.h
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtfb_f32_i16:
+; STR: // %bb.0:
+; STR-NEXT: scvtf z0.s, z0.h
+; STR-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) {
+; SVE2P3-LABEL: scvtfb_f64_i32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtf z0.d, z0.s
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtfb_f64_i32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtf z0.d, z0.s
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtfb_f64_i32:
+; STR: // %bb.0:
+; STR-NEXT: scvtf z0.d, z0.s
+; STR-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) {
+; SVE2P3-LABEL: ucvtfb_f16_i8:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtf z0.h, z0.b
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtfb_f16_i8:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtf z0.h, z0.b
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtfb_f16_i8:
+; STR: // %bb.0:
+; STR-NEXT: ucvtf z0.h, z0.b
+; STR-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) {
+; SVE2P3-LABEL: ucvtfb_f32_i16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtf z0.s, z0.h
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtfb_f32_i16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtf z0.s, z0.h
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtfb_f32_i16:
+; STR: // %bb.0:
+; STR-NEXT: ucvtf z0.s, z0.h
+; STR-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) {
+; SVE2P3-LABEL: ucvtfb_f64_i32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtf z0.d, z0.s
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtfb_f64_i32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtf z0.d, z0.s
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtfb_f64_i32:
+; STR: // %bb.0:
+; STR-NEXT: ucvtf z0.d, z0.s
+; STR-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn)
+ ret <vscale x 2 x double> %res
+}
+
+;
+; SVCVTT (SCVTFLT / UCVTFLT)
+;
+
+define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) {
+; SVE2P3-LABEL: scvtflt_f16_i8:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtflt z0.h, z0.b
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtflt_f16_i8:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtflt z0.h, z0.b
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtflt_f16_i8:
+; STR: // %bb.0:
+; STR-NEXT: scvtflt z0.h, z0.b
+; STR-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) {
+; SVE2P3-LABEL: scvtflt_f32_i16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtflt z0.s, z0.h
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtflt_f32_i16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtflt z0.s, z0.h
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtflt_f32_i16:
+; STR: // %bb.0:
+; STR-NEXT: scvtflt z0.s, z0.h
+; STR-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) {
+; SVE2P3-LABEL: scvtflt_f64_i32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: scvtflt z0.d, z0.s
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: scvtflt_f64_i32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: scvtflt z0.d, z0.s
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: scvtflt_f64_i32:
+; STR: // %bb.0:
+; STR-NEXT: scvtflt z0.d, z0.s
+; STR-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn)
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) {
+; SVE2P3-LABEL: ucvtflt_f16_i8:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtflt z0.h, z0.b
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtflt_f16_i8:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtflt z0.h, z0.b
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtflt_f16_i8:
+; STR: // %bb.0:
+; STR-NEXT: ucvtflt z0.h, z0.b
+; STR-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) {
+; SVE2P3-LABEL: ucvtflt_f32_i16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtflt z0.s, z0.h
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtflt_f32_i16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtflt z0.s, z0.h
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtflt_f32_i16:
+; STR: // %bb.0:
+; STR-NEXT: ucvtflt z0.s, z0.h
+; STR-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) {
+; SVE2P3-LABEL: ucvtflt_f64_i32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: ucvtflt z0.d, z0.s
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: ucvtflt_f64_i32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: ucvtflt z0.d, z0.s
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: ucvtflt_f64_i32:
+; STR: // %bb.0:
+; STR-NEXT: ucvtflt z0.d, z0.s
+; STR-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn)
+ ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
new file mode 100644
index 0000000000000..4c99a4c241318
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR
+;
+; FCVTZSN
+;
+
+define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
+; SVE2P3-LABEL: fcvtzsn_i8_f16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzsn_i8_f16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzsn_i8_f16:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
+; STR-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; SVE2P3-LABEL: fcvtzsn_i16_f32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzsn_i16_f32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzsn_i16_f32:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
+; STR-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
+; SVE2P3-LABEL: fcvtzsn_i32_f64:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzsn_i32_f64:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzsn_i32_f64:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
+; STR-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
+ ret <vscale x 4 x i32> %res
+}
+
+;
+; FCVTZUN
+;
+
+define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
+; SVE2P3-LABEL: fcvtzun_i8_f16:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzun_i8_f16:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzun_i8_f16:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzun z0.b, { z0.h, z1.h }
+; STR-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
+ ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; SVE2P3-LABEL: fcvtzun_i16_f32:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzun_i16_f32:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzun_i16_f32:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s }
+; STR-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
+; SVE2P3-LABEL: fcvtzun_i32_f64:
+; SVE2P3: // %bb.0:
+; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d }
+; SVE2P3-NEXT: ret
+;
+; SME2P3-LABEL: fcvtzun_i32_f64:
+; SME2P3: // %bb.0:
+; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SME2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d }
+; SME2P3-NEXT: ret
+;
+; STR-LABEL: fcvtzun_i32_f64:
+; STR: // %bb.0:
+; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d }
+; STR-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
+ ret <vscale x 4 x i32> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double>, <vscale x 2 x double>)
>From 609c8fbb67451d8872a45d0efda13b5392d8f9b5 Mon Sep 17 00:00:00 2001
From: Martin Wehking <martin.wehking at arm.com>
Date: Wed, 18 Mar 2026 10:18:50 +0000
Subject: [PATCH 2/4] Fix overload and address comments
---
clang/include/clang/Basic/arm_sve.td | 36 +--
.../acle_sve2_fp_int_cvtn_x2.c | 49 ++--
...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 121 +++++++++
llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 +-
.../AArch64/sve2p3-intrinsics-fp-converts.ll | 237 ++++--------------
.../sve2p3-intrinsics-fp-converts_x2.ll | 19 +-
6 files changed, 233 insertions(+), 242 deletions(-)
create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 852cc60c6e0b3..c55a2d03f2037 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -999,29 +999,29 @@ def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "a
}
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
-def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
+def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F16_S8 : SInst<"svcvtt_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_S16 : SInst<"svcvtt_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_S32 : SInst<"svcvtt_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F16_U8 : SInst<"svcvtt_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_U16 : SInst<"svcvtt_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_U32 : SInst<"svcvtt_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F16_S8 : SInst<"svcvtb_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_S16 : SInst<"svcvtb_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_S32 : SInst<"svcvtb_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F16_U8 : SInst<"svcvtb_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_U16 : SInst<"svcvtb_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_U32 : SInst<"svcvtb_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
index a4a7c58e1ced9..e2cd71bd8b062 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
@@ -1,9 +1,13 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+//
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
@@ -20,86 +24,93 @@
#define MODE_ATTR
#endif
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
// CHECK-LABEL: @test_svcvt_s8_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
- return svcvt_s8_f16_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_s8,_f16_x2)(zn);
}
// CHECK-LABEL: @test_svcvt_s16_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
- return svcvt_s16_f32_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_s16,_f32_x2)(zn);
}
// CHECK-LABEL: @test_svcvt_s32_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f64_x213svfloat64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
- return svcvt_s32_f64_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_s32,_f64_x2)(zn);
}
// CHECK-LABEL: @test_svcvt_u8_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
- return svcvt_u8_f16_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_u8,_f16_x2)(zn);
}
// CHECK-LABEL: @test_svcvt_u16_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
- return svcvt_u16_f32_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_u16,_f32_x2)(zn);
}
// CHECK-LABEL: @test_svcvt_u32_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f64_x213svfloat64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
- return svcvt_u32_f64_x2(zn);
+ return SVE_ACLE_FUNC(svcvt_u32,_f64_x2)(zn);
}
diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
new file mode 100644
index 0000000000000..76290675e3b93
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -0,0 +1,121 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
+
+void test(void) {
+ svfloat16x2_t svfloat16x2_t_val;
+ svfloat32x2_t svfloat32x2_t_val;
+ svfloat64x2_t svfloat64x2_t_val;
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svint32_t svint32_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+ svuint32_t svuint32_t_val;
+
+ svcvt_s8(svfloat16x2_t_val);
+ svcvt_s8_f16_x2(svfloat16x2_t_val);
+ svcvt_s16(svfloat32x2_t_val);
+ svcvt_s16_f32_x2(svfloat32x2_t_val);
+ svcvt_s32(svfloat64x2_t_val);
+ svcvt_s32_f64_x2(svfloat64x2_t_val);
+ svcvt_u8(svfloat16x2_t_val);
+ svcvt_u8_f16_x2(svfloat16x2_t_val);
+ svcvt_u16(svfloat32x2_t_val);
+ svcvt_u16_f32_x2(svfloat32x2_t_val);
+ svcvt_u32(svfloat64x2_t_val);
+ svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16_s8(svint8_t_val);
+ svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32_s16(svint16_t_val);
+ svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64_s32(svint32_t_val);
+ svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16_s8(svint8_t_val);
+ svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32_s16(svint16_t_val);
+ svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64_s32(svint32_t_val);
+ svcvtt_f64_u32(svuint32_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+ svfloat16x2_t svfloat16x2_t_val;
+ svfloat32x2_t svfloat32x2_t_val;
+ svfloat64x2_t svfloat64x2_t_val;
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svint32_t svint32_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+ svuint32_t svuint32_t_val;
+
+ svcvt_s8(svfloat16x2_t_val);
+ svcvt_s8_f16_x2(svfloat16x2_t_val);
+ svcvt_s16(svfloat32x2_t_val);
+ svcvt_s16_f32_x2(svfloat32x2_t_val);
+ svcvt_s32(svfloat64x2_t_val);
+ svcvt_s32_f64_x2(svfloat64x2_t_val);
+ svcvt_u8(svfloat16x2_t_val);
+ svcvt_u8_f16_x2(svfloat16x2_t_val);
+ svcvt_u16(svfloat32x2_t_val);
+ svcvt_u16_f32_x2(svfloat32x2_t_val);
+ svcvt_u32(svfloat64x2_t_val);
+ svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16_s8(svint8_t_val);
+ svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32_s16(svint16_t_val);
+ svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64_s32(svint32_t_val);
+ svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16_s8(svint8_t_val);
+ svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32_s16(svint16_t_val);
+ svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64_s32(svint32_t_val);
+ svcvtt_f64_u32(svuint32_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+ svfloat16x2_t svfloat16x2_t_val;
+ svfloat32x2_t svfloat32x2_t_val;
+ svfloat64x2_t svfloat64x2_t_val;
+ svint8_t svint8_t_val;
+ svint16_t svint16_t_val;
+ svint32_t svint32_t_val;
+ svuint8_t svuint8_t_val;
+ svuint16_t svuint16_t_val;
+ svuint32_t svuint32_t_val;
+
+ svcvt_s8(svfloat16x2_t_val);
+ svcvt_s8_f16_x2(svfloat16x2_t_val);
+ svcvt_s16(svfloat32x2_t_val);
+ svcvt_s16_f32_x2(svfloat32x2_t_val);
+ svcvt_s32(svfloat64x2_t_val);
+ svcvt_s32_f64_x2(svfloat64x2_t_val);
+ svcvt_u8(svfloat16x2_t_val);
+ svcvt_u8_f16_x2(svfloat16x2_t_val);
+ svcvt_u16(svfloat32x2_t_val);
+ svcvt_u16_f32_x2(svfloat32x2_t_val);
+ svcvt_u32(svfloat64x2_t_val);
+ svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16_s8(svint8_t_val);
+ svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32_s16(svint16_t_val);
+ svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64_s32(svint32_t_val);
+ svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16_s8(svint8_t_val);
+ svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32_s16(svint16_t_val);
+ svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64_s32(svint32_t_val);
+ svcvtt_f64_u32(svuint32_t_val);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index d9f7314740953..29520c17a3950 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3550,7 +3550,7 @@ let TargetPrefix = "aarch64" in {
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
[IntrNoMem]>;
- class SVE2_CVT_VG2_Single_Intrinsic
+ class SVE2_CVT_VG2_Narrowing_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>],
[IntrNoMem]>;
@@ -4038,6 +4038,12 @@ let TargetPrefix = "aarch64" in {
LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
+
+ //
+ // SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point
+ //
+ def int_aarch64_sve_fcvtzsn_x2: SVE2_CVT_VG2_Narrowing_Intrinsic;
+ def int_aarch64_sve_fcvtzun_x2: SVE2_CVT_VG2_Narrowing_Intrinsic;
}
// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
@@ -4081,11 +4087,6 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
-// SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point
-
-def int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic;
-def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic;
-
//
// FP8 Intrinsics
//
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
index 46778fc14b81f..b842571e1ef8e 100644
--- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll
@@ -1,121 +1,61 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s
;
; SVCVTB (SCVTFB / UCVTFB)
;
define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) {
-; SVE2P3-LABEL: scvtfb_f16_i8:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtf z0.h, z0.b
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtfb_f16_i8:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtf z0.h, z0.b
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtfb_f16_i8:
-; STR: // %bb.0:
-; STR-NEXT: scvtf z0.h, z0.b
-; STR-NEXT: ret
+; CHECK-LABEL: scvtfb_f16_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf z0.h, z0.b
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn)
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) {
-; SVE2P3-LABEL: scvtfb_f32_i16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtf z0.s, z0.h
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtfb_f32_i16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtf z0.s, z0.h
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtfb_f32_i16:
-; STR: // %bb.0:
-; STR-NEXT: scvtf z0.s, z0.h
-; STR-NEXT: ret
+; CHECK-LABEL: scvtfb_f32_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf z0.s, z0.h
+; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) {
-; SVE2P3-LABEL: scvtfb_f64_i32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtf z0.d, z0.s
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtfb_f64_i32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtf z0.d, z0.s
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtfb_f64_i32:
-; STR: // %bb.0:
-; STR-NEXT: scvtf z0.d, z0.s
-; STR-NEXT: ret
+; CHECK-LABEL: scvtfb_f64_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf z0.d, z0.s
+; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn)
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) {
-; SVE2P3-LABEL: ucvtfb_f16_i8:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtf z0.h, z0.b
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtfb_f16_i8:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtf z0.h, z0.b
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtfb_f16_i8:
-; STR: // %bb.0:
-; STR-NEXT: ucvtf z0.h, z0.b
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtfb_f16_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf z0.h, z0.b
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn)
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) {
-; SVE2P3-LABEL: ucvtfb_f32_i16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtf z0.s, z0.h
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtfb_f32_i16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtf z0.s, z0.h
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtfb_f32_i16:
-; STR: // %bb.0:
-; STR-NEXT: ucvtf z0.s, z0.h
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtfb_f32_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf z0.s, z0.h
+; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) {
-; SVE2P3-LABEL: ucvtfb_f64_i32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtf z0.d, z0.s
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtfb_f64_i32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtf z0.d, z0.s
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtfb_f64_i32:
-; STR: // %bb.0:
-; STR-NEXT: ucvtf z0.d, z0.s
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtfb_f64_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf z0.d, z0.s
+; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn)
ret <vscale x 2 x double> %res
}
@@ -125,131 +65,56 @@ define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) {
;
define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) {
-; SVE2P3-LABEL: scvtflt_f16_i8:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtflt z0.h, z0.b
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtflt_f16_i8:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtflt z0.h, z0.b
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtflt_f16_i8:
-; STR: // %bb.0:
-; STR-NEXT: scvtflt z0.h, z0.b
-; STR-NEXT: ret
+; CHECK-LABEL: scvtflt_f16_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtflt z0.h, z0.b
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn)
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) {
-; SVE2P3-LABEL: scvtflt_f32_i16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtflt z0.s, z0.h
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtflt_f32_i16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtflt z0.s, z0.h
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtflt_f32_i16:
-; STR: // %bb.0:
-; STR-NEXT: scvtflt z0.s, z0.h
-; STR-NEXT: ret
+; CHECK-LABEL: scvtflt_f32_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtflt z0.s, z0.h
+; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) {
-; SVE2P3-LABEL: scvtflt_f64_i32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: scvtflt z0.d, z0.s
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: scvtflt_f64_i32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: scvtflt z0.d, z0.s
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: scvtflt_f64_i32:
-; STR: // %bb.0:
-; STR-NEXT: scvtflt z0.d, z0.s
-; STR-NEXT: ret
+; CHECK-LABEL: scvtflt_f64_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtflt z0.d, z0.s
+; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn)
ret <vscale x 2 x double> %res
}
define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) {
-; SVE2P3-LABEL: ucvtflt_f16_i8:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtflt z0.h, z0.b
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtflt_f16_i8:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtflt z0.h, z0.b
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtflt_f16_i8:
-; STR: // %bb.0:
-; STR-NEXT: ucvtflt z0.h, z0.b
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtflt_f16_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtflt z0.h, z0.b
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn)
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) {
-; SVE2P3-LABEL: ucvtflt_f32_i16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtflt z0.s, z0.h
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtflt_f32_i16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtflt z0.s, z0.h
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtflt_f32_i16:
-; STR: // %bb.0:
-; STR-NEXT: ucvtflt z0.s, z0.h
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtflt_f32_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtflt z0.s, z0.h
+; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) {
-; SVE2P3-LABEL: ucvtflt_f64_i32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: ucvtflt z0.d, z0.s
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: ucvtflt_f64_i32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: ucvtflt z0.d, z0.s
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: ucvtflt_f64_i32:
-; STR: // %bb.0:
-; STR-NEXT: ucvtflt z0.d, z0.s
-; STR-NEXT: ret
+; CHECK-LABEL: ucvtflt_f64_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtflt z0.d, z0.s
+; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn)
ret <vscale x 2 x double> %res
}
-declare <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32>)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32>)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32>)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
index 4c99a4c241318..121f8b87255f5 100644
--- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
@@ -25,7 +25,7 @@ define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8
; STR: // %bb.0:
; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
; STR-NEXT: ret
- %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
ret <vscale x 16 x i8> %res
}
@@ -48,7 +48,7 @@ define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x
; STR: // %bb.0:
; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
; STR-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
ret <vscale x 8 x i16> %res
}
@@ -71,7 +71,7 @@ define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x
; STR: // %bb.0:
; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
; STR-NEXT: ret
- %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
ret <vscale x 4 x i32> %res
}
@@ -98,7 +98,7 @@ define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8
; STR: // %bb.0:
; STR-NEXT: fcvtzun z0.b, { z0.h, z1.h }
; STR-NEXT: ret
- %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
+ %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
ret <vscale x 16 x i8> %res
}
@@ -121,7 +121,7 @@ define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x
; STR: // %bb.0:
; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s }
; STR-NEXT: ret
- %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+ %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
ret <vscale x 8 x i16> %res
}
@@ -144,14 +144,7 @@ define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x
; STR: // %bb.0:
; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d }
; STR-NEXT: ret
- %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
+ %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
ret <vscale x 4 x i32> %res
}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double>, <vscale x 2 x double>)
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double>, <vscale x 2 x double>)
>From b42d0cd99e9570a5a770eff75ac6a73a8f4e69a1 Mon Sep 17 00:00:00 2001
From: Martin Wehking <martin.wehking at arm.com>
Date: Wed, 18 Mar 2026 11:11:36 +0000
Subject: [PATCH 3/4] Fix intrinsic name and simplify CHECK lines
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 +-
.../sve2p3-intrinsics-fp-converts_x2.ll | 180 +++++++-----------
2 files changed, 71 insertions(+), 113 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 72a6f3bd49abe..e422f8e1826d4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4776,8 +4776,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
// SVE2 fp convert, narrow and interleave to integer, rounding toward zero
- defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn>;
- defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun>;
+ defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn_x2>;
+ defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun_x2>;
// SVE2 signed/unsigned integer convert to floating-point
defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">;
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
index 121f8b87255f5..7e05793cabcc1 100644
--- a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll
@@ -1,76 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING
;
; FCVTZSN
;
define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
-; SVE2P3-LABEL: fcvtzsn_i8_f16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzsn_i8_f16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzsn_i8_f16:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzsn_i8_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i8_f16:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzsn z0.b, { z0.h, z1.h }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
-; SVE2P3-LABEL: fcvtzsn_i16_f32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzsn_i16_f32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzsn_i16_f32:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzsn_i16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i16_f32:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzsn z0.h, { z0.s, z1.s }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
-; SVE2P3-LABEL: fcvtzsn_i32_f64:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzsn_i32_f64:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzsn_i32_f64:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzsn_i32_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzsn_i32_f64:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzsn z0.s, { z0.d, z1.d }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
ret <vscale x 4 x i32> %res
}
@@ -80,70 +59,49 @@ define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x
;
define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
-; SVE2P3-LABEL: fcvtzun_i8_f16:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzun_i8_f16:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzun_i8_f16:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzun z0.b, { z0.h, z1.h }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzun_i8_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzun z0.b, { z0.h, z1.h }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i8_f16:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzun z0.b, { z0.h, z1.h }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.x2.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
-; SVE2P3-LABEL: fcvtzun_i16_f32:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzun_i16_f32:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzun_i16_f32:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzun_i16_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzun z0.h, { z0.s, z1.s }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i16_f32:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzun z0.h, { z0.s, z1.s }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.x2.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
-; SVE2P3-LABEL: fcvtzun_i32_f64:
-; SVE2P3: // %bb.0:
-; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SVE2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d }
-; SVE2P3-NEXT: ret
-;
-; SME2P3-LABEL: fcvtzun_i32_f64:
-; SME2P3: // %bb.0:
-; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; SME2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d }
-; SME2P3-NEXT: ret
-;
-; STR-LABEL: fcvtzun_i32_f64:
-; STR: // %bb.0:
-; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d }
-; STR-NEXT: ret
+; CHECK-LABEL: fcvtzun_i32_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: fcvtzun z0.s, { z0.d, z1.d }
+; CHECK-NEXT: ret
+;
+; CHECK-STREAMING-LABEL: fcvtzun_i32_f64:
+; CHECK-STREAMING: // %bb.0:
+; CHECK-STREAMING-NEXT: fcvtzun z0.s, { z0.d, z1.d }
+; CHECK-STREAMING-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.x2.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
ret <vscale x 4 x i32> %res
}
>From 2ef7108e2e70e5ef002a7bbc65cd90d2905a6d55 Mon Sep 17 00:00:00 2001
From: Martin Wehking <martin.wehking at arm.com>
Date: Wed, 18 Mar 2026 14:20:46 +0000
Subject: [PATCH 4/4] Reintroduce overloaded short forms for intrinsics
Adapt the test cases accordingly.
---
clang/include/clang/Basic/arm_sve.td | 24 ++++++-------
.../acle_sve2_fp_int_cvtn_x2.c | 3 --
.../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 36 +++++++++++--------
...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 36 +++++++++++++++++++
4 files changed, 70 insertions(+), 29 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c55a2d03f2037..9f5893ab35b02 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1007,21 +1007,21 @@ def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch6
def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun_x2", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
-def SVCVTT_F16_S8 : SInst<"svcvtt_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_S16 : SInst<"svcvtt_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_S32 : SInst<"svcvtt_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F16_U8 : SInst<"svcvtt_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F32_U16 : SInst<"svcvtt_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTT_F64_U32 : SInst<"svcvtt_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F16_S8 : SInst<"svcvtb_f16_s8", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_S16 : SInst<"svcvtb_f32_s16", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_S32 : SInst<"svcvtb_f64_s32", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F16_U8 : SInst<"svcvtb_f16_u8", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F32_U16 : SInst<"svcvtb_f32_u16", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
-def SVCVTB_F64_U32 : SInst<"svcvtb_f64_u32", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
index e2cd71bd8b062..c4e4a863dda57 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c
@@ -3,12 +3,10 @@
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-//
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
@@ -25,7 +23,6 @@
#endif
#ifdef SVE_OVERLOADED_FORMS
-// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
#else
#define SVE_ACLE_FUNC(A1,A2) A1##A2
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
index 6b7252e045e33..26e077d05c28b 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c
@@ -1,10 +1,12 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
@@ -20,6 +22,12 @@
#define MODE_ATTR
#endif
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+#endif
+
// CHECK-LABEL: @test_svcvtb_f16_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
@@ -31,7 +39,7 @@
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
- return svcvtb_f16_s8(zn);
+ return SVE_ACLE_FUNC(svcvtb_f16,_s8)(zn);
}
// CHECK-LABEL: @test_svcvtb_f32_s16(
@@ -45,7 +53,7 @@ svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
- return svcvtb_f32_s16(zn);
+ return SVE_ACLE_FUNC(svcvtb_f32,_s16)(zn);
}
// CHECK-LABEL: @test_svcvtb_f64_s32(
@@ -59,7 +67,7 @@ svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
- return svcvtb_f64_s32(zn);
+ return SVE_ACLE_FUNC(svcvtb_f64,_s32)(zn);
}
// CHECK-LABEL: @test_svcvtb_f16_u8(
@@ -73,7 +81,7 @@ svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
- return svcvtb_f16_u8(zn);
+ return SVE_ACLE_FUNC(svcvtb_f16,_u8)(zn);
}
// CHECK-LABEL: @test_svcvtb_f32_u16(
@@ -87,7 +95,7 @@ svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
- return svcvtb_f32_u16(zn);
+ return SVE_ACLE_FUNC(svcvtb_f32,_u16)(zn);
}
// CHECK-LABEL: @test_svcvtb_f64_u32(
@@ -101,7 +109,7 @@ svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
- return svcvtb_f64_u32(zn);
+ return SVE_ACLE_FUNC(svcvtb_f64,_u32)(zn);
}
// CHECK-LABEL: @test_svcvt_f16_s8(
@@ -115,7 +123,7 @@ svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
- return svcvtt_f16_s8(zn);
+ return SVE_ACLE_FUNC(svcvtt_f16,_s8)(zn);
}
// CHECK-LABEL: @test_svcvt_f32_s16(
@@ -129,7 +137,7 @@ svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
- return svcvtt_f32_s16(zn);
+ return SVE_ACLE_FUNC(svcvtt_f32,_s16)(zn);
}
// CHECK-LABEL: @test_svcvt_f64_s32(
@@ -143,7 +151,7 @@ svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
- return svcvtt_f64_s32(zn);
+ return SVE_ACLE_FUNC(svcvtt_f64,_s32)(zn);
}
// CHECK-LABEL: @test_svcvt_f16_u8(
@@ -157,7 +165,7 @@ svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
- return svcvtt_f16_u8(zn);
+ return SVE_ACLE_FUNC(svcvtt_f16,_u8)(zn);
}
// CHECK-LABEL: @test_svcvt_f32_u16(
@@ -171,7 +179,7 @@ svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
- return svcvtt_f32_u16(zn);
+ return SVE_ACLE_FUNC(svcvtt_f32,_u16)(zn);
}
// CHECK-LABEL: @test_svcvt_f64_u32(
@@ -185,5 +193,5 @@ svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR {
- return svcvtt_f64_u32(zn);
+ return SVE_ACLE_FUNC(svcvtt_f64,_u32)(zn);
}
diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
index 76290675e3b93..78503015d2f8d 100644
--- a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
+++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -32,16 +32,28 @@ void test(void) {
svcvt_u16_f32_x2(svfloat32x2_t_val);
svcvt_u32(svfloat64x2_t_val);
svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16(svint8_t_val);
+ svcvtb_f16(svuint8_t_val);
svcvtb_f16_s8(svint8_t_val);
svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32(svint16_t_val);
+ svcvtb_f32(svuint16_t_val);
svcvtb_f32_s16(svint16_t_val);
svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64(svint32_t_val);
+ svcvtb_f64(svuint32_t_val);
svcvtb_f64_s32(svint32_t_val);
svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16(svint8_t_val);
+ svcvtt_f16(svuint8_t_val);
svcvtt_f16_s8(svint8_t_val);
svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32(svint16_t_val);
+ svcvtt_f32(svuint16_t_val);
svcvtt_f32_s16(svint16_t_val);
svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64(svint32_t_val);
+ svcvtt_f64(svuint32_t_val);
svcvtt_f64_s32(svint32_t_val);
svcvtt_f64_u32(svuint32_t_val);
}
@@ -69,16 +81,28 @@ void test_streaming(void) __arm_streaming{
svcvt_u16_f32_x2(svfloat32x2_t_val);
svcvt_u32(svfloat64x2_t_val);
svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16(svint8_t_val);
+ svcvtb_f16(svuint8_t_val);
svcvtb_f16_s8(svint8_t_val);
svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32(svint16_t_val);
+ svcvtb_f32(svuint16_t_val);
svcvtb_f32_s16(svint16_t_val);
svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64(svint32_t_val);
+ svcvtb_f64(svuint32_t_val);
svcvtb_f64_s32(svint32_t_val);
svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16(svint8_t_val);
+ svcvtt_f16(svuint8_t_val);
svcvtt_f16_s8(svint8_t_val);
svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32(svint16_t_val);
+ svcvtt_f32(svuint16_t_val);
svcvtt_f32_s16(svint16_t_val);
svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64(svint32_t_val);
+ svcvtt_f64(svuint32_t_val);
svcvtt_f64_s32(svint32_t_val);
svcvtt_f64_u32(svuint32_t_val);
}
@@ -106,16 +130,28 @@ void test_streaming_compatible(void) __arm_streaming_compatible{
svcvt_u16_f32_x2(svfloat32x2_t_val);
svcvt_u32(svfloat64x2_t_val);
svcvt_u32_f64_x2(svfloat64x2_t_val);
+ svcvtb_f16(svint8_t_val);
+ svcvtb_f16(svuint8_t_val);
svcvtb_f16_s8(svint8_t_val);
svcvtb_f16_u8(svuint8_t_val);
+ svcvtb_f32(svint16_t_val);
+ svcvtb_f32(svuint16_t_val);
svcvtb_f32_s16(svint16_t_val);
svcvtb_f32_u16(svuint16_t_val);
+ svcvtb_f64(svint32_t_val);
+ svcvtb_f64(svuint32_t_val);
svcvtb_f64_s32(svint32_t_val);
svcvtb_f64_u32(svuint32_t_val);
+ svcvtt_f16(svint8_t_val);
+ svcvtt_f16(svuint8_t_val);
svcvtt_f16_s8(svint8_t_val);
svcvtt_f16_u8(svuint8_t_val);
+ svcvtt_f32(svint16_t_val);
+ svcvtt_f32(svuint16_t_val);
svcvtt_f32_s16(svint16_t_val);
svcvtt_f32_u16(svuint16_t_val);
+ svcvtt_f64(svint32_t_val);
+ svcvtt_f64(svuint32_t_val);
svcvtt_f64_s32(svint32_t_val);
svcvtt_f64_u32(svuint32_t_val);
}
More information about the cfe-commits
mailing list