[clang] [llvm] [AARCH64] Add FEAT_SSVE_FEXPA and fix unsupported features list (PR #134368)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 04:27:31 PDT 2025
https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/134368
This patch adds new feature introduced in [2025-03 release](https://developer.arm.com/documentation/ddi0602/2025-03/SVE-Instructions/FEXPA--Floating-point-exponential-accelerator-) and changes feature requirements for fexpa instructions and intrinsics.
Additionally it fixes unsupported features list by moving fearures dependent on sme2p1 to correct location.
>From c1d652f7bb3f6ff272eca6dabc762b9504d7126a Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 4 Apr 2025 11:18:23 +0000
Subject: [PATCH] [AARCH64] Add support for FEAT_SSVE_FEXPA extension and
update fix unsopported features list
---
clang/include/clang/Basic/arm_sve.td | 6 ++++--
.../CodeGen/AArch64/sve-intrinsics/acle_sve_expa.c | 14 +++++++++++---
.../Driver/print-supported-extensions-aarch64.c | 1 +
llvm/lib/Target/AArch64/AArch64.td | 5 ++---
llvm/lib/Target/AArch64/AArch64Features.td | 4 +++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ++++
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +-
llvm/test/CodeGen/AArch64/sve-intrinsics-fexpa.ll | 2 +-
llvm/test/MC/AArch64/SVE/fexpa.s | 8 ++++----
9 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 35263541b67ae..f09f40ce9202e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -836,9 +836,11 @@ defm SVRINTP : SInstZPZ<"svrintp", "hfd", "aarch64_sve_frintp">;
defm SVRINTX : SInstZPZ<"svrintx", "hfd", "aarch64_sve_frintx">;
defm SVRINTZ : SInstZPZ<"svrintz", "hfd", "aarch64_sve_frintz">;
defm SVSQRT : SInstZPZ<"svsqrt", "hfd", "aarch64_sve_fsqrt">;
-
+def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x", [VerifyRuntimeMode]>{
+ let SVETargetGuard = "sve";
+ let SMETargetGuard = "sme2,ssve-fexpa";
+}
let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in {
-def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x">;
def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">;
def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_expa.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_expa.c
index 52b6822a833f7..8c34017b7750b 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_expa.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_expa.c
@@ -1,10 +1,12 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +ssve-fexpa -target-feature +sme2 -target-feature +sme -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +ssve-fexpa -target-feature +sme2 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sve.h>
#ifdef SVE_OVERLOADED_FORMS
@@ -14,6 +16,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
// CHECK-LABEL: @test_svexpa_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16> [[OP:%.*]])
@@ -24,7 +32,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-svfloat16_t test_svexpa_f16(svuint16_t op)
+svfloat16_t test_svexpa_f16(svuint16_t op) STREAMING
{
return SVE_ACLE_FUNC(svexpa,_f16,,)(op);
}
@@ -39,7 +47,7 @@ svfloat16_t test_svexpa_f16(svuint16_t op)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-svfloat32_t test_svexpa_f32(svuint32_t op)
+svfloat32_t test_svexpa_f32(svuint32_t op) STREAMING
{
return SVE_ACLE_FUNC(svexpa,_f32,,)(op);
}
@@ -54,7 +62,7 @@ svfloat32_t test_svexpa_f32(svuint32_t op)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fexpa.x.nxv2f64(<vscale x 2 x i64> [[OP:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
-svfloat64_t test_svexpa_f64(svuint64_t op)
+svfloat64_t test_svexpa_f64(svuint64_t op) STREAMING
{
return SVE_ACLE_FUNC(svexpa,_f64,,)(op);
}
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 38a3f54eb4794..539c1937a9712 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -81,6 +81,7 @@
// CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable Armv9.6-A SVE AES support in streaming SVE mode
// CHECK-NEXT: ssve-bitperm FEAT_SSVE_BitPerm Enable Armv9.6-A SVE BitPerm support in streaming SVE mode
+// CHECK-NEXT: ssve-fexpa FEAT_SSVE_FEXPA Enable SVE FEXPA instruction in Streaming SVE mode
// CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 3677f669c3481..b66c88e2d6245 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -74,12 +74,11 @@ def SVEUnsupported : AArch64Unsupported {
}
let F = [HasSME2p2, HasSVE2p2_or_SME2p2, HasNonStreamingSVE_or_SME2p2,
- HasNonStreamingSVE2p2_or_SME2p2, HasNonStreamingSVE2_or_SSVE_BitPerm,
- HasSME_MOP4, HasSME_TMOP] in
+ HasNonStreamingSVE2p2_or_SME2p2] in
def SME2p2Unsupported : AArch64Unsupported;
def SME2p1Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSME2p1, HasSVE2p1_or_SME2p1, HasNonStreamingSVE2p1_or_SSVE_AES],
+ let F = !listconcat([HasSME2p1, HasSVE2p1_or_SME2p1, HasNonStreamingSVE2p1_or_SSVE_AES, HasSME_MOP4, HasSME_TMOP, HasNonStreamingSVE_or_SSVE_FEXPA, HasNonStreamingSVE2_or_SSVE_BitPerm],
SME2p2Unsupported.F);
}
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 357f526d5e308..f4f931a5cdab1 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -576,7 +576,9 @@ def FeatureSME_MOP4: ExtensionWithMArch<"sme-mop4", "SME_MOP4", "FEAT_SME_MOP4",
def FeatureSME_TMOP: ExtensionWithMArch<"sme-tmop", "SME_TMOP", "FEAT_SME_TMOP",
"Enable SME Structured sparsity outer product instructions.", [FeatureSME2]>;
-//===----------------------------------------------------------------------===//
+def FeatureSSVE_FEXPA : ExtensionWithMArch<"ssve-fexpa", "SSVE_FEXPA", "FEAT_SSVE_FEXPA",
+ "Enable SVE FEXPA instruction in Streaming SVE mode", [FeatureSME2]>;
+
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a3b1ae55df028..7d0c453201dec 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -294,6 +294,10 @@ def HasNonStreamingSVE2_or_SSVE_BitPerm
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_BitPerm())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_BitPerm), "sve2 or ssve-bitperm">;
+def HasNonStreamingSVE_or_SSVE_FEXPA
+ : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
+ "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_FEXPA())">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSSVE_FEXPA), "sve or ssve-fexpa">;
// A subset of NEON instructions are legal in Streaming SVE execution mode,
// so don't need the additional check for 'isNeonAvailable'.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a2f326c994c2f..b40c82a25e7ba 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -980,7 +980,7 @@ let Predicates = [HasSVE_or_SME] in {
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
} // End HasSVE_or_SME
-let Predicates = [HasNonStreamingSVE_or_SME2p2] in {
+let Predicates = [HasNonStreamingSVE_or_SSVE_FEXPA] in {
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
} // End HasSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fexpa.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fexpa.ll
index 00e000f642377..021d4855905e7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fexpa.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fexpa.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -force-streaming -mattr=+sme2p2 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -force-streaming -mattr=+ssve-fexpa < %s | FileCheck %s
define <vscale x 8 x half> @fexpa_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: fexpa_h:
diff --git a/llvm/test/MC/AArch64/SVE/fexpa.s b/llvm/test/MC/AArch64/SVE/fexpa.s
index c51b1e2b1d3e5..c6386255b274e 100644
--- a/llvm/test/MC/AArch64/SVE/fexpa.s
+++ b/llvm/test/MC/AArch64/SVE/fexpa.s
@@ -1,6 +1,6 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+ssve-fexpa < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
@@ -14,17 +14,17 @@
fexpa z0.h, z31.h
// CHECK-INST: fexpa z0.h, z31.h
// CHECK-ENCODING: [0xe0,0xbb,0x60,0x04]
-// CHECK-ERROR: instruction requires: sve or sme2p2
+// CHECK-ERROR: instruction requires: sve or ssve-fexpa
// CHECK-UNKNOWN: 0460bbe0 <unknown>
fexpa z0.s, z31.s
// CHECK-INST: fexpa z0.s, z31.s
// CHECK-ENCODING: [0xe0,0xbb,0xa0,0x04]
-// CHECK-ERROR: instruction requires: sve or sme2p2
+// CHECK-ERROR: instruction requires: sve or ssve-fexpa
// CHECK-UNKNOWN: 04a0bbe0 <unknown>
fexpa z0.d, z31.d
// CHECK-INST: fexpa z0.d, z31.d
// CHECK-ENCODING: [0xe0,0xbb,0xe0,0x04]
-// CHECK-ERROR: instruction requires: sve or sme2p2
+// CHECK-ERROR: instruction requires: sve or ssve-fexpa
// CHECK-UNKNOWN: 04e0bbe0 <unknown>
More information about the llvm-commits
mailing list