[llvm] [LLVM][CodeGen][SME] hasB16b16() is not sufficient to prove BFADD availability. (PR #154143)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 08:55:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
<details>
<summary>Changes</summary>
The FEAT_SVE_B16B16 arithmetic instructions are only available to streaming-mode functions when SME2 is available.
https://developer.arm.com/documentation/ddi0602/2025-06/SVE-Instructions/BFADD--predicated---BFloat16-add--predicated--?lang=en
---
Full diff: https://github.com/llvm/llvm-project/pull/154143.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-3)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+5-1)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+4-4)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+7)
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-arith.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2072e48914ae6..834e02a5b1d72 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1769,7 +1769,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
- if (Subtarget->hasSVEB16B16()) {
+ if (Subtarget->hasSVEB16B16() &&
+ Subtarget->isNonStreamingSVEorSME2Available()) {
setOperationAction(ISD::FADD, VT, Legal);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -1791,7 +1792,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32);
}
- if (!Subtarget->hasSVEB16B16()) {
+ if (!Subtarget->hasSVEB16B16() ||
+ !Subtarget->isNonStreamingSVEorSME2Available()) {
for (auto Opcode : {ISD::FADD, ISD::FMA, ISD::FMAXIMUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMINNUM, ISD::FMUL, ISD::FSUB}) {
setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32);
@@ -18123,7 +18125,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
case MVT::f64:
return true;
case MVT::bf16:
- return VT.isScalableVector() && Subtarget->hasSVEB16B16();
+ return VT.isScalableVector() && Subtarget->hasSVEB16B16() &&
+ Subtarget->isNonStreamingSVEorSME2Available();
default:
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8cfbff938a395..0b4307fbdf6df 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -143,7 +143,7 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->isSVEAvailable()">,
AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
-def HasSVEB16B16 : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEB16B16()">,
+def HasSVEB16B16 : Predicate<"Subtarget->hasSVEB16B16()">,
AssemblerPredicateWithAll<(all_of FeatureSVEB16B16), "sve-b16b16">;
def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
@@ -248,6 +248,10 @@ def HasSVE_or_SME
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable()">,
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
"sve or sme">;
+def HasNonStreamingSVE_or_SME2
+ : Predicate<"Subtarget->isNonStreamingSVEorSME2Available()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2),
+ "sve or sme2">;
def HasNonStreamingSVE_or_SME2p1
: Predicate<"Subtarget->isSVEAvailable() ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p1())">,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 509dd8b73a017..eeb47b4d99750 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4408,7 +4408,7 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PPRorPNR8:$Pd), 0>;
// Non-widening BFloat16 to BFloat16 instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasSVEB16B16] in {
+let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2] in {
defm BFADD_ZZZ : sve_fp_3op_u_zd_bfloat<0b000, "bfadd", AArch64fadd>;
defm BFSUB_ZZZ : sve_fp_3op_u_zd_bfloat<0b001, "bfsub", AArch64fsub>;
defm BFMUL_ZZZ : sve_fp_3op_u_zd_bfloat<0b010, "bfmul", AArch64fmul>;
@@ -4441,9 +4441,9 @@ defm BFMLS_ZZZI : sve_fp_fma_by_indexed_elem_bfloat<"bfmls", 0b11, AArch64fmlsid
defm BFMUL_ZZZI : sve_fp_fmul_by_indexed_elem_bfloat<"bfmul", AArch64fmulidx>;
defm BFCLAMP_ZZZ : sve_fp_clamp_bfloat<"bfclamp", AArch64fclamp>;
-} // End HasSVEB16B16
+} // End HasSVEB16B16, HasNonStreamingSVE_or_SME2
-let Predicates = [HasSVEB16B16, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos] in {
defm BFADD_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fadd>;
defm BFSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fsub>;
defm BFMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmul>;
@@ -4451,7 +4451,7 @@ defm BFMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmaxnm>;
defm BFMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fminnm>;
defm BFMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmin>;
defm BFMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_bfloat<int_aarch64_sve_fmax>;
-} // HasSVEB16B16, UseExperimentalZeroingPseudos
+} // HasSVEB16B16, HasNonStreamingSVE_or_SME2, UseExperimentalZeroingPseudos
let Predicates = [HasSVEBFSCALE] in {
def BFSCALE_ZPZZ : sve_fp_2op_p_zds_bfscale<0b1001, "bfscale", DestructiveBinary>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index d00e4471e107d..01c0bcc3a6a78 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -212,6 +212,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return hasSVE() || isStreamingSVEAvailable();
}
+ /// Returns true if the target has access to either the full range of SVE
+ /// instructions, or the streaming-compatible subset of SVE instructions
+ /// available to SME2.
+ bool isNonStreamingSVEorSME2Available() const {
+ return isSVEAvailable() || (isSVEorStreamingSVEAvailable() && hasSME2());
+ }
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
// we don't yet support streaming-compatible codegen support that we trust
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
index 83f4f8fc57aae..0580f5e0b019a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
-; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
-; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
+; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
+; RUN: llc -mattr=+sve,+bf16,+sve-b16b16 < %s | FileCheck %s --check-prefixes=CHECK,B16B16
+; RUN: llc -mattr=+sme,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,NOB16B16
+; RUN: llc -mattr=+sme2,+sve-b16b16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,B16B16
target triple = "aarch64-unknown-linux-gnu"
``````````
</details>
https://github.com/llvm/llvm-project/pull/154143
More information about the llvm-commits mailing list