[clang] [Clang][AArch64] Add pessimistic vscale_range for sve/sme (PR #137624)
Matthew Devereau via cfe-commits
cfe-commits at lists.llvm.org
Thu May 15 02:34:25 PDT 2025
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/137624
>From 9bed3ae2f1bb98fc6f53a17cca98da4b1562e1a7 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 23 Apr 2025 12:27:02 +0000
Subject: [PATCH 1/7] [Clang][AArch64] Add pessimistic vscale_range when sve is
in target-features
The "target-features" function attribute is not currently considered when
adding vscale_range to a function. When +sve is pushed onto functions with
"#pragma attribute push(+sve)", the function potentially misses out on
optimizations that rely on vscale_range being present.
---
clang/include/clang/Basic/TargetInfo.h | 5 +++--
clang/lib/Basic/Targets/AArch64.cpp | 11 ++++++++++-
clang/lib/Basic/Targets/AArch64.h | 8 ++++++--
clang/lib/Basic/Targets/RISCV.cpp | 3 ++-
clang/lib/Basic/Targets/RISCV.h | 8 ++++++--
clang/lib/CodeGen/CodeGenFunction.cpp | 2 +-
.../test/CodeGen/AArch64/cpu-supports-target.c | 2 +-
clang/test/CodeGen/AArch64/targetattr.c | 18 +++++++++---------
8 files changed, 38 insertions(+), 19 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 8c3dcda25bc8d..ba24f2aebcf5b 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -48,6 +48,7 @@
namespace llvm {
struct fltSemantics;
+class Function;
}
namespace clang {
@@ -1037,8 +1038,8 @@ class TargetInfo : public TransferrableTargetInfo,
/// Returns target-specific min and max values VScale_Range.
virtual std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts,
- bool IsArmStreamingFunction) const {
+ getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
+ llvm::Function *F = nullptr) const {
return std::nullopt;
}
/// The __builtin_clz* and __builtin_ctz* built-in
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 3633bab6e0df9..045df30e6c77a 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Function.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
#include <optional>
@@ -794,7 +795,8 @@ AArch64TargetInfo::getTargetBuiltins() const {
std::optional<std::pair<unsigned, unsigned>>
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
- bool IsArmStreamingFunction) const {
+ bool IsArmStreamingFunction,
+ llvm::Function *F) const {
if (LangOpts.VScaleMin || LangOpts.VScaleMax)
return std::pair<unsigned, unsigned>(
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
@@ -802,6 +804,13 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
return std::pair<unsigned, unsigned>(1, 16);
+ if (F && F->hasFnAttribute("target-features")) {
+ StringRef Str = F->getFnAttribute("target-features").getValueAsString();
+ for (const auto &s : llvm::split(Str, ",")) {
+ if (s == "+sve")
+ return std::pair<unsigned, unsigned>(1, 16);
+ }
+ }
return std::nullopt;
}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 2fab88cfca901..eb9fc24e51638 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -18,6 +18,10 @@
#include "llvm/TargetParser/AArch64TargetParser.h"
#include <optional>
+namespace llvm {
+class Function;
+}
+
namespace clang {
namespace targets {
@@ -187,8 +191,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;
std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts,
- bool IsArmStreamingFunction) const override;
+ getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
+ llvm::Function *F = nullptr) const override;
bool doesFeatureAffectCodeGen(StringRef Name) const override;
bool validateCpuSupports(StringRef FeatureStr) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 390ef0f3ac884..95211cc7c5554 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -332,7 +332,8 @@ bool RISCVTargetInfo::initFeatureMap(
std::optional<std::pair<unsigned, unsigned>>
RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
- bool IsArmStreamingFunction) const {
+ bool IsArmStreamingFunction,
+ llvm::Function *F) const {
// RISCV::RVVBitsPerBlock is 64.
unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index c26aa19080162..b072ccfe28ac4 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -20,6 +20,10 @@
#include "llvm/TargetParser/Triple.h"
#include <optional>
+namespace llvm {
+class Function;
+}
+
namespace clang {
namespace targets {
@@ -99,8 +103,8 @@ class RISCVTargetInfo : public TargetInfo {
const std::vector<std::string> &FeaturesVec) const override;
std::optional<std::pair<unsigned, unsigned>>
- getVScaleRange(const LangOptions &LangOpts,
- bool IsArmStreamingFunction) const override;
+ getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
+ llvm::Function *F = nullptr) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 4d29ceace646f..e9b1231e79289 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1117,7 +1117,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// Add vscale_range attribute if appropriate.
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
getContext().getTargetInfo().getVScaleRange(
- getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false);
+ getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false, CurFn);
if (VScaleRange) {
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
getLLVMContext(), VScaleRange->first, VScaleRange->second));
diff --git a/clang/test/CodeGen/AArch64/cpu-supports-target.c b/clang/test/CodeGen/AArch64/cpu-supports-target.c
index a39ffd4e4a74d..9b551a0714e74 100644
--- a/clang/test/CodeGen/AArch64/cpu-supports-target.c
+++ b/clang/test/CodeGen/AArch64/cpu-supports-target.c
@@ -220,7 +220,7 @@ int test_versions() {
//.
// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
+// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c
index cfe115bf97ed3..7052f2917cf71 100644
--- a/clang/test/CodeGen/AArch64/targetattr.c
+++ b/clang/test/CodeGen/AArch64/targetattr.c
@@ -201,21 +201,21 @@ void applem4() {}
//.
// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
+// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
+// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
+// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="cortex-a710" }
// CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" }
// CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" }
// CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
+// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
// CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" }
-// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
+// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
// CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16" }
-// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone vscale_range(1,16) "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
>From 2e9664623a2e63661a8fb17b4e5e5c25d81f8d64 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 30 Apr 2025 09:39:11 +0000
Subject: [PATCH 2/7] use getFunctionFeatureMap
---
clang/include/clang/Basic/TargetInfo.h | 3 +--
clang/lib/Basic/Targets/AArch64.cpp | 13 +++----------
clang/lib/Basic/Targets/AArch64.h | 6 +-----
clang/lib/Basic/Targets/RISCV.cpp | 2 +-
clang/lib/Basic/Targets/RISCV.h | 6 +-----
clang/lib/CodeGen/CodeGenFunction.cpp | 10 ++++++++--
6 files changed, 15 insertions(+), 25 deletions(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index ba24f2aebcf5b..9377bf7c0f148 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -48,7 +48,6 @@
namespace llvm {
struct fltSemantics;
-class Function;
}
namespace clang {
@@ -1039,7 +1038,7 @@ class TargetInfo : public TransferrableTargetInfo,
/// Returns target-specific min and max values VScale_Range.
virtual std::optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
- llvm::Function *F = nullptr) const {
+ llvm::StringMap<bool> *FeatureMap = nullptr) const {
return std::nullopt;
}
/// The __builtin_clz* and __builtin_ctz* built-in
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 045df30e6c77a..9ac119fcf6eb7 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Function.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
#include <optional>
@@ -796,21 +795,15 @@ AArch64TargetInfo::getTargetBuiltins() const {
std::optional<std::pair<unsigned, unsigned>>
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
bool IsArmStreamingFunction,
- llvm::Function *F) const {
+ llvm::StringMap<bool> *FeatureMap) const {
if (LangOpts.VScaleMin || LangOpts.VScaleMax)
return std::pair<unsigned, unsigned>(
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
- if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")))
+ if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")) ||
+ (FeatureMap && FeatureMap->lookup("sve")))
return std::pair<unsigned, unsigned>(1, 16);
- if (F && F->hasFnAttribute("target-features")) {
- StringRef Str = F->getFnAttribute("target-features").getValueAsString();
- for (const auto &s : llvm::split(Str, ",")) {
- if (s == "+sve")
- return std::pair<unsigned, unsigned>(1, 16);
- }
- }
return std::nullopt;
}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index eb9fc24e51638..6eeac69af20df 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -18,10 +18,6 @@
#include "llvm/TargetParser/AArch64TargetParser.h"
#include <optional>
-namespace llvm {
-class Function;
-}
-
namespace clang {
namespace targets {
@@ -192,7 +188,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
std::optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
- llvm::Function *F = nullptr) const override;
+ llvm::StringMap<bool> *FeatureMap = nullptr) const override;
bool doesFeatureAffectCodeGen(StringRef Name) const override;
bool validateCpuSupports(StringRef FeatureStr) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 95211cc7c5554..a1a2437f288a0 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -333,7 +333,7 @@ bool RISCVTargetInfo::initFeatureMap(
std::optional<std::pair<unsigned, unsigned>>
RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts,
bool IsArmStreamingFunction,
- llvm::Function *F) const {
+ llvm::StringMap<bool> *FeatureMap) const {
// RISCV::RVVBitsPerBlock is 64.
unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock;
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index b072ccfe28ac4..0b36c9d5d9cc8 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -20,10 +20,6 @@
#include "llvm/TargetParser/Triple.h"
#include <optional>
-namespace llvm {
-class Function;
-}
-
namespace clang {
namespace targets {
@@ -104,7 +100,7 @@ class RISCVTargetInfo : public TargetInfo {
std::optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction,
- llvm::Function *F = nullptr) const override;
+ llvm::StringMap<bool> *FeatureMap = nullptr) const override;
bool hasFeature(StringRef Feature) const override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index e9b1231e79289..10019443b2590 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1115,9 +1115,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->removeFnAttr("zero-call-used-regs");
// Add vscale_range attribute if appropriate.
+ llvm::StringMap<bool> FeatureMap;
+ bool IsArmStreaming = false;
+ if (FD) {
+ getContext().getFunctionFeatureMap(FeatureMap, FD);
+ IsArmStreaming = IsArmStreamingFunction(FD, true);
+ }
std::optional<std::pair<unsigned, unsigned>> VScaleRange =
- getContext().getTargetInfo().getVScaleRange(
- getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false, CurFn);
+ getContext().getTargetInfo().getVScaleRange(getLangOpts(), IsArmStreaming,
+ &FeatureMap);
if (VScaleRange) {
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
getLLVMContext(), VScaleRange->first, VScaleRange->second));
>From ad81417a96a71c2f476bbaa8d14ba06d42ea36f3 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Fri, 2 May 2025 09:26:03 +0000
Subject: [PATCH 3/7] add sme
---
clang/lib/Basic/Targets/AArch64.cpp | 7 +++++--
clang/test/CodeGen/AArch64/targetattr.c | 18 ++++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 9ac119fcf6eb7..e1f6c7b834dc7 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -800,8 +800,11 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
return std::pair<unsigned, unsigned>(
LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax);
- if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")) ||
- (FeatureMap && FeatureMap->lookup("sve")))
+ if (hasFeature("sve") || (FeatureMap && (FeatureMap->lookup("sve"))))
+ return std::pair<unsigned, unsigned>(1, 16);
+
+ if (IsArmStreamingFunction &&
+ (hasFeature("sme") || (FeatureMap && (FeatureMap->lookup("sme")))))
return std::pair<unsigned, unsigned>(1, 16);
return std::nullopt;
diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c
index 7052f2917cf71..f1a7eda4a82d6 100644
--- a/clang/test/CodeGen/AArch64/targetattr.c
+++ b/clang/test/CodeGen/AArch64/targetattr.c
@@ -199,6 +199,22 @@ __attribute__((target("cpu=apple-m4")))
//
void applem4() {}
+__attribute__((target("+sme")))
+// CHECK-LABEL: define {{[^@]+}}@plussmestreaming
+// CHECK-SAME: () #[[ATTR19:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+void plussmestreaming(void) __arm_streaming {}
+
+__attribute__((target("+sme")))
+// CHECK-LABEL: define {{[^@]+}}@plussmelocallystreaming
+// CHECK-SAME: () #[[ATTR20:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret void
+//
+__arm_locally_streaming void plussmelocallystreaming(void) {}
+
//.
// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
@@ -219,6 +235,8 @@ void applem4() {}
// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
+// CEHCK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
+// CEHCK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
>From 8338d13bef0964ecaf0acd25603917dea2d05e2b Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Fri, 2 May 2025 09:36:04 +0000
Subject: [PATCH 4/7] Fix typo in test
---
clang/test/CodeGen/AArch64/targetattr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c
index f1a7eda4a82d6..3a81dab31acc0 100644
--- a/clang/test/CodeGen/AArch64/targetattr.c
+++ b/clang/test/CodeGen/AArch64/targetattr.c
@@ -235,8 +235,8 @@ __arm_locally_streaming void plussmelocallystreaming(void) {}
// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
-// CEHCK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
-// CEHCK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
+// CHECK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
+// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
>From 01488e56fdb61a79958611943716ad1c9c9e488a Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Tue, 13 May 2025 15:59:42 +0000
Subject: [PATCH 5/7] Add extra error line to avx512-error.c
---
clang/test/CodeGen/X86/avx512-error.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
index 422cc7a8679dc..645126916572c 100644
--- a/clang/test/CodeGen/X86/avx512-error.c
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -29,4 +29,5 @@ __m512d zmm_error(__m512d a) {
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
+// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
#endif
>From f4caea770fd73fac57538661f9299f3411117104 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Wed, 14 May 2025 10:11:03 +0000
Subject: [PATCH 6/7] fix target-avx-abi-diag.c test
---
clang/test/CodeGen/target-avx-abi-diag.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/test/CodeGen/target-avx-abi-diag.c b/clang/test/CodeGen/target-avx-abi-diag.c
index dfbbc3213ca6b..116959d60788f 100644
--- a/clang/test/CodeGen/target-avx-abi-diag.c
+++ b/clang/test/CodeGen/target-avx-abi-diag.c
@@ -98,4 +98,6 @@ __attribute__((target("avx512f"))) void call_avx512_ok2(void) {
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
#endif
>From 2bc15f32ceee66a4850074c2d75f5f81777d9292 Mon Sep 17 00:00:00 2001
From: Matthew Devereau <matthew.devereau at arm.com>
Date: Thu, 15 May 2025 09:32:30 +0000
Subject: [PATCH 7/7] update targetattr.c
---
clang/test/CodeGen/AArch64/targetattr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c
index 3a81dab31acc0..f3f074b61cc84 100644
--- a/clang/test/CodeGen/AArch64/targetattr.c
+++ b/clang/test/CodeGen/AArch64/targetattr.c
@@ -220,7 +220,7 @@ __arm_locally_streaming void plussmelocallystreaming(void) {}
// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
+// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="cortex-a710" }
// CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" }
// CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" }
More information about the cfe-commits
mailing list