[clang] 740a1dd - [ARM] Armv8.6-a Matrix Mul cmd line support
Luke Geeson via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 24 07:54:38 PDT 2020
Author: Luke Geeson
Date: 2020-04-24T15:54:06+01:00
New Revision: 740a1dd050eea93a875ec86780ad6ed4b0310113
URL: https://github.com/llvm/llvm-project/commit/740a1dd050eea93a875ec86780ad6ed4b0310113
DIFF: https://github.com/llvm/llvm-project/commit/740a1dd050eea93a875ec86780ad6ed4b0310113.diff
LOG: [ARM] Armv8.6-a Matrix Mul cmd line support
This patch upstreams support for the Armv8.6-a Matrix Multiplication
Extension. A summary of the features can be found here:
https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a
This patch includes:
- Command line options to enable these features with +i8mm, +f32mm, or f64mm
Note: +f32mm and +f64mm are optional and so are not enabled by default
This is part of a patch series, starting with BFloat16 support and
the other components in the armv8.6a extension (in previous patches
linked in phabricator)
Based on work by:
- Luke Geeson
- Oliver Stannard
- Luke Cheeseman
Reviewers: t.p.northover, DavidSpickett
Reviewed By: DavidSpickett
Subscribers: DavidSpickett, ostannard, kristof.beyls, danielkiss,
cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77875
Added:
clang/test/Driver/arm-matrix-multiply.c
Modified:
clang/lib/Driver/ToolChains/Arch/AArch64.cpp
clang/test/Driver/aarch64-cpus.c
llvm/include/llvm/Support/AArch64TargetParser.def
llvm/include/llvm/Support/AArch64TargetParser.h
llvm/include/llvm/Support/ARMTargetParser.def
llvm/include/llvm/Support/ARMTargetParser.h
llvm/unittests/Support/TargetParserTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index b21cfac6e7ed..4c034d40aaf4 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -54,7 +54,8 @@ std::string aarch64::getAArch64TargetCPU(const ArgList &Args,
// Decode AArch64 features from string like +[no]featureA+[no]featureB+...
static bool DecodeAArch64Features(const Driver &D, StringRef text,
- std::vector<StringRef> &Features) {
+ std::vector<StringRef> &Features,
+ llvm::AArch64::ArchKind ArchKind) {
SmallVector<StringRef, 8> Split;
text.split(Split, StringRef("+"), -1, false);
@@ -66,6 +67,11 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text,
D.Diag(clang::diag::err_drv_no_neon_modifier);
else
return false;
+
+ // +sve implies +f32mm if the base architecture is v8.6A
+ // it isn't the case in general that sve implies both f64mm and f32mm
+ if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A) && Feature == "sve")
+ Features.push_back("+f32mm");
}
return true;
}
@@ -76,6 +82,7 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
std::vector<StringRef> &Features) {
std::pair<StringRef, StringRef> Split = Mcpu.split("+");
CPU = Split.first;
+ llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::ARMV8A;
if (CPU == "native")
CPU = llvm::sys::getHostCPUName();
@@ -83,7 +90,7 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
if (CPU == "generic") {
Features.push_back("+neon");
} else {
- llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseCPUArch(CPU);
+ ArchKind = llvm::AArch64::parseCPUArch(CPU);
if (!llvm::AArch64::getArchFeatures(ArchKind, Features))
return false;
@@ -92,10 +99,11 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
return false;
}
- if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))
- return false;
+ if (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind))
+ return false;
- return true;
+ return true;
}
static bool
@@ -108,7 +116,8 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March,
llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first);
if (ArchKind == llvm::AArch64::ArchKind::INVALID ||
!llvm::AArch64::getArchFeatures(ArchKind, Features) ||
- (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)))
+ (Split.second.size() &&
+ !DecodeAArch64Features(D, Split.second, Features, ArchKind)))
return false;
return true;
diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c
index 971f3cd83521..cf12a5155689 100644
--- a/clang/test/Driver/aarch64-cpus.c
+++ b/clang/test/Driver/aarch64-cpus.c
@@ -636,6 +636,34 @@
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s
// GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve"
+// The 8-bit integer matrix multiply extension is a mandatory component of the
+// Armv8.6-A extensions, but is permitted as an optional feature for any
+// implementation of Armv8.2-A to Armv8.5-A (inclusive)
+// RUN: %clang -target aarch64 -march=armv8.5a -### -c %s 2>&1 | FileCheck -check-prefix=NO-I8MM %s
+// RUN: %clang -target aarch64 -march=armv8.5a+i8mm -### -c %s 2>&1 | FileCheck -check-prefix=I8MM %s
+// NO-I8MM-NOT: "-target-feature" "+i8mm"
+// I8MM: "-target-feature" "+i8mm"
+
+// The 32-bit floating point matrix multiply extension is enabled by default
+// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for
+// any target from armv8.2a onwards (we don't enforce not using it with earlier
+// targets).
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// RUN: %clang -target aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// NO-F32MM-NOT: "-target-feature" "+f32mm"
+// F32MM: "-target-feature" "+f32mm"
+
+// The 64-bit floating point matrix multiply extension is not currently enabled
+// by default for any targets, because it requires an SVE vector length >= 256
+// bits. When we add a CPU which has that, then it can be enabled by default,
+// but for now it can only be used by adding the +f64mm feature.
+// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=NO-F64MM %s
+// RUN: %clang -target aarch64 -march=armv8.6a+f64mm -### -c %s 2>&1 | FileCheck -check-prefix=F64MM %s
+// NO-F64MM-NOT: "-target-feature" "+f64mm"
+// F64MM: "-target-feature" "+f64mm"
+
// fullfp16 is off by default for v8a, feature must not be mentioned
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
// RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
diff --git a/clang/test/Driver/arm-matrix-multiply.c b/clang/test/Driver/arm-matrix-multiply.c
new file mode 100644
index 000000000000..b1f620d4b6f7
--- /dev/null
+++ b/clang/test/Driver/arm-matrix-multiply.c
@@ -0,0 +1,14 @@
+// RUN: %clang -### -target arm-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s
+// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.5a+i8mm %s 2>&1 | FileCheck %s
+// CHECK: "-target-feature" "+i8mm"
+// CHECK-NOT: "-target-feature" "-i8mm"
+
+// RUN: %clang -### -target arm-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+// RUN: %clang -### -target aarch64-none-none-eabi -march=armv8.6a+noi8mm %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+// NOI8MM: "-target-feature" "-i8mm"
+// NOI8MM-NOT: "-target-feature" "+i8mm"
+
+// RUN: %clang -### -target arm-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT
+// RUN: %clang -### -target aarch64-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT
+// ABSENT-NOT: "-target-feature" "+i8mm"
+// ABSENT-NOT: "-target-feature" "-i8mm"
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index 39b24453b93d..3d3f88358b6a 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -88,6 +88,8 @@ AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
+AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm")
+AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm")
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
#undef AARCH64_ARCH_EXT_NAME
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index 1946392180fc..f8b0c2d4b244 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -24,7 +24,7 @@ namespace llvm {
namespace AArch64 {
// Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
+enum ArchExtKind : uint64_t {
AEK_INVALID = 0,
AEK_NONE = 1,
AEK_CRC = 1 << 1,
@@ -57,6 +57,8 @@ enum ArchExtKind : unsigned {
AEK_TME = 1 << 28,
AEK_BF16 = 1 << 29,
AEK_I8MM = 1 << 30,
+ AEK_F32MM = 1ULL << 31,
+ AEK_F64MM = 1ULL << 32,
};
enum class ArchKind {
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 87bd4997914d..5c96225351a9 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -116,7 +116,8 @@ ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
- ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+ ARM::AEK_I8MM))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -171,6 +172,7 @@ ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16")
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
+ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm", "-i8mm")
ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0")
ARM_ARCH_EXT_NAME("cdecp1", ARM::AEK_CDECP1, "+cdecp1", "-cdecp1")
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index 0de333031e6d..a378f739315c 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -47,14 +47,15 @@ enum ArchExtKind : uint64_t {
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
AEK_BF16 = 1 << 20,
- AEK_CDECP0 = 1 << 21,
- AEK_CDECP1 = 1 << 22,
- AEK_CDECP2 = 1 << 23,
- AEK_CDECP3 = 1 << 24,
- AEK_CDECP4 = 1 << 25,
- AEK_CDECP5 = 1 << 26,
- AEK_CDECP6 = 1 << 27,
- AEK_CDECP7 = 1 << 28,
+ AEK_I8MM = 1 << 21,
+ AEK_CDECP0 = 1 << 22,
+ AEK_CDECP1 = 1 << 23,
+ AEK_CDECP2 = 1 << 24,
+ AEK_CDECP3 = 1 << 25,
+ AEK_CDECP4 = 1 << 26,
+ AEK_CDECP5 = 1 << 27,
+ AEK_CDECP6 = 1 << 28,
+ AEK_CDECP7 = 1 << 29,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 825a127e3e08..0b3cc91bae9a 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -636,6 +636,7 @@ TEST(TargetParserTest, ARMArchExtFeature) {
{"maverick", "maverick", nullptr, nullptr},
{"xscale", "noxscale", nullptr, nullptr},
{"sb", "nosb", "+sb", "-sb"},
+ {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
{"mve", "nomve", "+mve", "-mve"},
{"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
@@ -1230,7 +1231,10 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"tme", "notme", "+tme", "-tme"},
{"ssbs", "nossbs", "+ssbs", "-ssbs"},
{"sb", "nosb", "+sb", "-sb"},
- {"predres", "nopredres", "+predres", "-predres"}
+ {"predres", "nopredres", "+predres", "-predres"},
+ {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
+ {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
+ {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
};
for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
More information about the cfe-commits
mailing list