[compiler-rt] [clang] [llvm] [AArch64][FMV] Support feature MOPS in Function Multi Versioning. (PR #78788)
Alexandros Lamprineas via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 19 13:29:06 PST 2024
https://github.com/labrinea created https://github.com/llvm/llvm-project/pull/78788
The patch adds support for FEAT_MOPS (Memory Copy and Memory Set instructions) in Function Multi Versioning. The bits [19:16] of the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS is implemented in AArch64 state.
>From bbf4a8885905a03a614af1d5010347f5ae42f08e Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 19 Jan 2024 21:07:46 +0000
Subject: [PATCH] [AArch64][FMV] Support feature MOPS in Function Multi
Versioning.
The patch adds support for FEAT_MOPS (Memory Copy and Memory Set
instructions) in Function Multi Versioning. The bits [19:16] of
the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS
is implemented in AArch64 state.
---
clang/test/CodeGen/attr-target-version.c | 30 +++++++++----------
clang/test/Sema/attr-target-clones-aarch64.c | 2 +-
clang/test/SemaCXX/attr-target-version.cpp | 1 +
compiler-rt/lib/builtins/cpu_model/aarch64.c | 1 +
.../builtins/cpu_model/aarch64/fmv/mrs.inc | 5 ++++
.../llvm/TargetParser/AArch64TargetParser.h | 3 +-
6 files changed, 25 insertions(+), 17 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
index a7cd6f7bf802c5..fb8834b46b9104 100644
--- a/clang/test/CodeGen/attr-target-version.c
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
-inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; }
+inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }
__attribute__((target_version("ls64"))) int fmv_e(void);
@@ -265,36 +265,36 @@ int hoo(void) {
// CHECK-NEXT: ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
-// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
+// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064
+// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
-// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
+// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
-// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
+// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568
+// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
-// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
+// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
// CHECK: resolver_else4:
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
-// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
+// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184
+// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184
// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
// CHECK: resolver_return5:
-// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
+// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
// CHECK: resolver_else6:
// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576
-// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576
+// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240
// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
// CHECK: resolver_return7:
-// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3
+// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
// CHECK: resolver_else8:
// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968
@@ -437,7 +437,7 @@ int hoo(void) {
// CHECK-LABEL: @fmv_inline._Msve2-sm4Mmemtag2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 10
-// CHECK-LABEL: @fmv_inline._Mrcpc3Mmemtag3(
+// CHECK-LABEL: @fmv_inline._Mrcpc3Mmemtag3Mmops(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 11
// CHECK-LABEL: @fmv_inline(
@@ -534,7 +534,7 @@ int hoo(void) {
// CHECK: attributes #20 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
// CHECK: attributes #21 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
// CHECK: attributes #22 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
-// CHECK: attributes #23 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" }
+// CHECK: attributes #23 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" }
// CHECK: attributes #24 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
index 9adabf87732130..4054b7c837ec99 100644
--- a/clang/test/Sema/attr-target-clones-aarch64.c
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
-void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void);
+void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void);
// expected-warning at +1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void __attribute__((target_clones("default+sha3"))) warn1(void);
diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp
index 2e262cda367749..5c542ad2e2dcab 100644
--- a/clang/test/SemaCXX/attr-target-version.cpp
+++ b/clang/test/SemaCXX/attr-target-version.cpp
@@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void);
void __attribute__((target_version("dotprod"))) no_def(void);
void __attribute__((target_version("rdm+fp"))) no_def(void);
void __attribute__((target_version("rcpc3"))) no_def(void);
+void __attribute__((target_version("mops"))) no_def(void);
// expected-error at +1 {{no matching function for call to 'no_def'}}
void foo(void) { no_def(); }
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 44e1cf49d1e927..17bddfca46f094 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -115,6 +115,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
+ FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
// in __aarch64_cpu_features
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
index 2f712f41f49792..e63eea38dc70b1 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -172,6 +172,11 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
// ID_AA64ISAR1_EL1.LS64 >= 0b0011
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
+
+ getCPUFeature(ID_AA64ISAR2_EL1, ftr);
+ // ID_AA64ISAR2_EL1.MOPS == 0b0001
+ if (extractBits(ftr, 16, 4) == 0x1)
+ setCPUFeature(FEAT_MOPS);
} else {
// Set some features in case of no CPUID support
if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index ddf5ab4d2df36c..623fdc21ba65a6 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -94,6 +94,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
+ FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62,
FEAT_INIT
@@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440},
{"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450},
{"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460},
- {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0},
+ {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650},
{"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0},
{"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160},
{"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0},
More information about the cfe-commits
mailing list