[compiler-rt] [llvm] [clang] [AArch64][FMV] Support feature MOPS in Function Multi Versioning. (PR #78788)

Alexandros Lamprineas via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 23 01:51:17 PST 2024


https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/78788

>From 4f639d0ca7edc5a1e8369e6ebec10458e0836948 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Fri, 19 Jan 2024 21:07:46 +0000
Subject: [PATCH] [AArch64][FMV] Support feature MOPS in Function Multi
 Versioning.

The patch adds support for FEAT_MOPS (Memory Copy and Memory Set
instructions) in Function Multi Versioning. The bits [19:16] of
the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS
is implemented in AArch64 state. This information is accessible
via ELF hwcaps.
---
 clang/test/CodeGen/attr-target-version.c      | 30 +++++++++----------
 clang/test/Sema/attr-target-clones-aarch64.c  |  2 +-
 clang/test/SemaCXX/attr-target-version.cpp    |  1 +
 compiler-rt/lib/builtins/cpu_model/aarch64.c  |  1 +
 .../builtins/cpu_model/aarch64/fmv/mrs.inc    |  2 ++
 .../lib/builtins/cpu_model/aarch64/hwcap.inc  |  3 ++
 .../llvm/TargetParser/AArch64TargetParser.h   |  3 +-
 7 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
index 13b895ad8e9b891..89279852a8c91da 100644
--- a/clang/test/CodeGen/attr-target-version.c
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re
 inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
 inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
 inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
-inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; }
+inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
 inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }
 
 __attribute__((target_version("ls64"))) int fmv_e(void);
@@ -272,36 +272,36 @@ int hoo(void) {
 // CHECK-NEXT:    ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme
 // CHECK:       resolver_else:
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
-// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
+// CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064
+// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064
 // CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
 // CHECK-NEXT:    br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
 // CHECK:       resolver_return1:
-// CHECK-NEXT:    ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
+// CHECK-NEXT:    ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
-// CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
+// CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568
+// CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568
 // CHECK-NEXT:    [[TMP11:%.*]] = and i1 true, [[TMP10]]
 // CHECK-NEXT:    br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
 // CHECK:       resolver_return3:
-// CHECK-NEXT:    ret ptr @fmv_inline._Msha1MpmullMf64mm
+// CHECK-NEXT:    ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
 // CHECK:       resolver_else4:
 // CHECK-NEXT:    [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
-// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
+// CHECK-NEXT:    [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184
+// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184
 // CHECK-NEXT:    [[TMP15:%.*]] = and i1 true, [[TMP14]]
 // CHECK-NEXT:    br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
 // CHECK:       resolver_return5:
-// CHECK-NEXT:    ret ptr @fmv_inline._Msha3Mi8mmMf32mm
+// CHECK-NEXT:    ret ptr @fmv_inline._Msha1MpmullMf64mm
 // CHECK:       resolver_else6:
 // CHECK-NEXT:    [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576
-// CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576
+// CHECK-NEXT:    [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240
+// CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240
 // CHECK-NEXT:    [[TMP19:%.*]] = and i1 true, [[TMP18]]
 // CHECK-NEXT:    br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
 // CHECK:       resolver_return7:
-// CHECK-NEXT:    ret ptr @fmv_inline._Mrcpc3Mmemtag3
+// CHECK-NEXT:    ret ptr @fmv_inline._Msha3Mi8mmMf32mm
 // CHECK:       resolver_else8:
 // CHECK-NEXT:    [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:    [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968
@@ -609,7 +609,7 @@ int hoo(void) {
 //
 //
 // CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3
+// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3Mmops
 // CHECK-SAME: () #[[ATTR23:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 11
@@ -768,7 +768,7 @@ int hoo(void) {
 // CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
 // CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
 // CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
-// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" }
+// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" }
 // CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
 //.
 // CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
index 9adabf877321306..4054b7c837ec99b 100644
--- a/clang/test/Sema/attr-target-clones-aarch64.c
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -triple aarch64-linux-gnu  -fsyntax-only -verify %s
 
-void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void);
+void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void);
 
 // expected-warning at +1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
 void __attribute__((target_clones("default+sha3"))) warn1(void);
diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp
index 2e262cda3677499..5c542ad2e2dcab4 100644
--- a/clang/test/SemaCXX/attr-target-version.cpp
+++ b/clang/test/SemaCXX/attr-target-version.cpp
@@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void);
 void __attribute__((target_version("dotprod"))) no_def(void);
 void __attribute__((target_version("rdm+fp"))) no_def(void);
 void __attribute__((target_version("rcpc3"))) no_def(void);
+void __attribute__((target_version("mops"))) no_def(void);
 
 // expected-error at +1 {{no matching function for call to 'no_def'}}
 void foo(void) { no_def(); }
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index 44e1cf49d1e9274..17bddfca46f094b 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -115,6 +115,7 @@ enum CPUFeatures {
   FEAT_SME_I64,
   FEAT_SME2,
   FEAT_RCPC3,
+  FEAT_MOPS,
   FEAT_MAX,
   FEAT_EXT = 62, // Reserved to indicate presence of additional features field
                  // in __aarch64_cpu_features
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
index 2f712f41f497924..32a21a2fba9a31a 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -109,6 +109,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_SME_I64);
   if (hwcap2 & HWCAP2_SME_F64F64)
     setCPUFeature(FEAT_SME_F64);
+  if (hwcap2 & HWCAP2_MOPS)
+    setCPUFeature(FEAT_MOPS);
   if (hwcap & HWCAP_CPUID) {
     unsigned long ftr;
     getCPUFeature(ID_AA64PFR1_EL1, ftr);
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
index 328d9c4140b9c62..7ddc125b26da71f 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
@@ -178,3 +178,6 @@
 #ifndef HWCAP2_SVE_EBF16
 #define HWCAP2_SVE_EBF16 (1ULL << 33)
 #endif
+#ifndef HWCAP2_MOPS
+#define HWCAP2_MOPS (1ULL << 43)
+#endif
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index ddf5ab4d2df36c1..623fdc21ba65a6e 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -94,6 +94,7 @@ enum CPUFeatures {
   FEAT_SME_I64,
   FEAT_SME2,
   FEAT_RCPC3,
+  FEAT_MOPS,
   FEAT_MAX,
   FEAT_EXT = 62,
   FEAT_INIT
@@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = {
     {"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440},
     {"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450},
     {"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460},
-    {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0},
+    {"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650},
     {"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0},
     {"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160},
     {"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0},



More information about the cfe-commits mailing list