[clang] [FMV][AArch64] Don't optimize backward compatible features in resolver. (PR #90928)

Pavel Iliin via cfe-commits cfe-commits at lists.llvm.org
Thu May 2 18:49:02 PDT 2024


https://github.com/ilinpv created https://github.com/llvm/llvm-project/pull/90928

For arch64 features, such as Branch Target Identification or MTE (Memory Tagging Extension), compatible with targets that lack their support we may encounter scenarios where a binary compiled with MTE for example is executed on both MTE and non-MTE hardware and we still need to detect at runtime whether the MTE feature is available to choose the appropriate function version.
So, we cannot optimize the function multi versioning resolver by removing checks for these features enabled for the target during compilation.

>From 36a72bf75ce1897ab58d422954c54dc40270f942 Mon Sep 17 00:00:00 2001
From: Pavel Iliin <Pavel.Iliin at arm.com>
Date: Wed, 1 May 2024 18:43:47 +0100
Subject: [PATCH] [FMV][AArch64] Don't optimize backward compatible features in
 resolver.

For arch64 features, such as Branch Target Identification or MTE (Memory
Tagging Extension), compatible with targets that lack their support we
may encounter scenarios where a binary compiled with MTE for example is
executed on both MTE and non-MTE hardware and we still need to detect at
runtime whether the MTE feature is available to choose the appropriate
function version.
So, we cannot optimize the function multi versioning resolver by removing
checks for these features enabled for the target during compilation.
---
 clang/lib/CodeGen/CodeGenFunction.cpp         | 10 +++++--
 .../test/CodeGen/attr-target-clones-aarch64.c | 29 +++++++++++++++++--
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 87766a758311d5..39943ed2a415e8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2759,8 +2759,14 @@ llvm::Value *CodeGenFunction::FormAArch64ResolverCondition(
     const MultiVersionResolverOption &RO) {
   llvm::SmallVector<StringRef, 8> CondFeatures;
   for (const StringRef &Feature : RO.Conditions.Features) {
-    // Form condition for features which are not yet enabled in target
-    if (!getContext().getTargetInfo().hasFeature(Feature))
+    // Optimize the Function Multi Versioning resolver by creating conditions
+    // only for features that are not enabled in the target. The exception is
+    // for features whose extension instructions are executed as NOP on targets
+    // without extension support.
+    if (!getContext().getTargetInfo().hasFeature(Feature) ||
+        Feature.equals("bti") || Feature.equals("memtag") ||
+        Feature.equals("memtag2") || Feature.equals("memtag3") ||
+        Feature.equals("dgh"))
       CondFeatures.push_back(Feature);
   }
   if (!CondFeatures.empty()) {
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index f75d8a69ebf02f..603d067864b45a 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -526,8 +526,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-MTE-BTI-NEXT:  resolver_entry:
 // CHECK-MTE-BTI-NEXT:    call void @__init_cpu_features_resolver()
 // CHECK-MTE-BTI-NEXT:    [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], 4096
-// CHECK-MTE-BTI-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
+// CHECK-MTE-BTI-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
+// CHECK-MTE-BTI-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
 // CHECK-MTE-BTI-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-MTE-BTI-NEXT:    br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK-MTE-BTI:       resolver_return:
@@ -604,7 +604,24 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 //
 // CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver(
 // CHECK-MTE-BTI-NEXT:  resolver_entry:
+// CHECK-MTE-BTI-NEXT:    call void @__init_cpu_features_resolver()
+// CHECK-MTE-BTI-NEXT:    [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624
+// CHECK-MTE-BTI-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624
+// CHECK-MTE-BTI-NEXT:    [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-MTE-BTI-NEXT:    br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK-MTE-BTI:       resolver_return:
 // CHECK-MTE-BTI-NEXT:    ret ptr @ftc_dup3._Mbti
+// CHECK-MTE-BTI:       resolver_else:
+// CHECK-MTE-BTI-NEXT:    [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 17592186044416
+// CHECK-MTE-BTI-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17592186044416
+// CHECK-MTE-BTI-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-MTE-BTI-NEXT:    br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK-MTE-BTI:       resolver_return1:
+// CHECK-MTE-BTI-NEXT:    ret ptr @ftc_dup3._Mmemtag2
+// CHECK-MTE-BTI:       resolver_else2:
+// CHECK-MTE-BTI-NEXT:    ret ptr @ftc_dup3.default
 //
 //
 // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone
@@ -712,7 +729,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-MTE-BTI:       resolver_return:
 // CHECK-MTE-BTI-NEXT:    ret ptr @ftc_inline3._MsbMsve
 // CHECK-MTE-BTI:       resolver_else:
+// CHECK-MTE-BTI-NEXT:    [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
+// CHECK-MTE-BTI-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
+// CHECK-MTE-BTI-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-MTE-BTI-NEXT:    br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK-MTE-BTI:       resolver_return1:
 // CHECK-MTE-BTI-NEXT:    ret ptr @ftc_inline3._Mbti
+// CHECK-MTE-BTI:       resolver_else2:
+// CHECK-MTE-BTI-NEXT:    ret ptr @ftc_inline3.default
 //
 //
 // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone



More information about the cfe-commits mailing list