[llvm] [BOLT] Enable hugify for AArch64 (PR #117158)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 06:21:44 PST 2024


https://github.com/alekuz01 updated https://github.com/llvm/llvm-project/pull/117158

>From 813ccfaf3873dc5133d800c073f686573932ab60 Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 12:59:15 +0000
Subject: [PATCH 1/3] [BOLT] Enable hugify for AArch64

---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 17 +++++--
 bolt/runtime/common.h                         |  6 ++-
 bolt/runtime/hugify.cpp                       | 21 ++++++---
 .../AArch64/Inputs/user_func_order.txt        |  2 +
 bolt/test/runtime/AArch64/hugify.c            | 34 ++++++++++++++
 bolt/test/runtime/AArch64/section-order.test  | 10 +++++
 bolt/test/runtime/AArch64/user-func-reorder.c | 44 +++++++++++++++++++
 7 files changed, 123 insertions(+), 11 deletions(-)
 create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt
 create mode 100644 bolt/test/runtime/AArch64/hugify.c
 create mode 100644 bolt/test/runtime/AArch64/section-order.test
 create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 7059a3dd231099..5345d90c4e4163 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -582,8 +582,12 @@ Error RewriteInstance::discoverStorage() {
 
   // Hugify: Additional huge page from left side due to
   // weird ASLR mapping addresses (4KB aligned)
-  if (opts::Hugify && !BC->HasFixedLoadAddress)
+  if (opts::Hugify && !BC->HasFixedLoadAddress) {
     NextAvailableAddress += BC->PageAlign;
+    BC->outs() << "BOLT-INFO: Hugify, Additional huge page from left side due to"
+               << "weird ASLR mapping addresses(4KB aligned): " << NextAvailableAddress
+               << '\n';
+  }
 
   if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
     // This is where the black magic happens. Creating PHDR table in a segment
@@ -5722,17 +5726,22 @@ void RewriteInstance::rewriteFile() {
 
   // Write all allocatable sections - reloc-mode text is written here as well
   for (BinarySection &Section : BC->allocatableSections()) {
-    if (!Section.isFinalized() || !Section.getOutputData())
+    if (!Section.isFinalized() || !Section.getOutputData()) {
+      BC->outs() << "BOLT: new section is finalized or !getOutputData, skip " << Section.getName() << '\n';
       continue;
-    if (Section.isLinkOnly())
+    }
+    if (Section.isLinkOnly()) {
+      BC->outs() << "BOLT: new section is link only, skip " << Section.getName() << '\n';
       continue;
+    }
 
     if (opts::Verbosity >= 1)
       BC->outs() << "BOLT: writing new section " << Section.getName()
                  << "\n data at 0x"
                  << Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
                  << Section.getOutputSize() << "\n at offset "
-                 << Section.getOutputFileOffset() << '\n';
+                 << Section.getOutputFileOffset()
+                 << " with content size " << Section.getOutputContents().size() << '\n';
     OS.seek(Section.getOutputFileOffset());
     Section.write(OS);
   }
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 9b9965bae524eb..593b7b91d20042 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -151,10 +151,12 @@ struct timespec {
   uint64_t tv_nsec; /* nanoseconds */
 };
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm64__)
 #include "sys_aarch64.h"
-#else
+#elif defined(__x86_64__)
 #include "sys_x86_64.h"
+#else
+ exit(1);
 #endif
 
 constexpr uint32_t BufSize = 10240;
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 05c1be4f2d70ca..901a2d61e987dd 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,7 +6,9 @@
 //
 //===---------------------------------------------------------------------===//
 
-#if defined (__x86_64__) && !defined(__APPLE__)
+#if defined(__x86_64__) \
+    || ( defined(__aarch64__) || defined(__arm64__) )  \
+    && !defined(__APPLE__)
 
 #include "common.h"
 
@@ -73,8 +75,11 @@ static bool hasPagecacheTHPSupport() {
   if (Res < 0)
     return false;
 
-  if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]"))
+  if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) {
+    DEBUG(report(
+              "[hugify] THP support is not enabled.\n");)
     return false;
+  }
 
   struct KernelVersionTy {
     uint32_t major;
@@ -168,10 +173,16 @@ extern "C" void __bolt_hugify_self_impl() {
 extern "C" __attribute((naked)) void __bolt_hugify_self() {
 #if defined(__x86_64__)
   __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
-                                "jmp __bolt_hugify_start_program\n" ::
-                                    :);
+                                "jmp __bolt_hugify_start_program\n"
+                                :::);
+#elif defined(__aarch64__) || defined(__arm64__)
+  __asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL
+                                "adrp x16, __bolt_hugify_start_program\n"
+                                "add x16, x16, #:lo12:__bolt_hugify_start_program\n"
+                                "br x16\n"
+                                :::);
 #else
-  exit(1);
+  __exit(1);
 #endif
 }
 #endif
diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
new file mode 100644
index 00000000000000..48b76cd35f44d4
--- /dev/null
+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
@@ -0,0 +1,2 @@
+main
+fib
diff --git a/bolt/test/runtime/AArch64/hugify.c b/bolt/test/runtime/AArch64/hugify.c
new file mode 100644
index 00000000000000..a54b8952c7007d
--- /dev/null
+++ b/bolt/test/runtime/AArch64/hugify.c
@@ -0,0 +1,34 @@
+// Make sure BOLT correctly processes --hugify option
+
+#include <stdio.h>
+
+int g1 = 1;
+int g2;
+static int sg1 = 1;
+static int sg2;
+
+
+int main(int argc, char **argv) {
+  printf("Hello world %p = %d , %p = %d\n", &g1, g1, &sg1, sg1);
+  printf("%p = %d , %p = %d\n", &g2, g2, &sg2, sg2);
+  return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q
+RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q
+
+RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify
+RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify
+
+RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE
+
+CHECK-NOPIE: Hello world
+
+RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE
+
+CHECK-PIE: Hello world
+
+*/
diff --git a/bolt/test/runtime/AArch64/section-order.test b/bolt/test/runtime/AArch64/section-order.test
new file mode 100644
index 00000000000000..f98af04422f243
--- /dev/null
+++ b/bolt/test/runtime/AArch64/section-order.test
@@ -0,0 +1,10 @@
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
+RUN: llvm-bolt %t.exe -o %t --instrument --hugify -v 10 -debug 2>&1 | tee section_order.log
+RUN: llvm-readelf --section-headers %t | FileCheck %s
+
+## Verify that llvm-bolt outputs new sections in expected order.
+CHECK: .text.bolt.extra.1
+CHECK: .rodata.bolt.extra.1
+CHECK: .data.bolt.extra.1
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
new file mode 100644
index 00000000000000..fcb92bca16259b
--- /dev/null
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
@@ -0,0 +1,44 @@
+/* Checks that BOLT correctly processes a user-provided function list file,
+ * reorder functions according to this list, update hot_start and hot_end
+ * symbols and insert a function to perform hot text mapping during program
+ * startup.
+ */
+#include <stdio.h>
+
+int foo(int x) {
+  return x + 1;
+}
+
+int fib(int x) {
+  if (x < 2)
+    return x;
+  return fib(x - 1) + fib(x - 2);
+}
+
+int bar(int x) {
+  return x - 1;
+}
+
+int main(int argc, char **argv) {
+  printf("fib(%d) = %d\n", argc, fib(argc));
+  return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
+
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
+RUN:   --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN: llvm-nm --numeric-sort --print-armap %t | \
+RUN:   FileCheck %s -check-prefix=CHECK-NM
+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+CHECK-NM:      W  __hot_start
+CHECK-NM:      T main
+CHECK-NM-NEXT: T fib
+CHECK-NM-NEXT: W __hot_end
+
+CHECK-OUTPUT: fib(4) = 3
+*/

>From 2c4933055dc940f62e8ba8b177d6f01e0c59b0fd Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 14:12:06 +0000
Subject: [PATCH 2/3] [BOLT] Enable hugify for AArch64

Print the hugify info message only when verbosity >= 1, and print the
section-skip messages only when -debug is used and verbosity > 1.
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 19 ++++++++++++++-----
 bolt/runtime/hugify.cpp              |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 5345d90c4e4163..bf884594463649 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -584,9 +584,12 @@ Error RewriteInstance::discoverStorage() {
   // weird ASLR mapping addresses (4KB aligned)
   if (opts::Hugify && !BC->HasFixedLoadAddress) {
     NextAvailableAddress += BC->PageAlign;
-    BC->outs() << "BOLT-INFO: Hugify, Additional huge page from left side due to"
-               << "weird ASLR mapping addresses(4KB aligned): " << NextAvailableAddress
-               << '\n';
+    if (opts::Verbosity >= 1) {
+      BC->outs()
+          << "BOLT-INFO: Hugify, Additional huge page from left side due to"
+          << "weird ASLR mapping addresses(4KB aligned): "
+          << NextAvailableAddress << '\n';
+    }
   }
 
   if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
@@ -5727,11 +5730,17 @@ void RewriteInstance::rewriteFile() {
   // Write all allocatable sections - reloc-mode text is written here as well
   for (BinarySection &Section : BC->allocatableSections()) {
     if (!Section.isFinalized() || !Section.getOutputData()) {
-      BC->outs() << "BOLT: new section is finalized or !getOutputData, skip " << Section.getName() << '\n';
+      LLVM_DEBUG(if (opts::Verbosity > 1) {
+        dbgs() << "BOLT-INFO: new section is finalized or !getOutputData, skip "
+                   << Section.getName() << '\n';
+      });
       continue;
     }
     if (Section.isLinkOnly()) {
-      BC->outs() << "BOLT: new section is link only, skip " << Section.getName() << '\n';
+      LLVM_DEBUG(if (opts::Verbosity > 1) {
+        dbgs() << "BOLT-INFO: new section is link only, skip "
+                   << Section.getName() << '\n';
+      });
       continue;
     }
 
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 901a2d61e987dd..8f84bcb37afb53 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -171,6 +171,7 @@ extern "C" void __bolt_hugify_self_impl() {
 
 /// This is hooking ELF's entry, it needs to save all machine state.
 extern "C" __attribute((naked)) void __bolt_hugify_self() {
+  // clang-format off
 #if defined(__x86_64__)
   __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
                                 "jmp __bolt_hugify_start_program\n"
@@ -184,5 +185,6 @@ extern "C" __attribute((naked)) void __bolt_hugify_self() {
 #else
   __exit(1);
 #endif
+  // clang-format on
 }
 #endif

>From a0c6208eff798dd1c7b9c8ee11edc392d6991da7 Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 14:21:14 +0000
Subject: [PATCH 3/3] [BOLT] Enable hugify for AArch64

Instead of a stray exit(1) call, emit a descriptive preprocessor error
when building for an unsupported architecture.
---
 bolt/runtime/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 593b7b91d20042..27d0830071067a 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -156,7 +156,7 @@ struct timespec {
 #elif defined(__x86_64__)
 #include "sys_x86_64.h"
 #else
- exit(1);
+#error "For AArch64/ARM64 and X86_64 only."
 #endif
 
 constexpr uint32_t BufSize = 10240;



More information about the llvm-commits mailing list