[llvm] [BOLT] Enable hugify for AArch64 (PR #117158)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 20 11:56:03 PST 2024
https://github.com/alekuz01 updated https://github.com/llvm/llvm-project/pull/117158
>From 813ccfaf3873dc5133d800c073f686573932ab60 Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 12:59:15 +0000
Subject: [PATCH 1/5] [BOLT] Enable hugify for AArch64
---
bolt/lib/Rewrite/RewriteInstance.cpp | 17 +++++--
bolt/runtime/common.h | 6 ++-
bolt/runtime/hugify.cpp | 21 ++++++---
.../AArch64/Inputs/user_func_order.txt | 2 +
bolt/test/runtime/AArch64/hugify.c | 34 ++++++++++++++
bolt/test/runtime/AArch64/section-order.test | 10 +++++
bolt/test/runtime/AArch64/user-func-reorder.c | 44 +++++++++++++++++++
7 files changed, 123 insertions(+), 11 deletions(-)
create mode 100644 bolt/test/runtime/AArch64/Inputs/user_func_order.txt
create mode 100644 bolt/test/runtime/AArch64/hugify.c
create mode 100644 bolt/test/runtime/AArch64/section-order.test
create mode 100644 bolt/test/runtime/AArch64/user-func-reorder.c
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 7059a3dd231099..5345d90c4e4163 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -582,8 +582,12 @@ Error RewriteInstance::discoverStorage() {
// Hugify: Additional huge page from left side due to
// weird ASLR mapping addresses (4KB aligned)
- if (opts::Hugify && !BC->HasFixedLoadAddress)
+ if (opts::Hugify && !BC->HasFixedLoadAddress) {
NextAvailableAddress += BC->PageAlign;
+ BC->outs() << "BOLT-INFO: Hugify, Additional huge page from left side due to"
+ << "weird ASLR mapping addresses(4KB aligned): " << NextAvailableAddress
+ << '\n';
+ }
if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
// This is where the black magic happens. Creating PHDR table in a segment
@@ -5722,17 +5726,22 @@ void RewriteInstance::rewriteFile() {
// Write all allocatable sections - reloc-mode text is written here as well
for (BinarySection &Section : BC->allocatableSections()) {
- if (!Section.isFinalized() || !Section.getOutputData())
+ if (!Section.isFinalized() || !Section.getOutputData()) {
+ BC->outs() << "BOLT: new section is finalized or !getOutputData, skip " << Section.getName() << '\n';
continue;
- if (Section.isLinkOnly())
+ }
+ if (Section.isLinkOnly()) {
+ BC->outs() << "BOLT: new section is link only, skip " << Section.getName() << '\n';
continue;
+ }
if (opts::Verbosity >= 1)
BC->outs() << "BOLT: writing new section " << Section.getName()
<< "\n data at 0x"
<< Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
<< Section.getOutputSize() << "\n at offset "
- << Section.getOutputFileOffset() << '\n';
+ << Section.getOutputFileOffset()
+ << " with content size " << Section.getOutputContents().size() << '\n';
OS.seek(Section.getOutputFileOffset());
Section.write(OS);
}
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 9b9965bae524eb..593b7b91d20042 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -151,10 +151,12 @@ struct timespec {
uint64_t tv_nsec; /* nanoseconds */
};
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm64__)
#include "sys_aarch64.h"
-#else
+#elif defined(__x86_64__)
#include "sys_x86_64.h"
+#else
+ exit(1);
#endif
constexpr uint32_t BufSize = 10240;
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 05c1be4f2d70ca..901a2d61e987dd 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,7 +6,9 @@
//
//===---------------------------------------------------------------------===//
-#if defined (__x86_64__) && !defined(__APPLE__)
+#if defined(__x86_64__) \
+ || ( defined(__aarch64__) || defined(__arm64__) ) \
+ && !defined(__APPLE__)
#include "common.h"
@@ -73,8 +75,11 @@ static bool hasPagecacheTHPSupport() {
if (Res < 0)
return false;
- if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]"))
+ if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) {
+ DEBUG(report(
+ "[hugify] THP support is not enabled.\n");)
return false;
+ }
struct KernelVersionTy {
uint32_t major;
@@ -168,10 +173,16 @@ extern "C" void __bolt_hugify_self_impl() {
extern "C" __attribute((naked)) void __bolt_hugify_self() {
#if defined(__x86_64__)
__asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
- "jmp __bolt_hugify_start_program\n" ::
- :);
+ "jmp __bolt_hugify_start_program\n"
+ :::);
+#elif defined(__aarch64__) || defined(__arm64__)
+ __asm__ __volatile__(SAVE_ALL "bl __bolt_hugify_self_impl\n" RESTORE_ALL
+ "adrp x16, __bolt_hugify_start_program\n"
+ "add x16, x16, #:lo12:__bolt_hugify_start_program\n"
+ "br x16\n"
+ :::);
#else
- exit(1);
+ __exit(1);
#endif
}
#endif
diff --git a/bolt/test/runtime/AArch64/Inputs/user_func_order.txt b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
new file mode 100644
index 00000000000000..48b76cd35f44d4
--- /dev/null
+++ b/bolt/test/runtime/AArch64/Inputs/user_func_order.txt
@@ -0,0 +1,2 @@
+main
+fib
diff --git a/bolt/test/runtime/AArch64/hugify.c b/bolt/test/runtime/AArch64/hugify.c
new file mode 100644
index 00000000000000..a54b8952c7007d
--- /dev/null
+++ b/bolt/test/runtime/AArch64/hugify.c
@@ -0,0 +1,34 @@
+// Make sure BOLT correctly processes --hugify option
+
+#include <stdio.h>
+
+int g1 = 1;
+int g2;
+static int sg1 = 1;
+static int sg2;
+
+
+int main(int argc, char **argv) {
+ printf("Hello world %p = %d , %p = %d\n", &g1, g1, &sg1, sg1);
+ printf("%p = %d , %p = %d\n", &g2, g2, &sg2, sg2);
+ return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q
+RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q
+
+RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify
+RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify
+
+RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE
+
+CHECK-NOPIE: Hello world
+
+RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE
+
+CHECK-PIE: Hello world
+
+*/
diff --git a/bolt/test/runtime/AArch64/section-order.test b/bolt/test/runtime/AArch64/section-order.test
new file mode 100644
index 00000000000000..f98af04422f243
--- /dev/null
+++ b/bolt/test/runtime/AArch64/section-order.test
@@ -0,0 +1,10 @@
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
+RUN: llvm-bolt %t.exe -o %t --instrument --hugify -v 10 -debug 2>&1 | tee section_order.log
+RUN: llvm-readelf --section-headers %t | FileCheck %s
+
+## Verify that llvm-bolt outputs new sections in expected order.
+CHECK: .text.bolt.extra.1
+CHECK: .rodata.bolt.extra.1
+CHECK: .data.bolt.extra.1
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
new file mode 100644
index 00000000000000..fcb92bca16259b
--- /dev/null
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
@@ -0,0 +1,44 @@
+/* Checks that BOLT correctly processes a user-provided function list file,
+ * reorder functions according to this list, update hot_start and hot_end
+ * symbols and insert a function to perform hot text mapping during program
+ * startup.
+ */
+#include <stdio.h>
+
+int foo(int x) {
+ return x + 1;
+}
+
+int fib(int x) {
+ if (x < 2)
+ return x;
+ return fib(x - 1) + fib(x - 2);
+}
+
+int bar(int x) {
+ return x - 1;
+}
+
+int main(int argc, char **argv) {
+ printf("fib(%d) = %d\n", argc, fib(argc));
+ return 0;
+}
+
+/*
+REQUIRES: system-linux,bolt-runtime
+
+RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q
+
+RUN: llvm-bolt %t.exe --relocs=1 --lite --reorder-functions=user \
+RUN: --hugify --function-order=%p/Inputs/user_func_order.txt -o %t
+RUN: llvm-nm --numeric-sort --print-armap %t | \
+RUN: FileCheck %s -check-prefix=CHECK-NM
+RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT
+
+CHECK-NM: W __hot_start
+CHECK-NM: T main
+CHECK-NM-NEXT: T fib
+CHECK-NM-NEXT: W __hot_end
+
+CHECK-OUTPUT: fib(4) = 3
+*/
>From 2c4933055dc940f62e8ba8b177d6f01e0c59b0fd Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 14:12:06 +0000
Subject: [PATCH 2/5] [BOLT] Enable hugify for AArch64
Make debug messages visible only when -debug is used and verbosity > 1
---
bolt/lib/Rewrite/RewriteInstance.cpp | 19 ++++++++++++++-----
bolt/runtime/hugify.cpp | 2 ++
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 5345d90c4e4163..bf884594463649 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -584,9 +584,12 @@ Error RewriteInstance::discoverStorage() {
// weird ASLR mapping addresses (4KB aligned)
if (opts::Hugify && !BC->HasFixedLoadAddress) {
NextAvailableAddress += BC->PageAlign;
- BC->outs() << "BOLT-INFO: Hugify, Additional huge page from left side due to"
- << "weird ASLR mapping addresses(4KB aligned): " << NextAvailableAddress
- << '\n';
+ if (opts::Verbosity >= 1) {
+ BC->outs()
+ << "BOLT-INFO: Hugify, Additional huge page from left side due to"
+ << "weird ASLR mapping addresses(4KB aligned): "
+ << NextAvailableAddress << '\n';
+ }
}
if (!opts::UseGnuStack && !BC->IsLinuxKernel) {
@@ -5727,11 +5730,17 @@ void RewriteInstance::rewriteFile() {
// Write all allocatable sections - reloc-mode text is written here as well
for (BinarySection &Section : BC->allocatableSections()) {
if (!Section.isFinalized() || !Section.getOutputData()) {
- BC->outs() << "BOLT: new section is finalized or !getOutputData, skip " << Section.getName() << '\n';
+ LLVM_DEBUG(if (opts::Verbosity > 1) {
+ dbgs() << "BOLT-INFO: new section is finalized or !getOutputData, skip "
+ << Section.getName() << '\n';
+ });
continue;
}
if (Section.isLinkOnly()) {
- BC->outs() << "BOLT: new section is link only, skip " << Section.getName() << '\n';
+ LLVM_DEBUG(if (opts::Verbosity > 1) {
+ dbgs() << "BOLT-INFO: new section is link only, skip "
+ << Section.getName() << '\n';
+ });
continue;
}
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 901a2d61e987dd..8f84bcb37afb53 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -171,6 +171,7 @@ extern "C" void __bolt_hugify_self_impl() {
/// This is hooking ELF's entry, it needs to save all machine state.
extern "C" __attribute((naked)) void __bolt_hugify_self() {
+ // clang-format off
#if defined(__x86_64__)
__asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL
"jmp __bolt_hugify_start_program\n"
@@ -184,5 +185,6 @@ extern "C" __attribute((naked)) void __bolt_hugify_self() {
#else
__exit(1);
#endif
+ // clang-format on
}
#endif
>From a0c6208eff798dd1c7b9c8ee11edc392d6991da7 Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Thu, 21 Nov 2024 14:21:14 +0000
Subject: [PATCH 3/5] [BOLT] Enable hugify for AArch64
Instead of calling exit(1), emit a descriptive #error message for unsupported architectures
---
bolt/runtime/common.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
index 593b7b91d20042..27d0830071067a 100644
--- a/bolt/runtime/common.h
+++ b/bolt/runtime/common.h
@@ -156,7 +156,7 @@ struct timespec {
#elif defined(__x86_64__)
#include "sys_x86_64.h"
#else
- exit(1);
+#error "For AArch64/ARM64 and X86_64 only."
#endif
constexpr uint32_t BufSize = 10240;
>From 7ed49941150324bd6bd3cd153da5d805bcb97ac4 Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Tue, 3 Dec 2024 13:10:08 +0000
Subject: [PATCH 4/5] [BOLT] Enable hugify for AArch64
Minor fix to show a proper system arch for runtime libs
---
bolt/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index 9ac196ad0e8210..d035c0692ad06c 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -136,7 +136,7 @@ if (LLVM_INCLUDE_TESTS)
endif()
if (BOLT_ENABLE_RUNTIME)
- message(STATUS "Building BOLT runtime libraries for X86")
+ message(STATUS "Building BOLT runtime libraries for ${CMAKE_SYSTEM_PROCESSOR}")
set(extra_args "")
if(CMAKE_SYSROOT)
list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
>From c1dd410f498129c2d02cbaf808aafa103709265f Mon Sep 17 00:00:00 2001
From: Aleksei Kuzmenko <aleksei.kuzmenko at arm.com>
Date: Fri, 20 Dec 2024 19:55:07 +0000
Subject: [PATCH 5/5] [BOLT] Enable hugify for AArch64
Fix clang-format.
---
bolt/lib/Rewrite/RewriteInstance.cpp | 8 ++++----
bolt/runtime/hugify.cpp | 8 +++-----
bolt/test/runtime/AArch64/hugify.c | 1 -
bolt/test/runtime/AArch64/user-func-reorder.c | 8 ++------
4 files changed, 9 insertions(+), 16 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index bf884594463649..14f8b6c8cc8e2a 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5732,14 +5732,14 @@ void RewriteInstance::rewriteFile() {
if (!Section.isFinalized() || !Section.getOutputData()) {
LLVM_DEBUG(if (opts::Verbosity > 1) {
dbgs() << "BOLT-INFO: new section is finalized or !getOutputData, skip "
- << Section.getName() << '\n';
+ << Section.getName() << '\n';
});
continue;
}
if (Section.isLinkOnly()) {
LLVM_DEBUG(if (opts::Verbosity > 1) {
dbgs() << "BOLT-INFO: new section is link only, skip "
- << Section.getName() << '\n';
+ << Section.getName() << '\n';
});
continue;
}
@@ -5749,8 +5749,8 @@ void RewriteInstance::rewriteFile() {
<< "\n data at 0x"
<< Twine::utohexstr(Section.getAllocAddress()) << "\n of size "
<< Section.getOutputSize() << "\n at offset "
- << Section.getOutputFileOffset()
- << " with content size " << Section.getOutputContents().size() << '\n';
+ << Section.getOutputFileOffset() << " with content size "
+ << Section.getOutputContents().size() << '\n';
OS.seek(Section.getOutputFileOffset());
Section.write(OS);
}
diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp
index 8f84bcb37afb53..a98227070bbd7a 100644
--- a/bolt/runtime/hugify.cpp
+++ b/bolt/runtime/hugify.cpp
@@ -6,9 +6,8 @@
//
//===---------------------------------------------------------------------===//
-#if defined(__x86_64__) \
- || ( defined(__aarch64__) || defined(__arm64__) ) \
- && !defined(__APPLE__)
+#if defined(__x86_64__) || \
+ (defined(__aarch64__) || defined(__arm64__)) && !defined(__APPLE__)
#include "common.h"
@@ -76,8 +75,7 @@ static bool hasPagecacheTHPSupport() {
return false;
if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) {
- DEBUG(report(
- "[hugify] THP support is not enabled.\n");)
+ DEBUG(report("[hugify] THP support is not enabled.\n");)
return false;
}
diff --git a/bolt/test/runtime/AArch64/hugify.c b/bolt/test/runtime/AArch64/hugify.c
index a54b8952c7007d..c16e8589eb61c9 100644
--- a/bolt/test/runtime/AArch64/hugify.c
+++ b/bolt/test/runtime/AArch64/hugify.c
@@ -7,7 +7,6 @@ int g2;
static int sg1 = 1;
static int sg2;
-
int main(int argc, char **argv) {
printf("Hello world %p = %d , %p = %d\n", &g1, g1, &sg1, sg1);
printf("%p = %d , %p = %d\n", &g2, g2, &sg2, sg2);
diff --git a/bolt/test/runtime/AArch64/user-func-reorder.c b/bolt/test/runtime/AArch64/user-func-reorder.c
index fcb92bca16259b..3f144f5f808816 100644
--- a/bolt/test/runtime/AArch64/user-func-reorder.c
+++ b/bolt/test/runtime/AArch64/user-func-reorder.c
@@ -5,9 +5,7 @@
*/
#include <stdio.h>
-int foo(int x) {
- return x + 1;
-}
+int foo(int x) { return x + 1; }
int fib(int x) {
if (x < 2)
@@ -15,9 +13,7 @@ int fib(int x) {
return fib(x - 1) + fib(x - 2);
}
-int bar(int x) {
- return x - 1;
-}
+int bar(int x) { return x - 1; }
int main(int argc, char **argv) {
printf("fib(%d) = %d\n", argc, fib(argc));
More information about the llvm-commits
mailing list