[compiler-rt] [AIX][PGO] Handle atexit functions when dlclose'ing shared libraries (PR #102940)

Wael Yehia via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 13:40:58 PDT 2024


https://github.com/w2yehia updated https://github.com/llvm/llvm-project/pull/102940

>From cf7c3df59d24b72fc9e5a37b9aad6492babe289f Mon Sep 17 00:00:00 2001
From: Wael Yehia <wyehia at ca.ibm.com>
Date: Fri, 26 Jul 2024 13:10:01 -0400
Subject: [PATCH] [AIX][PGO] Handle atexit functions when dlclose'ing shared
 libraries

Problem:
On AIX, functions registered by atexit in a shared library are not run
when the library is dlclosed, but instead run (and fail because the function
pointer is no longer valid) during main program exit.

The profile-rt registers some functions with atexit:

 1. writeFileWithoutReturn, which writes out the profile file
 2. llvm_delete_reset_function_list, which deletes data structures in the
    gcov instrumentation runtime

As a result, a program that dlopens and then dlcloses an instrumented shared
object currently crashes with "Illegal instruction (core dumped)" at exit.

Solution:
When a shared library is dlclose'd, destructors from the library are
called. So create a destructor function that iterates over all functions
that profile-rt has registered with atexit, unregisters each one that is
still registered, and executes it.

Scenarios tested:

(0) gcov dlopen/dlclose                                       (AIX/gcov-dlopen-dlclose.test)
(1) multiple dlopen/dlclose of the same lib and multiple libs (instrprof-dlopen-dlclose.test)
(2) dlopen but no dlclose                                     (exists: Posix/instrprof-dlopen.test)
(3) a simple fork testcase with dlopen/dlclose                (instrprof-dlopen-dlclose.test)
(4) dlopen/dlclose by multiple threads.                       (instrprof-dlopen-dlclose.test)
(5) regular dynamic-linking of instrumented shared libs       (exists: AIX/shared-bexpall-pgo.c)
(6) a simple fork testcase produces correct profile           (instrprof-fork.c)

Co-authored-by: Hubert Tong <hstong at ca.ibm.com>
---
 compiler-rt/lib/profile/GCDAProfiling.c       |   4 +-
 compiler-rt/lib/profile/InstrProfilingFile.c  |   2 +-
 compiler-rt/lib/profile/InstrProfilingUtil.c  |  69 ++++++++
 compiler-rt/lib/profile/InstrProfilingUtil.h  |   5 +
 .../test/profile/AIX/gcov-dlopen-dlclose.test |  55 ++++++
 .../profile/instrprof-dlopen-dlclose.test     | 162 ++++++++++++++++++
 compiler-rt/test/profile/instrprof-fork.c     |  30 ++++
 7 files changed, 324 insertions(+), 3 deletions(-)
 create mode 100644 compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
 create mode 100644 compiler-rt/test/profile/instrprof-dlopen-dlclose.test
 create mode 100644 compiler-rt/test/profile/instrprof-fork.c

diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c
index 4f46fd2839b909..d6e2175169e4a5 100644
--- a/compiler-rt/lib/profile/GCDAProfiling.c
+++ b/compiler-rt/lib/profile/GCDAProfiling.c
@@ -617,9 +617,9 @@ void llvm_gcov_init(fn_ptr wfn, fn_ptr rfn) {
     atexit_ran = 1;
 
     /* Make sure we write out the data and delete the data structures. */
-    atexit(llvm_delete_reset_function_list);
+    lprofAtExit(llvm_delete_reset_function_list);
 #ifdef _WIN32
-    atexit(llvm_writeout_and_clear);
+    lprofAtExit(llvm_writeout_and_clear);
 #endif
   }
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index db3918d8410319..60ea689e745478 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1259,7 +1259,7 @@ int __llvm_profile_register_write_file_atexit(void) {
   lprofSetupValueProfiler();
 
   HasBeenRegistered = 1;
-  return atexit(writeFileWithoutReturn);
+  return lprofAtExit(writeFileWithoutReturn);
 }
 
 COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.c b/compiler-rt/lib/profile/InstrProfilingUtil.c
index cd18cba3e268f9..642393d432d7ea 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.c
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.c
@@ -373,3 +373,72 @@ COMPILER_RT_VISIBILITY int lprofReleaseMemoryPagesToOS(uintptr_t Begin,
   return 0;
 #endif
 }
+
+#ifdef _AIX
+typedef struct fn_node {
+  AtExit_Fn_ptr func;
+  struct fn_node *next;
+} fn_node;
+typedef struct {
+  fn_node *top;
+} fn_stack;
+
+static void fn_stack_push(fn_stack *, AtExit_Fn_ptr);
+static AtExit_Fn_ptr fn_stack_pop(fn_stack *);
+/* return 1 if stack is empty, 0 otherwise */
+static int fn_stack_is_empty(fn_stack *);
+
+static fn_stack AtExit_stack = {0};
+#define ATEXIT_STACK (&AtExit_stack)
+
+/* On AIX, atexit() functions registered by a shared library do not get called
+ * when the library is dlclose'd, causing a crash when they are eventually
+ * called at main program exit. However, a destructor does get called. So we
+ * collect all atexit functions registered by profile-rt and at program
+ * termination time (normal exit, shared library unload, or dlclose) we walk
+ * the list and execute any function that is still sitting in the atexit system
+ * queue.
+ */
+__attribute__((__destructor__)) static void cleanup() {
+  while (!fn_stack_is_empty(ATEXIT_STACK)) {
+    AtExit_Fn_ptr func = fn_stack_pop(ATEXIT_STACK);
+    if (func && unatexit(func) == 0)
+      func();
+  }
+}
+
+/* Push func on st; if malloc fails, func is left untracked (no crash). */
+static void fn_stack_push(fn_stack *st, AtExit_Fn_ptr func) {
+  fn_node *old_top, *n = (fn_node *)malloc(sizeof(fn_node));
+  if (!n)
+    return;
+  n->func = func;
+  do { /* retry until the compare-and-swap installs n as the new top */
+    old_top = st->top;
+    n->next = old_top;
+  } while (!COMPILER_RT_BOOL_CMPXCHG(&st->top, old_top, n));
+}
+static AtExit_Fn_ptr fn_stack_pop(fn_stack *st) {
+  fn_node *old_top, *new_top;
+  while (1) {
+    old_top = st->top;
+    if (old_top == 0)
+      return 0;
+    new_top = old_top->next;
+    if (COMPILER_RT_BOOL_CMPXCHG(&st->top, old_top, new_top)) {
+      AtExit_Fn_ptr func = old_top->func;
+      free(old_top);
+      return func;
+    }
+  }
+}
+
+static int fn_stack_is_empty(fn_stack *st) { return st->top == 0; }
+#endif
+
+COMPILER_RT_VISIBILITY int lprofAtExit(AtExit_Fn_ptr func) {
+#ifdef _AIX
+  fn_stack_push(ATEXIT_STACK, func);
+#endif
+  return atexit(func);
+}
diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.h b/compiler-rt/lib/profile/InstrProfilingUtil.h
index 4a88a03580941e..841204b6ea8a38 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.h
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.h
@@ -84,4 +84,9 @@ static inline size_t lprofRoundDownTo(size_t x, size_t boundary) {
 
 int lprofReleaseMemoryPagesToOS(uintptr_t Begin, uintptr_t End);
 
+typedef void (*AtExit_Fn_ptr)(void);
+
+/* Call atexit and perform other platform-specific bookkeeping. */
+int lprofAtExit(AtExit_Fn_ptr);
+
 #endif /* PROFILE_INSTRPROFILINGUTIL_H */
diff --git a/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test b/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
new file mode 100644
index 00000000000000..21a56bd4199e84
--- /dev/null
+++ b/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
@@ -0,0 +1,55 @@
+RUN: rm -rf %t && split-file %s %t && cd %t
+RUN: %clang foo.c -c --coverage
+RUN: %clang foo2.c -c --coverage
+RUN: %clang -shared foo.o -o shr.o --coverage
+RUN: ar -X32_64 r libfoo.a shr.o
+RUN: %clang -shared foo2.o -o shr.o --coverage
+RUN: ar -X32_64 r libfoo2.a shr.o
+
+RUN: %clang common.c -c --coverage
+
+RUN: %clang test1.c common.o  --coverage
+RUN: ./a.out
+
+//--- foo.c
+void foo() {}
+
+//--- foo2.c
+void foo2() {}
+
+//--- common.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef void (*FN_PTR)();
+int open_close_libs() {
+  void *handle, *handle2;
+  FN_PTR foo, foo2;
+
+#define OPEN_AND_RUN(HANDLE, SUF)                                            \
+  HANDLE = dlopen("./lib" #SUF ".a(shr.o)",RTLD_NOW|RTLD_MEMBER);            \
+  SUF = (void (*)())dlsym(HANDLE, #SUF);                                     \
+  if (SUF == NULL) {                                                         \
+    fprintf(stderr, "unable to lookup symbol '%s': %s\n", #SUF, dlerror());  \
+    return EXIT_FAILURE;                                                     \
+  }                                                                          \
+  SUF();
+
+#define CLOSE_AND_CHECK(HANDLE, SUF)                                         \
+  dlclose(HANDLE);                                                           \
+  system("ls " #SUF ".gc*");
+
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle, foo)
+
+  OPEN_AND_RUN(handle2, foo2)
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle2, foo2)
+  CLOSE_AND_CHECK(handle, foo)
+  return EXIT_SUCCESS;
+}
+//--- test1.c
+int open_close_libs();
+int main() {
+  open_close_libs();
+}
diff --git a/compiler-rt/test/profile/instrprof-dlopen-dlclose.test b/compiler-rt/test/profile/instrprof-dlopen-dlclose.test
new file mode 100644
index 00000000000000..2011cbf4b8ebad
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-dlopen-dlclose.test
@@ -0,0 +1,162 @@
+RUN: rm -rf %t && split-file %s %t && cd %t
+RUN: %clang_pgogen foo.c -c -Xclang -fprofile-instrument-path="default_foo_%m.profraw"
+RUN: %clang_pgogen foo2.c -c -Xclang -fprofile-instrument-path="default_foo2_%m.profraw"
+RUN: %clang_pgogen -shared foo.o -o shr.o -bcdtors:mbr
+RUN: ar -X32_64 r libfoo.a shr.o
+RUN: %clang_pgogen -shared foo2.o -o shr.o
+RUN: ar -X32_64 r libfoo2.a shr.o
+
+RUN: %clang_pgogen common.c -c
+
+RUN: %clang_pgogen test1.c common.o -Xclang -fprofile-instrument-path="default_test1_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO
+RUN: llvm-profdata show default_test1_*.profraw --counts --all-functions 2>&1 | \
+RUN:   FileCheck %s -check-prefix=CHECK-TEST1
+RUN: rm -f default*
+
+RUN: %clang_pgogen test2.c common.o -Xclang -fprofile-instrument-path="default_test2_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO
+RUN: llvm-profdata show default_test2_*.profraw --counts --all-functions 2>&1 | \
+RUN:   FileCheck %s -check-prefix=CHECK-TEST2
+RUN: rm -f default*
+
+RUN: %clangxx_pgogen -lpthread test3.cpp common.o -Xclang -fprofile-instrument-path="default_test3_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO-FOUR-THREADS
+
+CHECK-FOO:  foo:
+CHECK-FOO:    Block counts: [1]
+CHECK-FOO:  foo2:
+CHECK-FOO:    Block counts: [1]
+CHECK-FOO:  foo:
+CHECK-FOO:    Block counts: [2]
+
+CHECK-FOO-FOUR-THREADS:  foo:
+CHECK-FOO-FOUR-THREADS:    Block counts: [8]
+CHECK-FOO-FOUR-THREADS:  foo2:
+CHECK-FOO-FOUR-THREADS:    Block counts: [4]
+
+CHECK-TEST1:  main:
+CHECK-TEST1:    Block counts: [1, 0, 1, 0, 1, 1, 0]
+
+CHECK-TEST2: func1:
+CHECK-TEST2: Block counts: [4]
+CHECK-TEST2:  func2:
+CHECK-TEST2: Block counts: [1]
+
+
+//--- foo.c
+void foo() {}
+
+//--- foo2.c
+void foo2() {}
+
+//--- common.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef void (*FN_PTR)();
+int perform_check = 1;
+
+/* This function dlopen/dlclose libfoo.a twice and libfoo2.a once. Each time it
+ * dlopens a library, it runs the singleton function from that library. So the
+ * final counter value for foo and foo2 in each profile file is 2 and 1, resp.
+ */
+int open_close_libs() {
+  void *handle, *handle2;
+  FN_PTR foo, foo2;
+
+#define OPEN_AND_RUN(HANDLE, SUF)                                            \
+  HANDLE = dlopen("./lib" #SUF ".a(shr.o)",RTLD_NOW|RTLD_MEMBER);            \
+  SUF = (void (*)())dlsym(HANDLE, #SUF);                                     \
+  if (SUF == NULL) {                                                         \
+    fprintf(stderr, "unable to lookup symbol '%s': %s\n", #SUF, dlerror());  \
+    return EXIT_FAILURE;                                                     \
+  }                                                                          \
+  SUF();
+
+#define CHECK_ONLY(SUF)                                                      \
+  system("llvm-profdata show default_" #SUF "_*.profraw --counts --all-functions");
+
+#define CLOSE_AND_CHECK(HANDLE, SUF)                                         \
+  dlclose(HANDLE);                                                           \
+  if (perform_check) { CHECK_ONLY(SUF) }
+
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle, foo)
+
+  OPEN_AND_RUN(handle2, foo2)
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle2, foo2)
+  CLOSE_AND_CHECK(handle, foo)
+  return EXIT_SUCCESS;
+}
+void check_prof_files() {
+  CHECK_ONLY(foo)
+  CHECK_ONLY(foo2)
+}
+
+//--- test1.c
+int open_close_libs();
+int main() {
+  open_close_libs();
+}
+
+//--- test2.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+__attribute__((noinline)) void func1() {}
+__attribute__((noinline)) void func2() {}
+void open_close_libs();
+
+int main(void) {
+  int status;
+  func1();
+  pid_t pid = fork();
+  if (pid == -1)
+    return 1;
+  if (pid == 0) { // child
+    open_close_libs();
+    func2();
+  }
+  func1();
+  if (pid)
+    wait(&status);
+  return 0;
+}
+
+//--- test3.cpp
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+
+extern "C" void check_prof_files();
+extern "C" void open_close_libs();
+extern int perform_check;
+
+template <typename T>
+void launcher(T func) {
+  auto t1 = std::thread(func);
+  auto t2 = std::thread(func);
+  auto t3 = std::thread(func);
+  auto t4 = std::thread(func);
+
+  t1.join();
+  t2.join();
+  t3.join();
+  t4.join();
+}
+
+int main() {
+  // don't check profiles generated inside open_close_libs because
+  // you'll get non-deterministic output due to threading.
+  perform_check = 0;
+  launcher<>(open_close_libs);
+
+  // instead, check the profiles manually here in the main thread.
+  check_prof_files();
+  return 0;
+}
diff --git a/compiler-rt/test/profile/instrprof-fork.c b/compiler-rt/test/profile/instrprof-fork.c
new file mode 100644
index 00000000000000..8b2bdd056a2f9b
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-fork.c
@@ -0,0 +1,30 @@
+// A simple fork results in two processes writing to the same file
+// RUN: rm -fr %t.profdir
+// RUN: %clang_pgogen=%t.profdir -o %t -O2 %s
+// RUN: %run %t
+// RUN: llvm-profdata show --all-functions --counts %t.profdir/default_*.profraw  | FileCheck %s
+//
+// CHECK: func1:
+// CHECK: Block counts: [4]
+// CHECK:  func2:
+// CHECK: Block counts: [1]
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+__attribute__((noinline)) void func1() {}
+__attribute__((noinline)) void func2() {}
+
+int main(void) {      //   child     | parent
+  int status;         // func1 func2 | func1 func2
+  func1();            //   +1        |   +1        (*)
+  pid_t pid = fork(); //             |
+  if (pid == -1)      //             |
+    return 1;         //             |
+  if (pid == 0)       //             |
+    func2();          //         +1  |
+  func1();            //   +1        |   +1
+  if (pid)            // ------------+------------
+    wait(&status);    //    2     1  |    2    0
+  return 0;           // (*)  the child inherits counter values prior to fork
+}                     //      from the parent in non-continuous mode.



More information about the llvm-commits mailing list