[compiler-rt] 2abed78 - [AIX][PGO] Handle atexit functions when dlclose'ing shared libraries (#102940)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 27 11:30:37 PDT 2024


Author: Wael Yehia
Date: 2024-08-27T14:30:32-04:00
New Revision: 2abed78b4da0f4eb133e971c70734e664d5f87fd

URL: https://github.com/llvm/llvm-project/commit/2abed78b4da0f4eb133e971c70734e664d5f87fd
DIFF: https://github.com/llvm/llvm-project/commit/2abed78b4da0f4eb133e971c70734e664d5f87fd.diff

LOG: [AIX][PGO] Handle atexit functions when dlclose'ing shared libraries (#102940)

Problem:
On AIX, functions registered by atexit in a shared library are not run
when the library is dlclosed, but instead run (and fail because the
function pointer is no longer valid) during main program exit.

The profile-rt registers some functions with atexit:

 1. writeFileWithoutReturn that writes out the profile file
 2. llvm_delete_reset_function_list that does some cleanup in the gcov 
    instrumentation library (not sure)

And so right now, we get an "Illegal instruction (core dumped)" when an
instrumented shared object is dlopen'ed and dlclosed.

Solution:
  When a shared library is dlclose'd, destructors from the library are
  called. So create a destructor function that iterates over all the
  functions that profile-rt has registered with atexit; each one that is
  still registered is unregistered and then executed.

Scenarios tested:
(0) gcov dlopen/dlclose                                       (AIX/gcov-dlopen-dlclose.test)
(1) multiple dlopen/dlclose of the same lib and multiple libs (Posix/instrprof-dlopen-norpath.test)
(2) dlopen but no dlclose                                     (exists: Posix/instrprof-dlopen.test)
(3) a simple fork testcase with dlopen/dlclose                (Posix/instrprof-dlopen-norpath.test)
(4) dlopen/dlclose by multiple threads.                       (Posix/instrprof-dlopen-norpath.test)
(5) regular dynamic-linking of instrumented shared libs       (exists: AIX/shared-bexpall-pgo.c)
(6) a simple fork testcase produces correct profile           (instrprof-fork.c)


---------

Co-authored-by: Hubert Tong <hstong at ca.ibm.com>

Added: 
    compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
    compiler-rt/test/profile/Posix/instrprof-dlopen-norpath.test
    compiler-rt/test/profile/instrprof-fork.c

Modified: 
    compiler-rt/lib/profile/GCDAProfiling.c
    compiler-rt/lib/profile/InstrProfilingFile.c
    compiler-rt/lib/profile/InstrProfilingUtil.c
    compiler-rt/lib/profile/InstrProfilingUtil.h

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c
index 4f46fd2839b909..d6e2175169e4a5 100644
--- a/compiler-rt/lib/profile/GCDAProfiling.c
+++ b/compiler-rt/lib/profile/GCDAProfiling.c
@@ -617,9 +617,9 @@ void llvm_gcov_init(fn_ptr wfn, fn_ptr rfn) {
     atexit_ran = 1;
 
     /* Make sure we write out the data and delete the data structures. */
-    atexit(llvm_delete_reset_function_list);
+    lprofAtExit(llvm_delete_reset_function_list);
 #ifdef _WIN32
-    atexit(llvm_writeout_and_clear);
+    lprofAtExit(llvm_writeout_and_clear);
 #endif
   }
 }

diff  --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index db3918d8410319..60ea689e745478 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1259,7 +1259,7 @@ int __llvm_profile_register_write_file_atexit(void) {
   lprofSetupValueProfiler();
 
   HasBeenRegistered = 1;
-  return atexit(writeFileWithoutReturn);
+  return lprofAtExit(writeFileWithoutReturn);
 }
 
 COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,

diff  --git a/compiler-rt/lib/profile/InstrProfilingUtil.c b/compiler-rt/lib/profile/InstrProfilingUtil.c
index cd18cba3e268f9..642393d432d7ea 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.c
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.c
@@ -373,3 +373,72 @@ COMPILER_RT_VISIBILITY int lprofReleaseMemoryPagesToOS(uintptr_t Begin,
   return 0;
 #endif
 }
+
+#ifdef _AIX
+typedef struct fn_node {
+  AtExit_Fn_ptr func;
+  struct fn_node *next;
+} fn_node;
+typedef struct {
+  fn_node *top;
+} fn_stack;
+
+static void fn_stack_push(fn_stack *, AtExit_Fn_ptr);
+static AtExit_Fn_ptr fn_stack_pop(fn_stack *);
+/* return 1 if stack is empty, 0 otherwise */
+static int fn_stack_is_empty(fn_stack *);
+
+static fn_stack AtExit_stack = {0};
+#define ATEXIT_STACK (&AtExit_stack)
+
+/* On AIX, atexit() functions registered by a shared library do not get called
+ * when the library is dlclose'd, causing a crash when they are eventually
+ * called at main program exit. However, a destructor does get called. So we
+ * collect all atexit functions registered by profile-rt and at program
+ * termination time (normal exit, shared library unload, or dlclose) we walk
+ * the list and execute any function that is still sitting in the atexit system
+ * queue.
+ */
+__attribute__((__destructor__)) static void cleanup() {
+  while (!fn_stack_is_empty(ATEXIT_STACK)) {
+    AtExit_Fn_ptr func = fn_stack_pop(ATEXIT_STACK);
+    if (func && unatexit(func) == 0)
+      func();
+  }
+}
+
+static void fn_stack_push(fn_stack *st, AtExit_Fn_ptr func) {
+  fn_node *old_top, *n = (fn_node *)malloc(sizeof(fn_node));
+  n->func = func;
+
+  while (1) {
+    old_top = st->top;
+    n->next = old_top;
+    if (COMPILER_RT_BOOL_CMPXCHG(&st->top, old_top, n))
+      return;
+  }
+}
+static AtExit_Fn_ptr fn_stack_pop(fn_stack *st) {
+  fn_node *old_top, *new_top;
+  while (1) {
+    old_top = st->top;
+    if (old_top == 0)
+      return 0;
+    new_top = old_top->next;
+    if (COMPILER_RT_BOOL_CMPXCHG(&st->top, old_top, new_top)) {
+      AtExit_Fn_ptr func = old_top->func;
+      free(old_top);
+      return func;
+    }
+  }
+}
+
+static int fn_stack_is_empty(fn_stack *st) { return st->top == 0; }
+#endif
+
+COMPILER_RT_VISIBILITY int lprofAtExit(AtExit_Fn_ptr func) {
+#ifdef _AIX
+  fn_stack_push(ATEXIT_STACK, func);
+#endif
+  return atexit(func);
+}

diff  --git a/compiler-rt/lib/profile/InstrProfilingUtil.h b/compiler-rt/lib/profile/InstrProfilingUtil.h
index 4a88a03580941e..841204b6ea8a38 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.h
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.h
@@ -84,4 +84,9 @@ static inline size_t lprofRoundDownTo(size_t x, size_t boundary) {
 
 int lprofReleaseMemoryPagesToOS(uintptr_t Begin, uintptr_t End);
 
+typedef void (*AtExit_Fn_ptr)(void);
+
+/* Call atexit and perform other platform-specific bookkeeping. */
+int lprofAtExit(AtExit_Fn_ptr);
+
 #endif /* PROFILE_INSTRPROFILINGUTIL_H */

diff  --git a/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test b/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
new file mode 100644
index 00000000000000..21a56bd4199e84
--- /dev/null
+++ b/compiler-rt/test/profile/AIX/gcov-dlopen-dlclose.test
@@ -0,0 +1,55 @@
+RUN: rm -rf %t && split-file %s %t && cd %t
+RUN: %clang foo.c -c --coverage
+RUN: %clang foo2.c -c --coverage
+RUN: %clang -shared foo.o -o shr.o --coverage
+RUN: ar -X32_64 r libfoo.a shr.o
+RUN: %clang -shared foo2.o -o shr.o --coverage
+RUN: ar -X32_64 r libfoo2.a shr.o
+
+RUN: %clang common.c -c --coverage
+
+RUN: %clang test1.c common.o  --coverage
+RUN: ./a.out
+
+//--- foo.c
+void foo() {}
+
+//--- foo2.c
+void foo2() {}
+
+//--- common.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef void (*FN_PTR)();
+int open_close_libs() {
+  void *handle, *handle2;
+  FN_PTR foo, foo2;
+
+#define OPEN_AND_RUN(HANDLE, SUF)                                            \
+  HANDLE = dlopen("./lib" #SUF ".a(shr.o)",RTLD_NOW|RTLD_MEMBER);            \
+  SUF = (void (*)())dlsym(HANDLE, #SUF);                                     \
+  if (SUF == NULL) {                                                         \
+    fprintf(stderr, "unable to lookup symbol '%s': %s\n", #SUF, dlerror());  \
+    return EXIT_FAILURE;                                                     \
+  }                                                                          \
+  SUF();
+
+#define CLOSE_AND_CHECK(HANDLE, SUF)                                         \
+  dlclose(HANDLE);                                                           \
+  system("ls " #SUF ".gc*");
+
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle, foo)
+
+  OPEN_AND_RUN(handle2, foo2)
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle2, foo2)
+  CLOSE_AND_CHECK(handle, foo)
+  return EXIT_SUCCESS;
+}
+//--- test1.c
+int open_close_libs();
+int main() {
+  open_close_libs();
+}

diff  --git a/compiler-rt/test/profile/Posix/instrprof-dlopen-norpath.test b/compiler-rt/test/profile/Posix/instrprof-dlopen-norpath.test
new file mode 100644
index 00000000000000..8a7cca737dc8a4
--- /dev/null
+++ b/compiler-rt/test/profile/Posix/instrprof-dlopen-norpath.test
@@ -0,0 +1,160 @@
+RUN: rm -rf %t && split-file %s %t && cd %t
+RUN: %clang_pgogen foo.c -c -Xclang -fprofile-instrument-path="default_foo_%m.profraw"
+RUN: %clang_pgogen foo2.c -c -Xclang -fprofile-instrument-path="default_foo2_%m.profraw"
+RUN: %clang_pgogen -shared foo.o -o shr_foo.o %if target={{.*aix.*}} %{ -bcdtors:mbr %}
+RUN: %clang_pgogen -shared foo2.o -o shr_foo2.o
+
+RUN: %clang_pgogen common.c -c
+
+RUN: %clang_pgogen test1.c common.o -Xclang -fprofile-instrument-path="default_test1_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO
+RUN: llvm-profdata show default_test1_*.profraw --counts --all-functions 2>&1 | \
+RUN:   FileCheck %s -check-prefix=CHECK-TEST1
+RUN: rm -f default*
+
+RUN: %clang_pgogen test2.c common.o -Xclang -fprofile-instrument-path="default_test2_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO
+RUN: llvm-profdata show default_test2_*.profraw --counts --all-functions 2>&1 | \
+RUN:   FileCheck %s -check-prefix=CHECK-TEST2
+RUN: rm -f default*
+
+RUN: %clangxx_pgogen -lpthread test3.cpp common.o -Xclang -fprofile-instrument-path="default_test3_%m.profraw"
+RUN: ./a.out 2>&1 | FileCheck %s -check-prefix=CHECK-FOO-FOUR-THREADS
+
+CHECK-FOO:  foo:
+CHECK-FOO:    Block counts: [1]
+CHECK-FOO:  foo2:
+CHECK-FOO:    Block counts: [1]
+CHECK-FOO:  foo:
+CHECK-FOO:    Block counts: [2]
+
+CHECK-FOO-FOUR-THREADS:  foo:
+CHECK-FOO-FOUR-THREADS:    Block counts: [8]
+CHECK-FOO-FOUR-THREADS:  foo2:
+CHECK-FOO-FOUR-THREADS:    Block counts: [4]
+
+CHECK-TEST1:  main:
+CHECK-TEST1:    Block counts: [1, 0, 1, 0, 1, 1, 0]
+
+CHECK-TEST2: func1:
+CHECK-TEST2: Block counts: [4]
+CHECK-TEST2:  func2:
+CHECK-TEST2: Block counts: [1]
+
+
+//--- foo.c
+void foo() {}
+
+//--- foo2.c
+void foo2() {}
+
+//--- common.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef void (*FN_PTR)();
+int perform_check = 1;
+
+/* This function dlopens/dlcloses shr_foo.o twice and shr_foo2.o once. Each time
+ * it dlopens a library, it runs the sole function from that library. So the
+ * final counter value for foo and foo2 in each profile file is 2 and 1, resp.
+ */
+int open_close_libs() {
+  void *handle, *handle2;
+  FN_PTR foo, foo2;
+
+#define OPEN_AND_RUN(HANDLE, SUF)                                            \
+  HANDLE = dlopen("./shr_" #SUF ".o", RTLD_NOW);                             \
+  SUF = (void (*)())dlsym(HANDLE, #SUF);                                     \
+  if (SUF == NULL) {                                                         \
+    fprintf(stderr, "unable to lookup symbol '%s': %s\n", #SUF, dlerror());  \
+    return EXIT_FAILURE;                                                     \
+  }                                                                          \
+  SUF();
+
+#define CHECK_ONLY(SUF)                                                      \
+  system("llvm-profdata show default_" #SUF "_*.profraw --counts --all-functions");
+
+#define CLOSE_AND_CHECK(HANDLE, SUF)                                         \
+  dlclose(HANDLE);                                                           \
+  if (perform_check) { CHECK_ONLY(SUF) }
+
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle, foo)
+
+  OPEN_AND_RUN(handle2, foo2)
+  OPEN_AND_RUN(handle, foo)
+  CLOSE_AND_CHECK(handle2, foo2)
+  CLOSE_AND_CHECK(handle, foo)
+  return EXIT_SUCCESS;
+}
+void check_prof_files() {
+  CHECK_ONLY(foo)
+  CHECK_ONLY(foo2)
+}
+
+//--- test1.c
+int open_close_libs();
+int main() {
+  open_close_libs();
+}
+
+//--- test2.c
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+__attribute__((noinline)) void func1() {}
+__attribute__((noinline)) void func2() {}
+void open_close_libs();
+
+int main(void) {
+  int status;
+  func1();
+  pid_t pid = fork();
+  if (pid == -1)
+    return 1;
+  if (pid == 0) { // child
+    open_close_libs();
+    func2();
+  }
+  func1();
+  if (pid)
+    wait(&status);
+  return 0;
+}
+
+//--- test3.cpp
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+
+extern "C" void check_prof_files();
+extern "C" void open_close_libs();
+extern int perform_check;
+
+template <typename T>
+void launcher(T func) {
+  auto t1 = std::thread(func);
+  auto t2 = std::thread(func);
+  auto t3 = std::thread(func);
+  auto t4 = std::thread(func);
+
+  t1.join();
+  t2.join();
+  t3.join();
+  t4.join();
+}
+
+int main() {
+  // don't check profiles generated inside open_close_libs because
+  // you'll get non-deterministic output due to threading.
+  perform_check = 0;
+  launcher<>(open_close_libs);
+
+  // instead, check the profiles manually here in the main thread.
+  check_prof_files();
+  return 0;
+}

diff  --git a/compiler-rt/test/profile/instrprof-fork.c b/compiler-rt/test/profile/instrprof-fork.c
new file mode 100644
index 00000000000000..8b2bdd056a2f9b
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-fork.c
@@ -0,0 +1,30 @@
+// A simple fork results in two processes writing to the same file
+// RUN: rm -fr %t.profdir
+// RUN: %clang_pgogen=%t.profdir -o %t -O2 %s
+// RUN: %run %t
+// RUN: llvm-profdata show --all-functions --counts %t.profdir/default_*.profraw  | FileCheck %s
+//
+// CHECK: func1:
+// CHECK: Block counts: [4]
+// CHECK:  func2:
+// CHECK: Block counts: [1]
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+__attribute__((noinline)) void func1() {}
+__attribute__((noinline)) void func2() {}
+
+int main(void) {      //   child     | parent
+  int status;         // func1 func2 | func1 func2
+  func1();            //   +1        |   +1        (*)
+  pid_t pid = fork(); //             |
+  if (pid == -1)      //             |
+    return 1;         //             |
+  if (pid == 0)       //             |
+    func2();          //         +1  |
+  func1();            //   +1        |   +1
+  if (pid)            // ------------+------------
+    wait(&status);    //    2     1  |    2    0
+  return 0;           // (*)  the child inherits counter values prior to fork
+}                     //      from the parent in non-continuous mode.


        


More information about the llvm-commits mailing list