[test-suite] r336970 - [XRay][test-suite] Benchmarks for profiling mode implementation

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 12 21:48:41 PDT 2018


Author: dberris
Date: Thu Jul 12 21:48:41 2018
New Revision: 336970

URL: http://llvm.org/viewvc/llvm-project?rev=336970&view=rev
Log:
[XRay][test-suite] Benchmarks for profiling mode implementation

Summary:
This patch adds microbenchmarks for the XRay Profiling Mode
implementation to the test-suite.

The benchmarks included cover:

- Cost of the Profiling Mode runtime handler(s) and underlying
  implementation details when enabled.

- Separate benchmarks for different call stack shapes, initially covering
  deep, shallow, and wide function call stacks.

These microbenchmarks can be used to measure progress on the
optimisation work associated with the profiling mode runtime
implementation going forward. They also allow us to better qualify the
cost of the XRay runtime framework (in particular the trampolines) as we
make improvements to those in the future.

Depends on D48653.

Reviewers: kpw, eizan

Reviewed By: eizan

Subscribers: mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D48879

Added:
    test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/
    test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt
    test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc
    test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc
    test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc
Modified:
    test-suite/trunk/MicroBenchmarks/XRay/CMakeLists.txt

Modified: test-suite/trunk/MicroBenchmarks/XRay/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MicroBenchmarks/XRay/CMakeLists.txt?rev=336970&r1=336969&r2=336970&view=diff
==============================================================================
--- test-suite/trunk/MicroBenchmarks/XRay/CMakeLists.txt (original)
+++ test-suite/trunk/MicroBenchmarks/XRay/CMakeLists.txt Thu Jul 12 21:48:41 2018
@@ -1,2 +1,3 @@
 add_subdirectory(ReturnReference)
 add_subdirectory(FDRMode)
+add_subdirectory(ProfilingMode)

Added: test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt?rev=336970&view=auto
==============================================================================
--- test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt (added)
+++ test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/CMakeLists.txt Thu Jul 12 21:48:41 2018
@@ -0,0 +1,20 @@
+# XRay profiling-mode microbenchmarks. They are configured only when ARCH is
+# "x86" and the compiler accepts both XRay flags probed below.
+check_cxx_compiler_flag(-fxray-instrument COMPILER_HAS_FXRAY_INSTRUMENT)
+check_cxx_compiler_flag(-fxray-modes=xray-profiling COMPILER_HAS_FXRAY_PROFILING)
+
+if(ARCH STREQUAL "x86"
+    AND COMPILER_HAS_FXRAY_INSTRUMENT AND COMPILER_HAS_FXRAY_PROFILING)
+  # Compile and link every benchmark with XRay instrumentation and the
+  # profiling mode enabled.
+  list(APPEND CPPFLAGS -std=c++11 -Wl,--gc-sections
+       -fxray-instrument -fxray-modes=xray-profiling)
+  list(APPEND LDFLAGS -fxray-instrument -fxray-modes=xray-profiling)
+  llvm_test_run()
+  # One executable per call-stack shape, each linked against `benchmark`.
+  foreach(bench deep-call-bench shallow-call-bench wide-call-bench)
+    llvm_test_executable(${bench} ${bench}.cc)
+    target_link_libraries(${bench} benchmark)
+  endforeach()
+endif()
+

Added: test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc?rev=336970&view=auto
==============================================================================
--- test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc (added)
+++ test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/deep-call-bench.cc Thu Jul 12 21:48:41 2018
@@ -0,0 +1,90 @@
+//===- deep-call-bench.cc - XRay Profiling Mode Benchmarks ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+// Value loaded by deep() in every frame of the recursion.
+std::atomic<int> some_global{1};
+// Receives the result of each benchmarked call inside DoNotOptimize().
+std::atomic<int> some_temporary{0};
+
+// Writes `msg` to stderr and aborts the process.
+[[clang::xray_never_instrument]] static void fail(const char *msg) {
+  std::cerr << msg;
+  std::abort();
+}
+
+// Selects and initializes the "xray-profiling" log mode, then patches the
+// XRay instrumentation points. Aborts if any of the three steps fails.
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK)
+    fail("Failed selecting 'xray-profiling' mode. Aborting.\n");
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED)
+    fail("Failed initializing xray-profiling mode. Aborting.\n");
+  // If patching fails the instrumentation stays inactive; don't measure that.
+  if (__xray_patch() != XRayPatchingStatus::SUCCESS)
+    fail("Failed patching XRay instrumentation points. Aborting.\n");
+}
+
+// Finalizes the profiling log and flushes it; aborts if either step fails.
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+  if (__xray_log_finalize() != XRAY_LOG_FINALIZED)
+    fail("Failed to finalize xray-profiling mode. Aborting.\n");
+  if (__xray_log_flushLog() != XRAY_LOG_FLUSHED)
+    fail("Failed to flush xray-profiling mode. Aborting.\n");
+}
+
+}  // namespace
+
+// Sums one acquire-load of some_global per frame, over depth + 1 frames.
+[[clang::xray_always_instrument]] __attribute__((weak, noinline)) int
+deep(int depth) {
+  const int loaded = some_global.load(std::memory_order_acquire);
+  return depth == 0 ? loaded : loaded + deep(depth - 1);
+}
+
+// Measures XRay profiling-mode overhead for deep call stacks in which every
+// frame is instrumented. The recursion depth comes from the benchmark's range
+// argument; deep() is noinline, weak and force-instrumented so that every
+// level is a real, instrumented call.
+//
+// Thread 0 initializes the profiling runtime before the measurement loop and
+// tears it down after it, once per invocation of this function rather than
+// once per loop iteration. The benchmark is registered for several thread
+// counts to expose contention and scalability issues in the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingDeepCallStack(
+    benchmark::State &state) {
+  if (state.thread_index == 0) profiling_setup();
+
+  // Read the depth once so the lookup and narrowing stay out of the loop.
+  const int depth = static_cast<int>(state.range(0));
+  benchmark::DoNotOptimize(some_temporary = deep(depth));  // Untimed call.
+  for (auto _ : state)
+    benchmark::DoNotOptimize(some_temporary = deep(depth));
+
+  if (state.thread_index == 0) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingDeepCallStack)
+    ->ThreadRange(1, 32)
+    ->RangeMultiplier(2)
+    ->Range(1, 64)
+    ->UseRealTime();
+
+BENCHMARK_MAIN();

Added: test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc?rev=336970&view=auto
==============================================================================
--- test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc (added)
+++ test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/shallow-call-bench.cc Thu Jul 12 21:48:41 2018
@@ -0,0 +1,84 @@
+//===- shallow-call-bench.cc - XRay Profiling Mode Benchmarks -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+// Counter incremented by shallow() on every call.
+std::atomic<int> some_global{0};
+// Receives the result of each benchmarked call inside DoNotOptimize().
+std::atomic<int> some_temporary{0};
+
+// Writes `msg` to stderr and aborts the process.
+[[clang::xray_never_instrument]] static void fail(const char *msg) {
+  std::cerr << msg;
+  std::abort();
+}
+
+// Selects and initializes the "xray-profiling" log mode, then patches the
+// XRay instrumentation points. Aborts if any of the three steps fails.
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK)
+    fail("Failed selecting 'xray-profiling' mode. Aborting.\n");
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED)
+    fail("Failed initializing xray-profiling mode. Aborting.\n");
+  // If patching fails the instrumentation stays inactive; don't measure that.
+  if (__xray_patch() != XRayPatchingStatus::SUCCESS)
+    fail("Failed patching XRay instrumentation points. Aborting.\n");
+}
+
+// Finalizes the profiling log and flushes it; aborts if either step fails.
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+  if (__xray_log_finalize() != XRAY_LOG_FINALIZED)
+    fail("Failed to finalize xray-profiling mode. Aborting.\n");
+  if (__xray_log_flushLog() != XRAY_LOG_FLUSHED)
+    fail("Failed to flush xray-profiling mode. Aborting.\n");
+}
+
+}  // namespace
+
+// Marks a function as always XRay-instrumented, weak and never inlined.
+#define XRAY_WEAK_NOINLINE \
+  [[clang::xray_always_instrument]] __attribute__((weak, noinline))
+// Instrumented leaf: increments some_global and returns its previous value.
+XRAY_WEAK_NOINLINE int shallow() {
+  return some_global.fetch_add(1, std::memory_order_acq_rel);
+}
+
+// Measures XRay profiling-mode overhead for the shallowest possible stack: a
+// single call to shallow(), which is noinline, weak and force-instrumented.
+//
+// Thread 0 initializes the profiling runtime before the measurement loop and
+// tears it down after it, once per invocation of this function rather than
+// once per loop iteration. The benchmark is registered for several thread
+// counts to expose contention and scalability issues in the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingShallowStack(
+    benchmark::State &state) {
+  const bool owns_runtime = state.thread_index == 0;
+  if (owns_runtime) profiling_setup();
+
+  benchmark::DoNotOptimize(some_temporary = shallow());  // Untimed call.
+  for (auto _ : state)
+    benchmark::DoNotOptimize(some_temporary = shallow());
+
+  if (owns_runtime) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingShallowStack)->ThreadRange(1, 64)->UseRealTime();
+
+BENCHMARK_MAIN();

Added: test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc?rev=336970&view=auto
==============================================================================
--- test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc (added)
+++ test-suite/trunk/MicroBenchmarks/XRay/ProfilingMode/wide-call-bench.cc Thu Jul 12 21:48:41 2018
@@ -0,0 +1,142 @@
+//===- wide-call-bench.cc - XRay Profiling Mode Benchmarks ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These benchmarks measure the cost of XRay profiling mode when enabled.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <iostream>
+#include <mutex>
+#include <thread>
+#include "benchmark/benchmark.h"
+#include "xray/xray_log_interface.h"
+
+namespace {
+// Value loaded by call() and by each of the wideN() functions.
+std::atomic<int> some_global{1};
+// Receives the result of each benchmarked call inside DoNotOptimize().
+std::atomic<int> some_temporary{0};
+
+// Writes `msg` to stderr and aborts the process.
+[[clang::xray_never_instrument]] static void fail(const char *msg) {
+  std::cerr << msg;
+  std::abort();
+}
+
+// Selects and initializes the "xray-profiling" log mode, then patches the
+// XRay instrumentation points. Aborts if any of the three steps fails.
+[[clang::xray_never_instrument]] static void profiling_setup() {
+  if (__xray_log_select_mode("xray-profiling") != XRAY_REGISTRATION_OK)
+    fail("Failed selecting 'xray-profiling' mode. Aborting.\n");
+  if (__xray_log_init_mode("xray-profiling", "no_flush=true") !=
+      XRAY_LOG_INITIALIZED)
+    fail("Failed initializing xray-profiling mode. Aborting.\n");
+  // If patching fails the instrumentation stays inactive; don't measure that.
+  if (__xray_patch() != XRayPatchingStatus::SUCCESS)
+    fail("Failed patching XRay instrumentation points. Aborting.\n");
+}
+
+// Finalizes the profiling log and flushes it; aborts if either step fails.
+[[clang::xray_never_instrument]] static void profiling_teardown() {
+  if (__xray_log_finalize() != XRAY_LOG_FINALIZED)
+    fail("Failed to finalize xray-profiling mode. Aborting.\n");
+  if (__xray_log_flushLog() != XRAY_LOG_FLUSHED)
+    fail("Failed to flush xray-profiling mode. Aborting.\n");
+}
+
+}  // namespace
+
+#define XRAY_WEAK_NOINLINE                                \
+  [[clang::xray_always_instrument]] __attribute__((weak)) \
+      __attribute__((noinline))
+
+XRAY_WEAK_NOINLINE int wide8() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide7() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide6() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide5() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide4() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide3() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide2() {
+  return some_global.load(std::memory_order_acquire);
+}
+XRAY_WEAK_NOINLINE int wide1() {
+  return some_global.load(std::memory_order_acquire);
+}
+// Instrumented recursion with fan-out. The leaf frame (depth == 0) only loads
+// some_global; every other frame first calls a run of wideN() functions and
+// then recurses with depth - 1. A width in [1, 8] calls wide<width> down to
+// wide1; any other width calls all eight. The result is the sum of every
+// value loaded along the way.
+XRAY_WEAK_NOINLINE int call(int depth, int width) {
+  if (depth == 0) return some_global.load(std::memory_order_acquire);
+
+  int fanout_sum = 0;
+  // Every case deliberately falls through to the next one, so entering at
+  // `width` runs that leaf and all lower-numbered ones.
+  switch (width) {
+    default:
+    case 8: fanout_sum += wide8();
+    case 7: fanout_sum += wide7();
+    case 6: fanout_sum += wide6();
+    case 5: fanout_sum += wide5();
+    case 4: fanout_sum += wide4();
+    case 3: fanout_sum += wide3();
+    case 2: fanout_sum += wide2();
+    case 1: fanout_sum += wide1();
+  }
+
+  // This frame's own load, plus the fan-out, plus the rest of the recursion.
+  const int own = some_global.load(std::memory_order_acquire);
+  return own + fanout_sum + call(depth - 1, width);
+}
+
+// Measures XRay profiling-mode overhead for wide call stacks: an instrumented
+// recursion in which every non-leaf frame also calls a run of instrumented
+// leaf functions. The first range argument is the recursion depth and the
+// second is the width (number of leaf calls per frame); the width is
+// dispatched through a switch rather than a loop. call() and the leaves are
+// noinline, weak and force-instrumented.
+//
+// Thread 0 initializes the profiling runtime before the measurement loop and
+// tears it down after it, once per invocation of this function rather than
+// once per loop iteration. The benchmark is registered for several thread
+// counts to expose contention and scalability issues in the profiling runtime.
+[[clang::xray_never_instrument]] static void BM_XRayProfilingWideCallStack(
+    benchmark::State &state) {
+  if (state.thread_index == 0) profiling_setup();
+
+  // Read both arguments once, outside the measurement loop.
+  const int depth = static_cast<int>(state.range(0));
+  const int width = static_cast<int>(state.range(1));
+  benchmark::DoNotOptimize(some_temporary = call(depth, width));  // Untimed.
+  for (auto _ : state)
+    benchmark::DoNotOptimize(some_temporary = call(depth, width));
+
+  if (state.thread_index == 0) profiling_teardown();
+}
+BENCHMARK(BM_XRayProfilingWideCallStack)
+    ->ThreadRange(1, 32)
+    ->RangeMultiplier(2)
+    ->Ranges({{1, 64}, {1, 8}})
+    ->UseRealTime();
+
+BENCHMARK_MAIN();




More information about the llvm-commits mailing list