[llvm] [Offload] Introduce the offload sanitizer (initially for traps) (PR #101417)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 14 16:09:10 PST 2024


https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/101417

>From 2459befdf49f0728610ce805f31a87648a71ba62 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 24 Jul 2024 16:03:19 -0700
Subject: [PATCH] [Offload] Introduce the offload sanitizer (initially for
 traps)

This is the first commit for a new "OffloadSanitizer" that is designed
to work well on GPUs. To keep the commit small, only traps are sanitized
and we only report information about the encountering thread. It is also
restricted to AMD GPUs for now, though that is not a conceptual
requirement.

The communication between the instrumented device code and the runtime
is performed via host initialized pinned memory. If an error is
detected, one encountering thread will setup this sanitizer environment
and a hardware trap is executed to end the kernel. The host trap handler
can check the sanitizer environment to determine if the trap was issued
by the sanitizer code or not. If so, we report the reason (for now only
that a trap was encountered), the encountering thread id, and the PC.
---
 .../Instrumentation/OffloadSanitizer.h        |  27 +++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   9 +
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 +
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/OffloadSanitizer.cpp      | 160 ++++++++++++++++++
 .../Instrumentation/OffloadSanitizer/basic.ll |  56 ++++++
 offload/DeviceRTL/CMakeLists.txt              |   1 +
 offload/DeviceRTL/src/Sanitizer.cpp           |  96 +++++++++++
 offload/include/Shared/Environment.h          |  25 +++
 .../common/include/ErrorReporting.h           |  46 ++++-
 .../common/include/PluginInterface.h          |   7 +
 .../common/src/PluginInterface.cpp            |  20 +++
 offload/test/sanitizer/kernel_trap.c          |  27 +--
 offload/test/sanitizer/kernel_trap.cpp        |  13 +-
 offload/test/sanitizer/kernel_trap_all.c      |  31 ++++
 offload/test/sanitizer/kernel_trap_async.c    |  14 +-
 offload/test/sanitizer/kernel_trap_many.c     |  19 ++-
 19 files changed, 524 insertions(+), 31 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
 create mode 100644 llvm/test/Instrumentation/OffloadSanitizer/basic.ll
 create mode 100644 offload/DeviceRTL/src/Sanitizer.cpp
 create mode 100644 offload/test/sanitizer/kernel_trap_all.c

diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
new file mode 100644
index 00000000000000..6935b7dc390c40
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
@@ -0,0 +1,27 @@
+//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to instrument offload code in order to detect errors and communicate
+// them to the LLVM/Offload runtimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index df7c9a4fbb9387..ac06e95f854ef5 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -195,6 +195,7 @@
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/Instrumentation/CGProfile.h"
 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index da179a6610afd5..656328c4cd41e2 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -155,6 +155,7 @@ MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
 MODULE_PASS("verify", VerifierPass())
 MODULE_PASS("view-callgraph", CallGraphViewerPass())
 MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
+MODULE_PASS("offload-sanitizer", OffloadSanitizerPass())
 #undef MODULE_PASS
 
 #ifndef MODULE_PASS_WITH_PARAMS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 603339e200dde9..0b481897e337ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -74,6 +74,7 @@
 #include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/FlattenCFG.h"
@@ -448,6 +449,11 @@ static cl::opt<bool>
                            cl::desc("Enable AMDGPUAttributorPass"),
                            cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+    EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
+                           cl::desc("Enable the offload sanitizer"),
+                           cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -823,6 +829,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        if (EnableOffloadSanitizer)
+          PM.addPass(OffloadSanitizerPass());
+
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index fed29c3e14aae2..d74f484a172048 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -186,6 +186,7 @@ add_llvm_target(AMDGPUCodeGen
   Core
   GlobalISel
   HipStdPar
+  Instrumentation
   IPO
   IRPrinter
   Instrumentation
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 3e3c3eced4bb9c..e3f34277f5be3b 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
   MemProfiler.cpp
   MemorySanitizer.cpp
   NumericalStabilitySanitizer.cpp
+  OffloadSanitizer.cpp
   IndirectCallPromotion.cpp
   InstrOrderFile.cpp
   InstrProfiling.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
new file mode 100644
index 00000000000000..a24fdc477a0630
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -0,0 +1,160 @@
+//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "offload-sanitizer"
+
+namespace {
+
+class OffloadSanitizerImpl final {
+public:
+  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
+      : M(M), FAM(FAM), Ctx(M.getContext()) {}
+
+  bool instrument();
+
+private:
+  bool shouldInstrumentFunction(Function &Fn);
+  bool instrumentFunction(Function &Fn);
+  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+
+  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
+                               ArrayRef<Type *> ArgTys) {
+    if (!FC) {
+      auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false);
+      FC = M.getOrInsertFunction(Name, NewAllocationFnTy);
+    }
+    return FC;
+  }
+
+  /// void __offload_san_trap_info(Int64Ty);
+  FunctionCallee TrapInfoFn;
+  FunctionCallee getTrapInfoFn() {
+    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
+                         {/*PC*/ Int64Ty});
+  }
+
+  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
+                       ArrayRef<Value *> Args = std::nullopt,
+                       const Twine &Name = "") {
+    Calls.push_back(IRB.CreateCall(Callee, Args, Name));
+    return Calls.back();
+  }
+  SmallVector<CallInst *> Calls;
+
+  Value *getPC(IRBuilder<> &IRB) {
+    return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr,
+                               "PC");
+  }
+
+  Module &M;
+  FunctionAnalysisManager &FAM;
+  LLVMContext &Ctx;
+
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  PointerType *PtrTy = PointerType::getUnqual(Ctx);
+  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
+
+  const DataLayout &DL = M.getDataLayout();
+};
+
+} // end anonymous namespace
+
+bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
+  if (Fn.isDeclaration())
+    return false;
+  if (Fn.getName().contains("ompx") || Fn.getName().contains("__kmpc") ||
+      Fn.getName().starts_with("rpc_"))
+    return false;
+  return !Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
+}
+
+bool OffloadSanitizerImpl::instrumentTrapInstructions(
+    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
+  bool Changed = false;
+  for (auto *II : TrapCalls) {
+    IRBuilder<> IRB(II);
+    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
+  }
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
+  if (!shouldInstrumentFunction(Fn))
+    return false;
+
+  SmallVector<IntrinsicInst *> TrapCalls;
+
+  bool Changed = false;
+  for (auto &I : instructions(Fn)) {
+    switch (I.getOpcode()) {
+    case Instruction::Call: {
+      auto &CI = cast<CallInst>(I);
+      if (auto *II = dyn_cast<IntrinsicInst>(&CI))
+        if (II->getIntrinsicID() == Intrinsic::trap)
+          TrapCalls.push_back(II);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  Changed |= instrumentTrapInstructions(TrapCalls);
+
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrument() {
+  bool Changed = false;
+
+  for (Function &Fn : M)
+    Changed |= instrumentFunction(Fn);
+
+  removeFromUsedLists(M, [&](Constant *C) {
+    if (!C->getName().starts_with("__offload_san"))
+      return false;
+    return Changed = true;
+  });
+
+  return Changed;
+}
+
+PreservedAnalyses OffloadSanitizerPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  OffloadSanitizerImpl Impl(M, FAM);
+  if (!Impl.instrument())
+    return PreservedAnalyses::all();
+  LLVM_DEBUG(M.dump());
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
new file mode 100644
index 00000000000000..8719b3a5c2bfb4
--- /dev/null
+++ b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+; Test basic offload sanitizer trap instrumentation.
+
+; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s
+
+define void @test_trap1() {
+; CHECK-LABEL: define void @test_trap1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.trap()
+  ret void
+}
+
+define void @test_trap2() {
+; CHECK-LABEL: define void @test_trap2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  call void @llvm.trap()
+  unreachable
+}
+
+define void @test_trap3(i1 %c) {
+; CHECK-LABEL: define void @test_trap3(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK:       [[T]]:
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[F]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %c, label %t ,label %f
+t:
+  call void @llvm.trap()
+  unreachable
+f:
+  ret void
+}
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 96cb79b7d071c5..8d20e3a396226e 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -96,6 +96,7 @@ set(src_files
   ${source_directory}/Parallelism.cpp
   ${source_directory}/Profiling.cpp
   ${source_directory}/Reduction.cpp
+  ${source_directory}/Sanitizer.cpp
   ${source_directory}/State.cpp
   ${source_directory}/Synchronization.cpp
   ${source_directory}/Tasking.cpp
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
new file mode 100644
index 00000000000000..d524eed0d9a930
--- /dev/null
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -0,0 +1,96 @@
+//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "DeviceTypes.h"
+#include "DeviceUtils.h"
+#include "Mapping.h"
+#include "Shared/Environment.h"
+#include "Synchronization.h"
+#include "gpuintrin.h"
+
+using namespace ompx;
+
+#define _SAN_ATTRS                                                             \
+  [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]]
+#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS
+
+#pragma omp begin declare target device_type(nohost)
+
+[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy
+    *__sanitizer_environment_ptr;
+
+namespace {
+
+/// Helper to lock the sanitizer environment. While we never unlock it, this
+/// allows us to have a no-op "side effect" in the spin-wait function below.
+_SAN_ATTRS bool
+getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE,
+                            SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) {
+  return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE,
+                     ErrorCode, atomic::OrderingTy::seq_cst,
+                     atomic::OrderingTy::seq_cst);
+}
+
+/// The spin-wait function should not be inlined, it's a catch all to give one
+/// thread time to setup the sanitizer environment.
+[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) {
+  while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire))
+    ;
+  __builtin_trap();
+}
+
+_SAN_ATTRS
+void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) {
+  for (int I = 0; I < 3; ++I) {
+    SE.ThreadId[I] = mapping::getThreadIdInBlock(I);
+    SE.BlockId[I] = mapping::getBlockIdInKernel(I);
+  }
+  SE.PC = PC;
+
+  // This is the last step to initialize the sanitizer environment, time to
+  // trap via the spinWait. Flush the memory writes and signal for the end.
+  fence::system(atomic::OrderingTy::release);
+  atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release);
+}
+
+_SAN_ATTRS
+void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode,
+                         uint64_t PC) {
+  SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr;
+  bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode);
+
+  // If no thread of this warp has the lock, end execution gracefully.
+  bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock);
+  if (!AnyThreadHasLock)
+    __gpu_exit();
+
+  // One thread will set the location information and signal that the rest of
+  // the wapr that the actual trap can be executed now.
+  if (HasLock)
+    setLocation(SE, PC);
+
+  synchronize::warp(lanes::All);
+
+  // This is not the first thread that encountered the trap, to avoid a race
+  // on the sanitizer environment, this thread is simply going to spin-wait.
+  // The trap above will end the program for all threads.
+  spinWait(SE);
+}
+
+} // namespace
+
+extern "C" {
+
+_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
+  raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
+}
+}
+
+#pragma omp end declare target
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 147583c209fc3e..6aab284f6c9743 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -105,4 +105,29 @@ struct KernelLaunchEnvironmentTy {
   void *ReductionBuffer = nullptr;
 };
 
+/// The environment used to communicate sanitizer information from the device to
+/// the host.
+struct SanitizerEnvironmentTy {
+  enum ErrorCodeTy : uint8_t {
+    NONE = 0,
+    TRAP,
+    LAST = TRAP,
+  } ErrorCode;
+
+  /// Flag to indicate the environment has been initialized fully.
+  uint8_t IsInitialized;
+
+  /// Return the error code location for use in an atomic compare-and-swap.
+  uint8_t *getErrorCodeLocation() {
+    return reinterpret_cast<uint8_t *>(&ErrorCode);
+  }
+
+  /// Thread info
+  /// {
+  uint32_t ThreadId[3];
+  uint32_t BlockId[3];
+  uint64_t PC;
+  /// }
+};
+
 #endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 8478977a8f86af..8207512e41fd39 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -12,6 +12,7 @@
 #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
 
 #include "PluginInterface.h"
+#include "Shared/Environment.h"
 #include "Shared/EnvironmentVar.h"
 
 #include "llvm/ADT/STLExtras.h"
@@ -105,6 +106,15 @@ class ErrorReporter {
     print(BoldRed, Format, Args...);
     print("\n");
   }
+
+  /// Print \p Format, instantiated with \p Args to stderr, but colored with
+  /// a banner.
+  template <typename... ArgsTy>
+  [[gnu::format(__printf__, 1, 2)]] static void
+  reportWarning(const char *Format, ArgsTy &&...Args) {
+    print(Yellow, "WARNING: ");
+    print(Yellow, Format, Args...);
+  }
 #pragma clang diagnostic pop
 
   static void reportError(const char *Str) { reportError("%s", Str); }
@@ -115,6 +125,13 @@ class ErrorReporter {
     print(Color, "%s", Str.str().c_str());
   }
 
+  static void reportLocation(SanitizerEnvironmentTy &SE) {
+    print(BoldLightPurple,
+          "Triggered by thread <%u,%u,%u> block <%u,%u,%u> PC %p\n",
+          SE.ThreadId[0], SE.ThreadId[1], SE.ThreadId[2], SE.BlockId[0],
+          SE.BlockId[1], SE.BlockId[2], (void *)SE.PC);
+  }
+
   /// Pretty print a stack trace.
   static void reportStackTrace(StringRef StackTrace) {
     if (StackTrace.empty())
@@ -280,6 +297,16 @@ class ErrorReporter {
       std::function<bool(__tgt_async_info &)> AsyncInfoWrapperMatcher) {
     assert(AsyncInfoWrapperMatcher && "A matcher is required");
 
+    SanitizerEnvironmentTy *SE = nullptr;
+    for (auto &It : Device.SanitizerEnvironmentMap) {
+      if (It.second->ErrorCode == SanitizerEnvironmentTy::NONE)
+        continue;
+      if (SE)
+        reportWarning(
+            "Multiple errors encountered, information might be inaccurate.");
+      SE = It.second;
+    }
+
     uint32_t Idx = 0;
     for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
       auto KTI = KTIR.getKernelTraceInfo(I);
@@ -298,7 +325,24 @@ class ErrorReporter {
           llvm::omp::prettifyFunctionName(KTI.Kernel->getName());
       reportError("Kernel '%s'", PrettyKernelName.c_str());
     }
-    reportError("execution interrupted by hardware trap instruction");
+    assert((!SE || SE->ErrorCode != SanitizerEnvironmentTy::NONE) &&
+           "Unexpected sanitizer environment");
+    if (!SE) {
+      reportError("execution stopped, reason is unknown");
+      print(Yellow, "Compile with '-mllvm -amdgpu-enable-offload-sanitizer' "
+                    "improved diagnosis\n");
+    } else {
+      switch (SE->ErrorCode) {
+      case SanitizerEnvironmentTy::TRAP:
+        reportError("execution interrupted by hardware trap instruction");
+        break;
+      default:
+        reportError(
+            "execution stopped, reason is unknown due to invalid error code");
+      }
+
+      reportLocation(*SE);
+    }
     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
       if (!KTI.LaunchTrace.empty())
         reportStackTrace(KTI.LaunchTrace);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 41cc0f286a581f..38da73528a1599 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -712,6 +712,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
                               uint64_t PoolSize);
 
+  /// Setup the sanitizer environment to receive sanitizer information from the
+  /// device.
+  Error setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                  DeviceImageTy &Image);
+
   // Setup the RPC server for this device if needed. This may not run on some
   // plugins like the CPU targets. By default, it will not be executed so it is
   // up to the target to override this using the shouldSetupRPCServer function.
@@ -931,6 +936,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// Allocate and construct a kernel object.
   virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
 
+  DenseMap<DeviceImageTy *, SanitizerEnvironmentTy *> SanitizerEnvironmentMap;
+
   /// Reference to the underlying plugin that created this device.
   GenericPluginTy &Plugin;
 
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 25b815b7f96694..33c9fa04797ce2 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -927,6 +927,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
       return std::move(Err);
   }
 
+  if (auto Err = setupSanitizerEnvironment(Plugin, *Image))
+    return std::move(Err);
+
   if (auto Err = setupRPCServer(Plugin, *Image))
     return std::move(Err);
 
@@ -1032,6 +1035,23 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
   return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
 }
 
+Error GenericDeviceTy::setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                                 DeviceImageTy &Image) {
+  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
+  if (!GHandler.isSymbolInImage(*this, Image, "__sanitizer_environment_ptr"))
+    return Plugin::success();
+
+  auto *&SanitizerEnvironment = SanitizerEnvironmentMap[&Image];
+  SanitizerEnvironment = reinterpret_cast<SanitizerEnvironmentTy *>(allocate(
+      sizeof(*SanitizerEnvironment), &SanitizerEnvironment, TARGET_ALLOC_HOST));
+  memset(SanitizerEnvironment, '\0', sizeof(SanitizerEnvironmentTy));
+
+  GlobalTy SanitizerEnvironmentGlobal("__sanitizer_environment_ptr",
+                                      sizeof(SanitizerEnvironment),
+                                      &SanitizerEnvironment);
+  return GHandler.writeGlobalToDevice(*this, Image, SanitizerEnvironmentGlobal);
+}
+
 Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
                                       DeviceImageTy &Image) {
   // The plugin either does not need an RPC server or it is unavailible.
diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c
index 91c4c7229159bc..d046785f3c22bd 100644
--- a/offload/test/sanitizer/kernel_trap.c
+++ b/offload/test/sanitizer/kernel_trap.c
@@ -1,11 +1,14 @@
 
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -25,19 +28,23 @@ int main(void) {
   {
   }
 #pragma omp target
+  {}
+#pragma omp target teams num_teams(32) thread_limit(128)
   {
-  }
-#pragma omp target
-  {
-    __builtin_trap();
+#pragma omp parallel
+    if (omp_get_team_num() == 17 && omp_get_thread_num() == 42)
+      __builtin_trap();
   }
 #pragma omp target
   {
   }
 }
 // clang-format off
-// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 32 (__omp_offloading_{{.*}}_main_l32)'
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// SANIT: Triggered by thread <42,0,0> block <17,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap.c:
diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp
index c67b3857fabba1..e950e6cba4129e 100644
--- a/offload/test/sanitizer/kernel_trap.cpp
+++ b/offload/test/sanitizer/kernel_trap.cpp
@@ -1,11 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compilexx-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
-// RUN: %libomptarget-compilexx-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compilexx-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -43,7 +43,8 @@ int main(void) {
 
 // clang-format off
 // CHECK: OFFLOAD ERROR: Kernel 'omp target in void cxx_function_name<S>(int, S*) @ [[LINE:[0-9]+]] (__omp_offloading_{{.*}}__Z17cxx_function_nameI1SEviPT__l[[LINE]])'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // NDEBG:     cxx_function_name<S>(int, S*)
 // NDEBG:     main
diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c
new file mode 100644
index 00000000000000..379ca8362aa83d
--- /dev/null
+++ b/offload/test/sanitizer/kernel_trap_all.c
@@ -0,0 +1,31 @@
+
+// clang-format off
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+int main(void) {
+
+#pragma omp target teams
+  {
+#pragma omp parallel
+    __builtin_trap();
+  }
+}
+// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20)
+// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}}
+// TRACE:     launchKernel
+// NDEBG:     main
+// DEBUG:     main {{.*}}kernel_trap_all.c:
diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c
index 391ff0c7dcaa4e..ec516a924e1eea 100644
--- a/offload/test/sanitizer/kernel_trap_async.c
+++ b/offload/test/sanitizer/kernel_trap_async.c
@@ -1,11 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compileopt-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
-// RUN: %libomptarget-compileopt-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -36,7 +36,9 @@ int main(void) {
 
 // clang-format off
 // CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30)
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // DEBUG:     kernel_trap_async.c:
 // clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c
index f2e63794168b2b..31bd626e80c1af 100644
--- a/offload/test/sanitizer/kernel_trap_many.c
+++ b/offload/test/sanitizer/kernel_trap_many.c
@@ -1,8 +1,10 @@
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT,TRACE,DEBUG
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -23,13 +25,14 @@ int main(void) {
     {
     }
   }
-#pragma omp target
-  {
-    __builtin_trap();
-  }
+#pragma omp target thread_limit(1)
+  { __builtin_trap(); }
 }
-// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l26)
-// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l28)
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved
+// diagnosis SANIT: OFFLOAD ERROR: execution interrupted by hardware trap
+// instruction SANIT: Triggered by thread <0,0,0> block <0,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap_many.c:



More information about the llvm-commits mailing list