[llvm] [Offload] Sanitize "standalone" unreachable instructions (PR #101425)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 21:16:31 PDT 2024


https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/101425

>From c130a25b63c672745dc8b7d1964572de402d6841 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 24 Jul 2024 16:03:19 -0700
Subject: [PATCH 1/2] [Offload] Introduce the offload sanitizer (initially for
 traps)

This is the first commit for a new "OffloadSanitizer" that is designed
to work well on GPUs. To keep the commit small, only traps are sanitized
and we only report information about the encountering thread. It is also
restricted to AMD GPUs for now, though that is not a conceptual
requirement.

The communication between the instrumented device code and the runtime
is performed via host initialized pinned memory. If an error is
detected, one encountering thread will setup this sanitizer environment
and a hardware trap is executed to end the kernel. The host trap handler
can check the sanitizer environment to determine if the trap was issued
by the sanitizer code or not. If so, we report the reason (for now only
that a trap was encountered), the encountering thread id, and the PC.
---
 .../Instrumentation/OffloadSanitizer.h        |  27 +++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   9 +
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 +
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/OffloadSanitizer.cpp      | 160 ++++++++++++++++++
 .../Instrumentation/OffloadSanitizer/basic.ll |  56 ++++++
 offload/DeviceRTL/CMakeLists.txt              |   1 +
 offload/DeviceRTL/include/Utils.h             |   3 +
 offload/DeviceRTL/src/Sanitizer.cpp           |  95 +++++++++++
 offload/DeviceRTL/src/Utils.cpp               |   7 +
 offload/include/Shared/Environment.h          |  25 +++
 .../common/include/ErrorReporting.h           |  46 ++++-
 .../common/include/PluginInterface.h          |   7 +
 .../common/src/PluginInterface.cpp            |  20 +++
 offload/test/sanitizer/kernel_trap.c          |  27 +--
 offload/test/sanitizer/kernel_trap.cpp        |  13 +-
 offload/test/sanitizer/kernel_trap_all.c      |  31 ++++
 offload/test/sanitizer/kernel_trap_async.c    |  14 +-
 offload/test/sanitizer/kernel_trap_many.c     |  19 ++-
 21 files changed, 533 insertions(+), 31 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
 create mode 100644 llvm/test/Instrumentation/OffloadSanitizer/basic.ll
 create mode 100644 offload/DeviceRTL/src/Sanitizer.cpp
 create mode 100644 offload/test/sanitizer/kernel_trap_all.c

diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
new file mode 100644
index 0000000000000..6935b7dc390c4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
@@ -0,0 +1,27 @@
+//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to instrument offload code in order to detect errors and communicate
+// them to the LLVM/Offload runtimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5dbb1e2f49871..232aa96e16963 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -181,6 +181,7 @@
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/Instrumentation/CGProfile.h"
 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 3b92823cd283b..22b4a1fd1b981 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -144,6 +144,7 @@ MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
 MODULE_PASS("verify", VerifierPass())
 MODULE_PASS("view-callgraph", CallGraphViewerPass())
 MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
+MODULE_PASS("offload-sanitizer", OffloadSanitizerPass())
 #undef MODULE_PASS
 
 #ifndef MODULE_PASS_WITH_PARAMS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8fb68d1c0b0c..a10357f8e584c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
@@ -380,6 +381,11 @@ static cl::opt<bool> EnableHipStdPar(
   cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
   cl::Hidden);
 
+static cl::opt<bool>
+    EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
+                           cl::desc("Enable the offload sanitizer"),
+                           cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -744,6 +750,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        if (EnableOffloadSanitizer)
+          PM.addPass(OffloadSanitizerPass());
+
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 671caf8484cd9..008102372d852 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
   Core
   GlobalISel
   HipStdPar
+  Instrumentation
   IPO
   IRPrinter
   MC
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 4e3f9e27e0c34..8db9f795fd8e9 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
   MemProfiler.cpp
   MemorySanitizer.cpp
   NumericalStabilitySanitizer.cpp
+  OffloadSanitizer.cpp
   IndirectCallPromotion.cpp
   Instrumentation.cpp
   InstrOrderFile.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
new file mode 100644
index 0000000000000..a24fdc477a063
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -0,0 +1,160 @@
+//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "offload-sanitizer"
+
+namespace {
+
+class OffloadSanitizerImpl final {
+public:
+  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
+      : M(M), FAM(FAM), Ctx(M.getContext()) {}
+
+  bool instrument();
+
+private:
+  bool shouldInstrumentFunction(Function &Fn);
+  bool instrumentFunction(Function &Fn);
+  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+
+  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
+                               ArrayRef<Type *> ArgTys) {
+    if (!FC) {
+      auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false);
+      FC = M.getOrInsertFunction(Name, NewAllocationFnTy);
+    }
+    return FC;
+  }
+
+  /// void __offload_san_trap_info(Int64Ty);
+  FunctionCallee TrapInfoFn;
+  FunctionCallee getTrapInfoFn() {
+    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
+                         {/*PC*/ Int64Ty});
+  }
+
+  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
+                       ArrayRef<Value *> Args = std::nullopt,
+                       const Twine &Name = "") {
+    Calls.push_back(IRB.CreateCall(Callee, Args, Name));
+    return Calls.back();
+  }
+  SmallVector<CallInst *> Calls;
+
+  Value *getPC(IRBuilder<> &IRB) {
+    return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr,
+                               "PC");
+  }
+
+  Module &M;
+  FunctionAnalysisManager &FAM;
+  LLVMContext &Ctx;
+
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  PointerType *PtrTy = PointerType::getUnqual(Ctx);
+  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
+
+  const DataLayout &DL = M.getDataLayout();
+};
+
+} // end anonymous namespace
+
+bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
+  if (Fn.isDeclaration())
+    return false;
+  if (Fn.getName().contains("ompx") || Fn.getName().contains("__kmpc") ||
+      Fn.getName().starts_with("rpc_"))
+    return false;
+  return !Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
+}
+
+bool OffloadSanitizerImpl::instrumentTrapInstructions(
+    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
+  bool Changed = false;
+  for (auto *II : TrapCalls) {
+    IRBuilder<> IRB(II);
+    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
+  }
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
+  if (!shouldInstrumentFunction(Fn))
+    return false;
+
+  SmallVector<IntrinsicInst *> TrapCalls;
+
+  bool Changed = false;
+  for (auto &I : instructions(Fn)) {
+    switch (I.getOpcode()) {
+    case Instruction::Call: {
+      auto &CI = cast<CallInst>(I);
+      if (auto *II = dyn_cast<IntrinsicInst>(&CI))
+        if (II->getIntrinsicID() == Intrinsic::trap)
+          TrapCalls.push_back(II);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  Changed |= instrumentTrapInstructions(TrapCalls);
+
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrument() {
+  bool Changed = false;
+
+  for (Function &Fn : M)
+    Changed |= instrumentFunction(Fn);
+
+  removeFromUsedLists(M, [&](Constant *C) {
+    if (!C->getName().starts_with("__offload_san"))
+      return false;
+    return Changed = true;
+  });
+
+  return Changed;
+}
+
+PreservedAnalyses OffloadSanitizerPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  OffloadSanitizerImpl Impl(M, FAM);
+  if (!Impl.instrument())
+    return PreservedAnalyses::all();
+  LLVM_DEBUG(M.dump());
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
new file mode 100644
index 0000000000000..8719b3a5c2bfb
--- /dev/null
+++ b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+; Test basic offload sanitizer trap instrumentation.
+
+; RUN: opt < %s -passes=offload-sanitizer -S | FileCheck --check-prefixes=CHECK %s
+
+define void @test_trap1() {
+; CHECK-LABEL: define void @test_trap1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.trap()
+  ret void
+}
+
+define void @test_trap2() {
+; CHECK-LABEL: define void @test_trap2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+entry:
+  call void @llvm.trap()
+  unreachable
+}
+
+define void @test_trap3(i1 %c) {
+; CHECK-LABEL: define void @test_trap3(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK:       [[T]]:
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_trap_info(i64 [[PC]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[F]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %c, label %t ,label %f
+t:
+  call void @llvm.trap()
+  unreachable
+f:
+  ret void
+}
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 7818c8d752599..8535c5ee981b2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -94,6 +94,7 @@ set(src_files
   ${source_directory}/Misc.cpp
   ${source_directory}/Parallelism.cpp
   ${source_directory}/Reduction.cpp
+  ${source_directory}/Sanitizer.cpp
   ${source_directory}/State.cpp
   ${source_directory}/Synchronization.cpp
   ${source_directory}/Tasking.cpp
diff --git a/offload/DeviceRTL/include/Utils.h b/offload/DeviceRTL/include/Utils.h
index 82e2397b5958b..2e7767808b721 100644
--- a/offload/DeviceRTL/include/Utils.h
+++ b/offload/DeviceRTL/include/Utils.h
@@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
 
 uint64_t ballotSync(uint64_t Mask, int32_t Pred);
 
+/// Terminate the execution of this warp.
+void terminateWarp();
+
 /// Return \p LowBits and \p HighBits packed into a single 64 bit value.
 uint64_t pack(uint32_t LowBits, uint32_t HighBits);
 
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
new file mode 100644
index 0000000000000..cf0a983f62395
--- /dev/null
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -0,0 +1,95 @@
+//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mapping.h"
+#include "Shared/Environment.h"
+#include "Synchronization.h"
+#include "Types.h"
+#include "Utils.h"
+
+using namespace ompx;
+
+#define _SAN_ATTRS                                                             \
+  [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]]
+#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS
+
+#pragma omp begin declare target device_type(nohost)
+
+[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy
+    *__sanitizer_environment_ptr;
+
+namespace {
+
+/// Helper to lock the sanitizer environment. While we never unlock it, this
+/// allows us to have a no-op "side effect" in the spin-wait function below.
+_SAN_ATTRS bool
+getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE,
+                            SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) {
+  return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE,
+                     ErrorCode, atomic::OrderingTy::seq_cst,
+                     atomic::OrderingTy::seq_cst);
+}
+
+/// The spin-wait function should not be inlined, it's a catch all to give one
+/// thread time to setup the sanitizer environment.
+[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) {
+  while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire))
+    ;
+  __builtin_trap();
+}
+
+_SAN_ATTRS
+void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) {
+  for (int I = 0; I < 3; ++I) {
+    SE.ThreadId[I] = mapping::getThreadIdInBlock(I);
+    SE.BlockId[I] = mapping::getBlockIdInKernel(I);
+  }
+  SE.PC = PC;
+
+  // This is the last step to initialize the sanitizer environment, time to
+  // trap via the spinWait. Flush the memory writes and signal for the end.
+  fence::system(atomic::OrderingTy::release);
+  atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release);
+}
+
+_SAN_ATTRS
+void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode,
+                         uint64_t PC) {
+  SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr;
+  bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode);
+
+  // If no thread of this warp has the lock, end execution gracefully.
+  bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock);
+  if (!AnyThreadHasLock)
+    utils::terminateWarp();
+
+  // One thread will set the location information and signal that the rest of
+  // the wapr that the actual trap can be executed now.
+  if (HasLock)
+    setLocation(SE, PC);
+
+  synchronize::warp(lanes::All);
+
+  // This is not the first thread that encountered the trap, to avoid a race
+  // on the sanitizer environment, this thread is simply going to spin-wait.
+  // The trap above will end the program for all threads.
+  spinWait(SE);
+}
+
+} // namespace
+
+extern "C" {
+
+_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
+  raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
+}
+}
+
+#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Utils.cpp b/offload/DeviceRTL/src/Utils.cpp
index 53cc803234867..ae6bcf80e348f 100644
--- a/offload/DeviceRTL/src/Utils.cpp
+++ b/offload/DeviceRTL/src/Utils.cpp
@@ -38,6 +38,7 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
                     int32_t Width);
 
 uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+void terminateWarp();
 
 /// AMDGCN Implementation
 ///
@@ -63,6 +64,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
   return Mask & __builtin_amdgcn_ballot_w64(Pred);
 }
 
+void terminateWarp() { __builtin_amdgcn_endpgm(); }
+
 bool isSharedMemPtr(const void *Ptr) {
   return __builtin_amdgcn_is_shared(
       (const __attribute__((address_space(0))) void *)Ptr);
@@ -90,6 +93,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
   return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
 }
 
+void terminateWarp() { __nvvm_exit(); }
+
 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
 
 #pragma omp end declare variant
@@ -126,6 +131,8 @@ uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
   return impl::ballotSync(Mask, Pred);
 }
 
+void utils::terminateWarp() { return impl::terminateWarp(); }
+
 bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
 
 extern "C" {
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index d141146b6bd5a..e2fb7109dddce 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -105,4 +105,29 @@ struct KernelLaunchEnvironmentTy {
   void *ReductionBuffer = nullptr;
 };
 
+/// The environment used to communicate sanitizer information from the device to
+/// the host.
+struct SanitizerEnvironmentTy {
+  enum ErrorCodeTy : uint8_t {
+    NONE = 0,
+    TRAP,
+    LAST = TRAP,
+  } ErrorCode;
+
+  /// Flag to indicate the environment has been initialized fully.
+  uint8_t IsInitialized;
+
+  /// Return the error code location for use in an atomic compare-and-swap.
+  uint8_t *getErrorCodeLocation() {
+    return reinterpret_cast<uint8_t *>(&ErrorCode);
+  }
+
+  /// Thread info
+  /// {
+  uint32_t ThreadId[3];
+  uint32_t BlockId[3];
+  uint64_t PC;
+  /// }
+};
+
 #endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index e557b32c2c24f..591e2a99c3d8d 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -12,6 +12,7 @@
 #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
 
 #include "PluginInterface.h"
+#include "Shared/Environment.h"
 #include "Shared/EnvironmentVar.h"
 
 #include "llvm/ADT/STLExtras.h"
@@ -105,6 +106,15 @@ class ErrorReporter {
     print(BoldRed, Format, Args...);
     print("\n");
   }
+
+  /// Print \p Format, instantiated with \p Args to stderr, but colored with
+  /// a banner.
+  template <typename... ArgsTy>
+  [[gnu::format(__printf__, 1, 2)]] static void
+  reportWarning(const char *Format, ArgsTy &&...Args) {
+    print(Yellow, "WARNING: ");
+    print(Yellow, Format, Args...);
+  }
 #pragma clang diagnostic pop
 
   static void reportError(const char *Str) { reportError("%s", Str); }
@@ -115,6 +125,13 @@ class ErrorReporter {
     print(Color, "%s", Str.str().c_str());
   }
 
+  static void reportLocation(SanitizerEnvironmentTy &SE) {
+    print(BoldLightPurple,
+          "Triggered by thread <%u,%u,%u> block <%u,%u,%u> PC %p\n",
+          SE.ThreadId[0], SE.ThreadId[1], SE.ThreadId[2], SE.BlockId[0],
+          SE.BlockId[1], SE.BlockId[2], (void *)SE.PC);
+  }
+
   /// Pretty print a stack trace.
   static void reportStackTrace(StringRef StackTrace) {
     if (StackTrace.empty())
@@ -225,6 +242,16 @@ class ErrorReporter {
       std::function<bool(__tgt_async_info &)> AsyncInfoWrapperMatcher) {
     assert(AsyncInfoWrapperMatcher && "A matcher is required");
 
+    SanitizerEnvironmentTy *SE = nullptr;
+    for (auto &It : Device.SanitizerEnvironmentMap) {
+      if (It.second->ErrorCode == SanitizerEnvironmentTy::NONE)
+        continue;
+      if (SE)
+        reportWarning(
+            "Multiple errors encountered, information might be inaccurate.");
+      SE = It.second;
+    }
+
     uint32_t Idx = 0;
     for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
       auto KTI = KTIR.getKernelTraceInfo(I);
@@ -243,7 +270,24 @@ class ErrorReporter {
           llvm::omp::prettifyFunctionName(KTI.Kernel->getName());
       reportError("Kernel '%s'", PrettyKernelName.c_str());
     }
-    reportError("execution interrupted by hardware trap instruction");
+    assert((!SE || SE->ErrorCode != SanitizerEnvironmentTy::NONE) &&
+           "Unexpected sanitizer environment");
+    if (!SE) {
+      reportError("execution stopped, reason is unknown");
+      print(Yellow, "Compile with '-mllvm -amdgpu-enable-offload-sanitizer' "
+                    "improved diagnosis\n");
+    } else {
+      switch (SE->ErrorCode) {
+      case SanitizerEnvironmentTy::TRAP:
+        reportError("execution interrupted by hardware trap instruction");
+        break;
+      default:
+        reportError(
+            "execution stopped, reason is unknown due to invalid error code");
+      }
+
+      reportLocation(*SE);
+    }
     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
       if (!KTI.LaunchTrace.empty())
         reportStackTrace(KTI.LaunchTrace);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 81823338fe211..fb686dd7a6418 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -712,6 +712,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
                               uint64_t PoolSize);
 
+  /// Setup the sanitizer environment to receive sanitizer information from the
+  /// device.
+  Error setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                  DeviceImageTy &Image);
+
   // Setup the RPC server for this device if needed. This may not run on some
   // plugins like the CPU targets. By default, it will not be executed so it is
   // up to the target to override this using the shouldSetupRPCServer function.
@@ -931,6 +936,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// Allocate and construct a kernel object.
   virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
 
+  DenseMap<DeviceImageTy *, SanitizerEnvironmentTy *> SanitizerEnvironmentMap;
+
   /// Reference to the underlying plugin that created this device.
   GenericPluginTy &Plugin;
 
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index c3ecbcc62f71f..317a3c713aeeb 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -903,6 +903,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
       return std::move(Err);
   }
 
+  if (auto Err = setupSanitizerEnvironment(Plugin, *Image))
+    return std::move(Err);
+
   if (auto Err = setupRPCServer(Plugin, *Image))
     return std::move(Err);
 
@@ -1008,6 +1011,23 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
   return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
 }
 
+Error GenericDeviceTy::setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                                 DeviceImageTy &Image) {
+  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
+  if (!GHandler.isSymbolInImage(*this, Image, "__sanitizer_environment_ptr"))
+    return Plugin::success();
+
+  auto *&SanitizerEnvironment = SanitizerEnvironmentMap[&Image];
+  SanitizerEnvironment = reinterpret_cast<SanitizerEnvironmentTy *>(allocate(
+      sizeof(*SanitizerEnvironment), &SanitizerEnvironment, TARGET_ALLOC_HOST));
+  memset(SanitizerEnvironment, '\0', sizeof(SanitizerEnvironmentTy));
+
+  GlobalTy SanitizerEnvironmentGlobal("__sanitizer_environment_ptr",
+                                      sizeof(SanitizerEnvironment),
+                                      &SanitizerEnvironment);
+  return GHandler.writeGlobalToDevice(*this, Image, SanitizerEnvironmentGlobal);
+}
+
 Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
                                       DeviceImageTy &Image) {
   // The plugin either does not need an RPC server or it is unavailible.
diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c
index db243001c9056..b9ab6f28ac170 100644
--- a/offload/test/sanitizer/kernel_trap.c
+++ b/offload/test/sanitizer/kernel_trap.c
@@ -1,11 +1,14 @@
 
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -25,19 +28,23 @@ int main(void) {
   {
   }
 #pragma omp target
+  {}
+#pragma omp target teams num_teams(32) thread_limit(128)
   {
-  }
-#pragma omp target
-  {
-    __builtin_trap();
+#pragma omp parallel
+    if (omp_get_team_num() == 17 && omp_get_thread_num() == 42)
+      __builtin_trap();
   }
 #pragma omp target
   {
   }
 }
 // clang-format off
-// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 32 (__omp_offloading_{{.*}}_main_l32)'
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// SANIT: Triggered by thread <42,0,0> block <17,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap.c:
diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp
index 899b608d579a7..dfa5358040b08 100644
--- a/offload/test/sanitizer/kernel_trap.cpp
+++ b/offload/test/sanitizer/kernel_trap.cpp
@@ -1,11 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compilexx-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
-// RUN: %libomptarget-compilexx-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compilexx-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -43,7 +43,8 @@ int main(void) {
 
 // clang-format off
 // CHECK: OFFLOAD ERROR: Kernel 'omp target in void cxx_function_name<S>(int, S*) @ [[LINE:[0-9]+]] (__omp_offloading_{{.*}}__Z17cxx_function_nameI1SEviPT__l[[LINE]])'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // NDEBG:     cxx_function_name<S>(int, S*)
 // NDEBG:     main
diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c
new file mode 100644
index 0000000000000..379ca8362aa83
--- /dev/null
+++ b/offload/test/sanitizer/kernel_trap_all.c
@@ -0,0 +1,31 @@
+
+// clang-format off
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+int main(void) {
+
+#pragma omp target teams
+  {
+#pragma omp parallel
+    __builtin_trap();
+  }
+}
+// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20)
+// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}}
+// TRACE:     launchKernel
+// NDEBG:     main
+// DEBUG:     main {{.*}}kernel_trap_all.c:
diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c
index ee0d772fef9b8..d44689a5ee8d4 100644
--- a/offload/test/sanitizer/kernel_trap_async.c
+++ b/offload/test/sanitizer/kernel_trap_async.c
@@ -1,11 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compileopt-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
-// RUN: %libomptarget-compileopt-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -36,7 +36,9 @@ int main(void) {
 
 // clang-format off
 // CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30)
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // DEBUG:     kernel_trap_async.c:
 // clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c
index b3bdad9f07b4a..0ca67ec145906 100644
--- a/offload/test/sanitizer/kernel_trap_many.c
+++ b/offload/test/sanitizer/kernel_trap_many.c
@@ -1,9 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG 
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT,TRACE,DEBUG
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -24,13 +26,14 @@ int main(void) {
     {
     }
   }
-#pragma omp target
-  {
-    __builtin_trap();
-  }
+#pragma omp target thread_limit(1)
+  { __builtin_trap(); }
 }
-// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l27)
-// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29)
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved
+// diagnosis SANIT: OFFLOAD ERROR: execution interrupted by hardware trap
+// instruction SANIT: Triggered by thread <0,0,0> block <0,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap_many.c:

>From 1ba8017134b5c0e05600b4ea899136479baebbdb Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Thu, 25 Jul 2024 20:22:01 -0700
Subject: [PATCH 2/2] [Offload] Sanitize "standalone" unreachable instructions

If an unreachable is reached, the execution state is invalid. If the
sanitizer is enabled, we stop and report it to the user.
---
 .../Instrumentation/OffloadSanitizer.cpp      | 29 +++++++++++++++
 .../Instrumentation/OffloadSanitizer/basic.ll | 29 +++++++++++++++
 offload/DeviceRTL/src/Sanitizer.cpp           |  4 +++
 offload/include/Shared/Environment.h          |  3 +-
 .../common/include/ErrorReporting.h           |  4 +++
 offload/test/sanitizer/kernel_known_ub.c      | 35 +++++++++++++++++++
 6 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 offload/test/sanitizer/kernel_known_ub.c

diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
index a24fdc477a063..29f2487696729 100644
--- a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -42,6 +42,8 @@ class OffloadSanitizerImpl final {
   bool shouldInstrumentFunction(Function &Fn);
   bool instrumentFunction(Function &Fn);
   bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+  bool instrumentUnreachableInstructions(
+      SmallVectorImpl<UnreachableInst *> &UnreachableInsts);
 
   FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
                                ArrayRef<Type *> ArgTys) {
@@ -59,6 +61,13 @@ class OffloadSanitizerImpl final {
                          {/*PC*/ Int64Ty});
   }
 
+  /// void __offload_san_unreachable_info(Int64Ty);
+  FunctionCallee UnreachableInfoFn;
+  FunctionCallee getUnreachableInfoFn() {
+    return getOrCreateFn(UnreachableInfoFn, "__offload_san_unreachable_info",
+                         VoidTy, {/*PC*/ Int64Ty});
+  }
+
   CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
                        ArrayRef<Value *> Args = std::nullopt,
                        const Twine &Name = "") {
@@ -107,15 +116,34 @@ bool OffloadSanitizerImpl::instrumentTrapInstructions(
   return Changed;
 }
 
+bool OffloadSanitizerImpl::instrumentUnreachableInstructions(
+    SmallVectorImpl<UnreachableInst *> &UnreachableInsts) {
+  bool Changed = false;
+  for (auto *II : UnreachableInsts) {
+    // Skip unreachables after traps since we instrument those as well.
+    if (&II->getParent()->front() != II)
+      if (auto *CI = dyn_cast<CallInst>(II->getPrevNode()))
+        if (CI->getIntrinsicID() == Intrinsic::trap)
+          continue;
+    IRBuilder<> IRB(II);
+    createCall(IRB, getUnreachableInfoFn(), {getPC(IRB)});
+  }
+  return Changed;
+}
+
 bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
   if (!shouldInstrumentFunction(Fn))
     return false;
 
+  SmallVector<UnreachableInst *> UnreachableInsts;
   SmallVector<IntrinsicInst *> TrapCalls;
 
   bool Changed = false;
   for (auto &I : instructions(Fn)) {
     switch (I.getOpcode()) {
+    case Instruction::Unreachable:
+      UnreachableInsts.push_back(cast<UnreachableInst>(&I));
+      break;
     case Instruction::Call: {
       auto &CI = cast<CallInst>(I);
       if (auto *II = dyn_cast<IntrinsicInst>(&CI))
@@ -129,6 +157,7 @@ bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
   }
 
   Changed |= instrumentTrapInstructions(TrapCalls);
+  Changed |= instrumentUnreachableInstructions(UnreachableInsts);
 
   return Changed;
 }
diff --git a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
index 8719b3a5c2bfb..b86572f7c5cff 100644
--- a/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/OffloadSanitizer/basic.ll
@@ -54,3 +54,32 @@ t:
 f:
   ret void
 }
+
+define void @test_unreachable1() {
+; CHECK-LABEL: define void @test_unreachable1() {
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_unreachable_info(i64 [[PC]])
+; CHECK-NEXT:    unreachable
+;
+  unreachable
+}
+
+define void @test_unreachable2(i1 %c) {
+; CHECK-LABEL: define void @test_unreachable2(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK:       [[T]]:
+; CHECK-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT:    call void @__offload_san_unreachable_info(i64 [[PC]])
+; CHECK-NEXT:    unreachable
+; CHECK:       [[F]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %c, label %t ,label %f
+t:
+  unreachable
+f:
+  ret void
+}
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
index cf0a983f62395..f2e8deec10bc8 100644
--- a/offload/DeviceRTL/src/Sanitizer.cpp
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -90,6 +90,10 @@ extern "C" {
 _SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
   raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
 }
+
+_SAN_ENTRY_ATTRS void __offload_san_unreachable_info(uint64_t PC) {
+  raiseExecutionError(SanitizerEnvironmentTy::UNREACHABLE, PC);
+}
 }
 
 #pragma omp end declare target
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index e2fb7109dddce..95e039223a964 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -111,7 +111,8 @@ struct SanitizerEnvironmentTy {
   enum ErrorCodeTy : uint8_t {
     NONE = 0,
     TRAP,
-    LAST = TRAP,
+    UNREACHABLE,
+    LAST = UNREACHABLE,
   } ErrorCode;
 
   /// Flag to indicate the environment has been initialized fully.
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 591e2a99c3d8d..efc17acb1e9a9 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -281,6 +281,10 @@ class ErrorReporter {
       case SanitizerEnvironmentTy::TRAP:
         reportError("execution interrupted by hardware trap instruction");
         break;
+      case SanitizerEnvironmentTy::UNREACHABLE:
+        reportError("execution reached an \"unreachable\" state (likely caused "
+                  "by undefined behavior)");
+	break;
       default:
         reportError(
             "execution stopped, reason is unknown due to invalid error code");
diff --git a/offload/test/sanitizer/kernel_known_ub.c b/offload/test/sanitizer/kernel_known_ub.c
new file mode 100644
index 0000000000000..818e28f323e0f
--- /dev/null
+++ b/offload/test/sanitizer/kernel_known_ub.c
@@ -0,0 +1,35 @@
+
+// clang-format off
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+__attribute__((noinline)) void unreachable(volatile int *GoodPtr) {
+  *GoodPtr = 1;
+  __builtin_unreachable();
+}
+
+int main(void) {
+#pragma omp target
+  {
+    volatile int A = 0;
+    unreachable(&A);
+  }
+}
+// SANIT: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l27)
+// SANIT: OFFLOAD ERROR: execution reached an "unreachable" state (likely caused by undefined behavior)
+// SANIT: Triggered by thread <{{.*}},0,0> block <{{.*}},0,0> PC 0x{{.*}}



More information about the llvm-commits mailing list