[llvm] [Offload] Sanitize "standalone" unreachable instructions (PR #101425)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 15:24:39 PDT 2024


https://github.com/jdoerfert created https://github.com/llvm/llvm-project/pull/101425

If an unreachable is reached, the execution state is invalid. If the
sanitizer is enabled, we stop and report it to the user.

>From 4a0ba06973e917c566718478b24498af49aae472 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Thu, 25 Jul 2024 14:56:13 -0700
Subject: [PATCH 1/3] [Offload][OpenMP] Prettify error messages by "demangling"
 the kernel name

The kernel names for OpenMP are manually mangled and not ideal when we
report something to the user. We demangle them now, providing the
function and line number of the target region, together with the actual
kernel name.
---
 llvm/include/llvm/Frontend/OpenMP/OMP.h       |  9 +++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  3 +
 llvm/lib/Frontend/OpenMP/CMakeLists.txt       |  1 +
 llvm/lib/Frontend/OpenMP/OMP.cpp              | 57 +++++++++++++++++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  2 +-
 .../common/include/ErrorReporting.h           | 15 +++--
 offload/src/CMakeLists.txt                    |  1 +
 offload/test/sanitizer/kernel_crash.c         |  8 +--
 offload/test/sanitizer/kernel_crash_async.c   |  4 +-
 offload/test/sanitizer/kernel_crash_many.c    | 16 +++---
 offload/test/sanitizer/kernel_crash_single.c  |  4 +-
 offload/test/sanitizer/kernel_trap.c          |  4 +-
 offload/test/sanitizer/kernel_trap.cpp        | 41 +++++++++++++
 offload/test/sanitizer/kernel_trap_async.c    |  4 +-
 offload/test/sanitizer/kernel_trap_many.c     |  2 +-
 15 files changed, 147 insertions(+), 24 deletions(-)
 create mode 100644 offload/test/sanitizer/kernel_trap.cpp

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index 6f7a39acac1d3..f081015db0b0b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace llvm::omp {
 ArrayRef<Directive> getLeafConstructs(Directive D);
@@ -30,6 +31,14 @@ Directive getCompoundConstruct(ArrayRef<Directive> Parts);
 bool isLeafConstruct(Directive D);
 bool isCompositeConstruct(Directive D);
 bool isCombinedConstruct(Directive D);
+
+/// Create a nicer version of a function name for humans to look at.
+std::string prettityFunctionName(StringRef FunctionName);
+
+/// Deconstruct an OpenMP kernel name into the parent function name and the line
+/// number.
+std::string deconstructOpenMPKernelName(StringRef KernelName, unsigned &LineNo);
+
 } // namespace llvm::omp
 
 #endif // LLVM_FRONTEND_OPENMP_OMP_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1614d5716d28c..9cb311834907b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -196,6 +196,9 @@ class OpenMPIRBuilderConfig {
 /// Data structure to contain the information needed to uniquely identify
 /// a target entry.
 struct TargetRegionEntryInfo {
+  /// The prefix used for kernel names.
+  static constexpr const char *KernelNamePrefix = "__omp_offloading_";
+
   std::string ParentName;
   unsigned DeviceID;
   unsigned FileID;
diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
index 67aedf5c2b61a..82d2a9ae7c533 100644
--- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt
+++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_component_library(LLVMFrontendOpenMP
   TargetParser
   TransformUtils
   Analysis
+  Demangle
   MC
   Scalar
   BitReader
diff --git a/llvm/lib/Frontend/OpenMP/OMP.cpp b/llvm/lib/Frontend/OpenMP/OMP.cpp
index c1556ff3c74d7..b54cc90a14d83 100644
--- a/llvm/lib/Frontend/OpenMP/OMP.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMP.cpp
@@ -10,13 +10,19 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
 
 #include <algorithm>
+#include <cstdio>
 #include <iterator>
+#include <string>
 #include <type_traits>
 
 using namespace llvm;
@@ -186,4 +192,55 @@ bool isCombinedConstruct(Directive D) {
   // Otherwise directive-name is a combined construct.
   return !getLeafConstructs(D).empty() && !isCompositeConstruct(D);
 }
+
+std::string prettityFunctionName(StringRef FunctionName) {
+  // Internalized functions have the right name, but simply a suffix.
+  if (FunctionName.ends_with(".internalized"))
+    return FunctionName.drop_back(sizeof("internalized")).str() +
+           " (internalized)";
+  unsigned LineNo = 0;
+  auto ParentName = deconstructOpenMPKernelName(FunctionName, LineNo);
+  if (LineNo == 0)
+    return FunctionName.str();
+  return ("omp target in " + ParentName + " @ " + std::to_string(LineNo) +
+          " (" + FunctionName + ")")
+      .str();
+}
+
+std::string deconstructOpenMPKernelName(StringRef KernelName,
+                                        unsigned &LineNo) {
+
+  // Only handle functions with an OpenMP kernel prefix for now. Naming scheme:
+  // __omp_offloading_<hex_hash1>_<hex_hash2>_<name>_l<line>_[<count>_]<suffix>
+  if (!KernelName.starts_with(TargetRegionEntryInfo::KernelNamePrefix))
+    return "";
+  auto SkipAfterNext = [](StringRef S, char Tgt, int &Remaining) {
+    return S.drop_while([&](char C) {
+      if (!Remaining)
+        return false;
+      Remaining -= (C == Tgt);
+      return true;
+    });
+  };
+  auto PrettyName = KernelName.drop_front(
+      sizeof(TargetRegionEntryInfo::KernelNamePrefix) - /*'\0'*/ 1);
+  int Remaining = 3;
+  PrettyName = SkipAfterNext(PrettyName, '_', Remaining);
+  if (Remaining)
+    return "";
+
+  // Look for the last '_l<line>'.
+  size_t LineIdx = PrettyName.find("_l");
+  if (LineIdx == StringRef::npos)
+    return "";
+  while (true) {
+    size_t NewLineIdx = PrettyName.find("_l", LineIdx + 2);
+    if (NewLineIdx == StringRef::npos)
+      break;
+    LineIdx = NewLineIdx;
+  }
+  if (PrettyName.drop_front(LineIdx + 2).consumeInteger(10, LineNo))
+    return "";
+  return demangle(PrettyName.take_front(LineIdx));
+}
 } // namespace llvm::omp
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 77e350e7276ab..3f8e64315849e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8581,7 +8581,7 @@ void TargetRegionEntryInfo::getTargetRegionEntryFnName(
     SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID,
     unsigned FileID, unsigned Line, unsigned Count) {
   raw_svector_ostream OS(Name);
-  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
+  OS << KernelNamePrefix << llvm::format("%x", DeviceID)
      << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
   if (Count)
     OS << "_" << Count;
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 72cfb5273ae3c..bca7b27304a0b 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
@@ -237,8 +238,11 @@ class ErrorReporter {
     }
 
     auto KTI = KTIR.getKernelTraceInfo(Idx);
-    if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo)))
-      reportError("Kernel '%s'", KTI.Kernel->getName());
+    if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
+      auto PrettyKernelName =
+          llvm::omp::prettityFunctionName(KTI.Kernel->getName());
+      reportError("Kernel '%s'", PrettyKernelName.c_str());
+    }
     reportError("execution interrupted by hardware trap instruction");
     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
       if (!KTI.LaunchTrace.empty())
@@ -284,10 +288,13 @@ class ErrorReporter {
 
     for (uint32_t Idx = 0, I = 0; I < NumKTIs; ++Idx) {
       auto KTI = KTIR.getKernelTraceInfo(Idx);
+      auto PrettyKernelName =
+          llvm::omp::prettityFunctionName(KTI.Kernel->getName());
       if (NumKTIs == 1)
-        print(BoldLightPurple, "Kernel '%s'\n", KTI.Kernel->getName());
+        print(BoldLightPurple, "Kernel '%s'\n", PrettyKernelName.c_str());
       else
-        print(BoldLightPurple, "Kernel %d: '%s'\n", I, KTI.Kernel->getName());
+        print(BoldLightPurple, "Kernel %d: '%s'\n", I,
+              PrettyKernelName.c_str());
       reportStackTrace(KTI.LaunchTrace);
       ++I;
     }
diff --git a/offload/src/CMakeLists.txt b/offload/src/CMakeLists.txt
index efa5cdab33ec9..344069b6fcdcf 100644
--- a/offload/src/CMakeLists.txt
+++ b/offload/src/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(omptarget
   ${LIBOMPTARGET_BINARY_INCLUDE_DIR}
 
   LINK_COMPONENTS
+  FrontendOpenMP
   Support
   Object
 
diff --git a/offload/test/sanitizer/kernel_crash.c b/offload/test/sanitizer/kernel_crash.c
index 457d953a33a05..c69219d97d3d0 100644
--- a/offload/test/sanitizer/kernel_crash.c
+++ b/offload/test/sanitizer/kernel_crash.c
@@ -36,12 +36,12 @@ int main(void) {
   }
 }
 // TRACE: Display 1 of the 3 last kernel launch traces
-// TRACE: Kernel 0: '__omp_offloading_{{.*}}_main_l30'
+// TRACE: Kernel 0: {{.*}} (__omp_offloading_{{.*}}_main_l30)
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash.c:30
 //
 // CHECK: Display last 3 kernels launched:
-// CHECK: Kernel 0: '__omp_offloading_{{.*}}_main_l30'
-// CHECK: Kernel 1: '__omp_offloading_{{.*}}_main_l27'
-// CHECK: Kernel 2: '__omp_offloading_{{.*}}_main_l24'
+// CHECK: Kernel 0: {{.*}} (__omp_offloading_{{.*}}_main_l30)
+// CHECK: Kernel 1: {{.*}} (__omp_offloading_{{.*}}_main_l27)
+// CHECK: Kernel 2: {{.*}} (__omp_offloading_{{.*}}_main_l24)
diff --git a/offload/test/sanitizer/kernel_crash_async.c b/offload/test/sanitizer/kernel_crash_async.c
index 6aebf1b42a535..6a0461b0045b2 100644
--- a/offload/test/sanitizer/kernel_crash_async.c
+++ b/offload/test/sanitizer/kernel_crash_async.c
@@ -34,7 +34,7 @@ int main(void) {
 #pragma omp taskwait
 }
 
-// TRACE: Kernel {{.*}}'__omp_offloading_{{.*}}_main_
+// TRACE: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30)
 // TRACE:     launchKernel
 //
-// CHECK-DAG: Kernel {{[0-9]}}: '__omp_offloading_{{.*}}_main_l30'
+// CHECK: Kernel {{[0-9]}}: {{.*}} (__omp_offloading_{{.*}}_main_l30)
diff --git a/offload/test/sanitizer/kernel_crash_many.c b/offload/test/sanitizer/kernel_crash_many.c
index 9e3f4f1630acd..25986e0a459c1 100644
--- a/offload/test/sanitizer/kernel_crash_many.c
+++ b/offload/test/sanitizer/kernel_crash_many.c
@@ -30,42 +30,42 @@ int main(void) {
   }
 }
 // CHECK: Display 8 of the 8 last kernel launch traces
-// CHECK: Kernel 0: '__omp_offloading_{{.*}}_main_l27'
+// CHECK: Kernel 0: {{.*}} (__omp_offloading_{{.*}}_main_l27)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:27
 //
-// CHECK: Kernel 1: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 1: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 2: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 2: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 3: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 3: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 4: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 4: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 5: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 5: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 6: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 6: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
 //
-// CHECK: Kernel 7: '__omp_offloading_{{.*}}_main_l23'
+// CHECK: Kernel 7: {{.*}} (__omp_offloading_{{.*}}_main_l23)
 // CHECK:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_many.c:
diff --git a/offload/test/sanitizer/kernel_crash_single.c b/offload/test/sanitizer/kernel_crash_single.c
index 16a8159f074e5..075c3de7ffabb 100644
--- a/offload/test/sanitizer/kernel_crash_single.c
+++ b/offload/test/sanitizer/kernel_crash_single.c
@@ -27,10 +27,10 @@ int main(void) {
   }
 }
 // TRACE: Display kernel launch trace
-// TRACE: Kernel '__omp_offloading_{{.*}}_main_l24'
+// TRACE: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l24)
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_crash_single.c:24
 //
 // CHECK: Display only launched kernel:
-// CHECK: Kernel '__omp_offloading_{{.*}}_main_l24'
+// CHECK: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l24)
diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c
index 13fe6f2fb71e8..db243001c9056 100644
--- a/offload/test/sanitizer/kernel_trap.c
+++ b/offload/test/sanitizer/kernel_trap.c
@@ -35,8 +35,10 @@ int main(void) {
   {
   }
 }
-// CHECK: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l30'
+// clang-format off
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)'
 // CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap.c:
+// clang-format on
diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp
new file mode 100644
index 0000000000000..97ded79c0f194
--- /dev/null
+++ b/offload/test/sanitizer/kernel_trap.cpp
@@ -0,0 +1,41 @@
+
+// clang-format off
+// RUN: %libomptarget-compilexx-generic
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %libomptarget-compilexx-generic -g
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// clang-format on
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+int main(void) {
+
+#pragma omp target
+  {}
+#pragma omp target
+  {}
+#pragma omp target
+  {
+    __builtin_trap();
+  }
+#pragma omp target
+  {}
+}
+// clang-format off
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 28 (__omp_offloading_{{.*}}_main_l28)'
+// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// TRACE:     launchKernel
+// NDEBG:     main
+// DEBUG:     main {{.*}}kernel_trap.cpp:
+// clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c
index 65e8880798343..ee0d772fef9b8 100644
--- a/offload/test/sanitizer/kernel_trap_async.c
+++ b/offload/test/sanitizer/kernel_trap_async.c
@@ -34,7 +34,9 @@ int main(void) {
 #pragma omp taskwait
 }
 
-// CHECK: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l30'
+// clang-format off
+// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30)
 // CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // DEBUG:     kernel_trap_async.c:
+// clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c
index 3f1796e8913ea..b3bdad9f07b4a 100644
--- a/offload/test/sanitizer/kernel_trap_many.c
+++ b/offload/test/sanitizer/kernel_trap_many.c
@@ -29,7 +29,7 @@ int main(void) {
     __builtin_trap();
   }
 }
-// TRACE: OFFLOAD ERROR: Kernel '__omp_offloading_{{.*}}_main_l27'
+// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l27)
 // TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // NDEBG:     main

>From 7890e82163067f5a338360c176535c42a67a1785 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Wed, 24 Jul 2024 16:03:19 -0700
Subject: [PATCH 2/3] [Offload] Introduce the offload sanitizer (initially for
 traps)

This is the first commit for a new "OffloadSanitizer" that is designed
to work well on GPUs. To keep the commit small, only traps are sanitized
and we only report information about the encountering thread. It is also
restricted to AMD GPUs for now, though that is not a conceptual
requirement.

The communication between the instrumented device code and the runtime
is performed via host initialized pinned memory. If an error is
detected, one encountering thread will setup this sanitizer environment
and a hardware trap is executed to end the kernel. The host trap handler
can check the sanitizer environment to determine if the trap was issued
by the sanitizer code or not. If so, we report the reason (for now only
that a trap was encountered), the encountering thread id, and the PC.
---
 .../Instrumentation/OffloadSanitizer.h        |  27 +++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   9 +
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 +
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/OffloadSanitizer.cpp      | 160 ++++++++++++++++++
 offload/DeviceRTL/CMakeLists.txt              |   1 +
 offload/DeviceRTL/include/Utils.h             |   3 +
 offload/DeviceRTL/src/Sanitizer.cpp           |  95 +++++++++++
 offload/DeviceRTL/src/Utils.cpp               |   7 +
 offload/include/Shared/Environment.h          |  25 +++
 .../common/include/ErrorReporting.h           |  46 ++++-
 .../common/include/PluginInterface.h          |   7 +
 .../common/src/PluginInterface.cpp            |  20 +++
 offload/test/sanitizer/kernel_trap.c          |  27 +--
 offload/test/sanitizer/kernel_trap.cpp        |  20 ++-
 offload/test/sanitizer/kernel_trap_all.c      |  31 ++++
 offload/test/sanitizer/kernel_trap_async.c    |  14 +-
 offload/test/sanitizer/kernel_trap_many.c     |  19 ++-
 18 files changed, 479 insertions(+), 34 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
 create mode 100644 offload/DeviceRTL/src/Sanitizer.cpp
 create mode 100644 offload/test/sanitizer/kernel_trap_all.c

diff --git a/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
new file mode 100644
index 0000000000000..6935b7dc390c4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/OffloadSanitizer.h
@@ -0,0 +1,27 @@
+//===- Transforms/Instrumentation/OffloadSanitizer.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to instrument offload code in order to detect errors and communicate
+// them to the LLVM/Offload runtimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class OffloadSanitizerPass : public PassInfoMixin<OffloadSanitizerPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_OFFLOADSAN_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8fb68d1c0b0c..a10357f8e584c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -60,6 +60,7 @@
 #include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
@@ -380,6 +381,11 @@ static cl::opt<bool> EnableHipStdPar(
   cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
   cl::Hidden);
 
+static cl::opt<bool>
+    EnableOffloadSanitizer("amdgpu-enable-offload-sanitizer",
+                           cl::desc("Enable the offload sanitizer"),
+                           cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -744,6 +750,9 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerFullLinkTimeOptimizationLastEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        if (EnableOffloadSanitizer)
+          PM.addPass(OffloadSanitizerPass());
+
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 671caf8484cd9..008102372d852 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
   Core
   GlobalISel
   HipStdPar
+  Instrumentation
   IPO
   IRPrinter
   MC
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 4e3f9e27e0c34..8db9f795fd8e9 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMInstrumentation
   MemProfiler.cpp
   MemorySanitizer.cpp
   NumericalStabilitySanitizer.cpp
+  OffloadSanitizer.cpp
   IndirectCallPromotion.cpp
   Instrumentation.cpp
   InstrOrderFile.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
new file mode 100644
index 0000000000000..a24fdc477a063
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -0,0 +1,160 @@
+//===-- OffloadSanitizer.cpp - Offload sanitizer --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/OffloadSanitizer.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "offload-sanitizer"
+
+namespace {
+
+class OffloadSanitizerImpl final {
+public:
+  OffloadSanitizerImpl(Module &M, FunctionAnalysisManager &FAM)
+      : M(M), FAM(FAM), Ctx(M.getContext()) {}
+
+  bool instrument();
+
+private:
+  bool shouldInstrumentFunction(Function &Fn);
+  bool instrumentFunction(Function &Fn);
+  bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+
+  FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
+                               ArrayRef<Type *> ArgTys) {
+    if (!FC) {
+      auto *NewAllocationFnTy = FunctionType::get(RetTy, ArgTys, false);
+      FC = M.getOrInsertFunction(Name, NewAllocationFnTy);
+    }
+    return FC;
+  }
+
+  /// void __offload_san_trap_info(Int64Ty);
+  FunctionCallee TrapInfoFn;
+  FunctionCallee getTrapInfoFn() {
+    return getOrCreateFn(TrapInfoFn, "__offload_san_trap_info", VoidTy,
+                         {/*PC*/ Int64Ty});
+  }
+
+  CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
+                       ArrayRef<Value *> Args = std::nullopt,
+                       const Twine &Name = "") {
+    Calls.push_back(IRB.CreateCall(Callee, Args, Name));
+    return Calls.back();
+  }
+  SmallVector<CallInst *> Calls;
+
+  Value *getPC(IRBuilder<> &IRB) {
+    return IRB.CreateIntrinsic(Int64Ty, Intrinsic::amdgcn_s_getpc, {}, nullptr,
+                               "PC");
+  }
+
+  Module &M;
+  FunctionAnalysisManager &FAM;
+  LLVMContext &Ctx;
+
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  Type *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  PointerType *PtrTy = PointerType::getUnqual(Ctx);
+  IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+  IntegerType *Int64Ty = Type::getInt64Ty(Ctx);
+
+  const DataLayout &DL = M.getDataLayout();
+};
+
+} // end anonymous namespace
+
+bool OffloadSanitizerImpl::shouldInstrumentFunction(Function &Fn) {
+  if (Fn.isDeclaration())
+    return false;
+  if (Fn.getName().contains("ompx") || Fn.getName().contains("__kmpc") ||
+      Fn.getName().starts_with("rpc_"))
+    return false;
+  return !Fn.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
+}
+
+bool OffloadSanitizerImpl::instrumentTrapInstructions(
+    SmallVectorImpl<IntrinsicInst *> &TrapCalls) {
+  bool Changed = false;
+  for (auto *II : TrapCalls) {
+    IRBuilder<> IRB(II);
+    createCall(IRB, getTrapInfoFn(), {getPC(IRB)});
+  }
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
+  if (!shouldInstrumentFunction(Fn))
+    return false;
+
+  SmallVector<IntrinsicInst *> TrapCalls;
+
+  bool Changed = false;
+  for (auto &I : instructions(Fn)) {
+    switch (I.getOpcode()) {
+    case Instruction::Call: {
+      auto &CI = cast<CallInst>(I);
+      if (auto *II = dyn_cast<IntrinsicInst>(&CI))
+        if (II->getIntrinsicID() == Intrinsic::trap)
+          TrapCalls.push_back(II);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  Changed |= instrumentTrapInstructions(TrapCalls);
+
+  return Changed;
+}
+
+bool OffloadSanitizerImpl::instrument() {
+  bool Changed = false;
+
+  for (Function &Fn : M)
+    Changed |= instrumentFunction(Fn);
+
+  removeFromUsedLists(M, [&](Constant *C) {
+    if (!C->getName().starts_with("__offload_san"))
+      return false;
+    return Changed = true;
+  });
+
+  return Changed;
+}
+
+PreservedAnalyses OffloadSanitizerPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  OffloadSanitizerImpl Impl(M, FAM);
+  if (!Impl.instrument())
+    return PreservedAnalyses::all();
+  LLVM_DEBUG(M.dump());
+  return PreservedAnalyses::none();
+}
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 7818c8d752599..8535c5ee981b2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -94,6 +94,7 @@ set(src_files
   ${source_directory}/Misc.cpp
   ${source_directory}/Parallelism.cpp
   ${source_directory}/Reduction.cpp
+  ${source_directory}/Sanitizer.cpp
   ${source_directory}/State.cpp
   ${source_directory}/Synchronization.cpp
   ${source_directory}/Tasking.cpp
diff --git a/offload/DeviceRTL/include/Utils.h b/offload/DeviceRTL/include/Utils.h
index 82e2397b5958b..2e7767808b721 100644
--- a/offload/DeviceRTL/include/Utils.h
+++ b/offload/DeviceRTL/include/Utils.h
@@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
 
 uint64_t ballotSync(uint64_t Mask, int32_t Pred);
 
+/// Terminate the execution of this warp.
+void terminateWarp();
+
 /// Return \p LowBits and \p HighBits packed into a single 64 bit value.
 uint64_t pack(uint32_t LowBits, uint32_t HighBits);
 
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
new file mode 100644
index 0000000000000..cf0a983f62395
--- /dev/null
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -0,0 +1,95 @@
+//===------ Sanitizer.cpp - Track allocation for sanitizer checks ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mapping.h"
+#include "Shared/Environment.h"
+#include "Synchronization.h"
+#include "Types.h"
+#include "Utils.h"
+
+using namespace ompx;
+
+#define _SAN_ATTRS                                                             \
+  [[clang::disable_sanitizer_instrumentation, gnu::used, gnu::retain]]
+#define _SAN_ENTRY_ATTRS [[gnu::flatten, gnu::always_inline]] _SAN_ATTRS
+
+#pragma omp begin declare target device_type(nohost)
+
+[[gnu::visibility("protected")]] _SAN_ATTRS SanitizerEnvironmentTy
+    *__sanitizer_environment_ptr;
+
+namespace {
+
+/// Helper to lock the sanitizer environment. While we never unlock it, this
+/// allows us to have a no-op "side effect" in the spin-wait function below.
+_SAN_ATTRS bool
+getSanitizerEnvironmentLock(SanitizerEnvironmentTy &SE,
+                            SanitizerEnvironmentTy::ErrorCodeTy ErrorCode) {
+  return atomic::cas(SE.getErrorCodeLocation(), SanitizerEnvironmentTy::NONE,
+                     ErrorCode, atomic::OrderingTy::seq_cst,
+                     atomic::OrderingTy::seq_cst);
+}
+
+/// The spin-wait function should not be inlined, it's a catch all to give one
+/// thread time to setup the sanitizer environment.
+[[clang::noinline]] _SAN_ATTRS void spinWait(SanitizerEnvironmentTy &SE) {
+  while (!atomic::load(&SE.IsInitialized, atomic::OrderingTy::aquire))
+    ;
+  __builtin_trap();
+}
+
+_SAN_ATTRS
+void setLocation(SanitizerEnvironmentTy &SE, uint64_t PC) {
+  for (int I = 0; I < 3; ++I) {
+    SE.ThreadId[I] = mapping::getThreadIdInBlock(I);
+    SE.BlockId[I] = mapping::getBlockIdInKernel(I);
+  }
+  SE.PC = PC;
+
+  // This is the last step to initialize the sanitizer environment, time to
+  // trap via the spinWait. Flush the memory writes and signal for the end.
+  fence::system(atomic::OrderingTy::release);
+  atomic::store(&SE.IsInitialized, 1, atomic::OrderingTy::release);
+}
+
+_SAN_ATTRS
+void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode,
+                         uint64_t PC) {
+  SanitizerEnvironmentTy &SE = *__sanitizer_environment_ptr;
+  bool HasLock = getSanitizerEnvironmentLock(SE, ErrorCode);
+
+  // If no thread of this warp has the lock, end execution gracefully.
+  bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock);
+  if (!AnyThreadHasLock)
+    utils::terminateWarp();
+
+  // One thread will set the location information and signal that the rest of
+  // the wapr that the actual trap can be executed now.
+  if (HasLock)
+    setLocation(SE, PC);
+
+  synchronize::warp(lanes::All);
+
+  // This is not the first thread that encountered the trap, to avoid a race
+  // on the sanitizer environment, this thread is simply going to spin-wait.
+  // The trap above will end the program for all threads.
+  spinWait(SE);
+}
+
+} // namespace
+
+extern "C" {
+
+_SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
+  raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
+}
+}
+
+#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Utils.cpp b/offload/DeviceRTL/src/Utils.cpp
index 53cc803234867..ae6bcf80e348f 100644
--- a/offload/DeviceRTL/src/Utils.cpp
+++ b/offload/DeviceRTL/src/Utils.cpp
@@ -38,6 +38,7 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
                     int32_t Width);
 
 uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+void terminateWarp();
 
 /// AMDGCN Implementation
 ///
@@ -63,6 +64,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
   return Mask & __builtin_amdgcn_ballot_w64(Pred);
 }
 
+void terminateWarp() { __builtin_amdgcn_endpgm(); }
+
 bool isSharedMemPtr(const void *Ptr) {
   return __builtin_amdgcn_is_shared(
       (const __attribute__((address_space(0))) void *)Ptr);
@@ -90,6 +93,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
   return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred);
 }
 
+void terminateWarp() { __nvvm_exit(); }
+
 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
 
 #pragma omp end declare variant
@@ -126,6 +131,8 @@ uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
   return impl::ballotSync(Mask, Pred);
 }
 
+void utils::terminateWarp() { return impl::terminateWarp(); }
+
 bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
 
 extern "C" {
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index d141146b6bd5a..e2fb7109dddce 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -105,4 +105,29 @@ struct KernelLaunchEnvironmentTy {
   void *ReductionBuffer = nullptr;
 };
 
+/// The environment used to communicate sanitizer information from the device to
+/// the host.
+struct SanitizerEnvironmentTy {
+  enum ErrorCodeTy : uint8_t {
+    NONE = 0,
+    TRAP,
+    LAST = TRAP,
+  } ErrorCode;
+
+  /// Flag to indicate the environment has been initialized fully.
+  uint8_t IsInitialized;
+
+  /// Return the error code location for use in an atomic compare-and-swap.
+  uint8_t *getErrorCodeLocation() {
+    return reinterpret_cast<uint8_t *>(&ErrorCode);
+  }
+
+  /// Thread info
+  /// {
+  uint32_t ThreadId[3];
+  uint32_t BlockId[3];
+  uint64_t PC;
+  /// }
+};
+
 #endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index bca7b27304a0b..1a23dea323065 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -12,6 +12,7 @@
 #define OFFLOAD_PLUGINS_NEXTGEN_COMMON_ERROR_REPORTING_H
 
 #include "PluginInterface.h"
+#include "Shared/Environment.h"
 #include "Shared/EnvironmentVar.h"
 
 #include "llvm/ADT/STLExtras.h"
@@ -105,6 +106,15 @@ class ErrorReporter {
     print(BoldRed, Format, Args...);
     print("\n");
   }
+
+  /// Print \p Format, instantiated with \p Args to stderr, but colored with
+  /// a banner.
+  template <typename... ArgsTy>
+  [[gnu::format(__printf__, 1, 2)]] static void
+  reportWarning(const char *Format, ArgsTy &&...Args) {
+    print(Yellow, "WARNING: ");
+    print(Yellow, Format, Args...);
+  }
 #pragma clang diagnostic pop
 
   static void reportError(const char *Str) { reportError("%s", Str); }
@@ -115,6 +125,13 @@ class ErrorReporter {
     print(Color, "%s", Str.str().c_str());
   }
 
+  static void reportLocation(SanitizerEnvironmentTy &SE) {
+    print(BoldLightPurple,
+          "Triggered by thread <%u,%u,%u> block <%u,%u,%u> PC %p\n",
+          SE.ThreadId[0], SE.ThreadId[1], SE.ThreadId[2], SE.BlockId[0],
+          SE.BlockId[1], SE.BlockId[2], (void *)SE.PC);
+  }
+
   /// Pretty print a stack trace.
   static void reportStackTrace(StringRef StackTrace) {
     if (StackTrace.empty())
@@ -225,6 +242,16 @@ class ErrorReporter {
       std::function<bool(__tgt_async_info &)> AsyncInfoWrapperMatcher) {
     assert(AsyncInfoWrapperMatcher && "A matcher is required");
 
+    SanitizerEnvironmentTy *SE = nullptr;
+    for (auto &It : Device.SanitizerEnvironmentMap) {
+      if (It.second->ErrorCode == SanitizerEnvironmentTy::NONE)
+        continue;
+      if (SE)
+        reportWarning(
+            "Multiple errors encountered, information might be inaccurate.");
+      SE = It.second;
+    }
+
     uint32_t Idx = 0;
     for (uint32_t I = 0, E = KTIR.size(); I < E; ++I) {
       auto KTI = KTIR.getKernelTraceInfo(I);
@@ -243,7 +270,24 @@ class ErrorReporter {
           llvm::omp::prettityFunctionName(KTI.Kernel->getName());
       reportError("Kernel '%s'", PrettyKernelName.c_str());
     }
-    reportError("execution interrupted by hardware trap instruction");
+    assert((!SE || SE->ErrorCode != SanitizerEnvironmentTy::NONE) &&
+           "Unexpected sanitizer environment");
+    if (!SE) {
+      reportError("execution stopped, reason is unknown");
+      print(Yellow, "Compile with '-mllvm -amdgpu-enable-offload-sanitizer' "
+                    "improved diagnosis\n");
+    } else {
+      switch (SE->ErrorCode) {
+      case SanitizerEnvironmentTy::TRAP:
+        reportError("execution interrupted by hardware trap instruction");
+        break;
+      default:
+        reportError(
+            "execution stopped, reason is unknown due to invalid error code");
+      }
+
+      reportLocation(*SE);
+    }
     if (KTI.AsyncInfo && (AsyncInfoWrapperMatcher(*KTI.AsyncInfo))) {
       if (!KTI.LaunchTrace.empty())
         reportStackTrace(KTI.LaunchTrace);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 81823338fe211..fb686dd7a6418 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -712,6 +712,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
                               uint64_t PoolSize);
 
+  /// Setup the sanitizer environment to receive sanitizer information from the
+  /// device.
+  Error setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                  DeviceImageTy &Image);
+
   // Setup the RPC server for this device if needed. This may not run on some
   // plugins like the CPU targets. By default, it will not be executed so it is
   // up to the target to override this using the shouldSetupRPCServer function.
@@ -931,6 +936,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   /// Allocate and construct a kernel object.
   virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
 
+  DenseMap<DeviceImageTy *, SanitizerEnvironmentTy *> SanitizerEnvironmentMap;
+
   /// Reference to the underlying plugin that created this device.
   GenericPluginTy &Plugin;
 
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index c3ecbcc62f71f..317a3c713aeeb 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -903,6 +903,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
       return std::move(Err);
   }
 
+  if (auto Err = setupSanitizerEnvironment(Plugin, *Image))
+    return std::move(Err);
+
   if (auto Err = setupRPCServer(Plugin, *Image))
     return std::move(Err);
 
@@ -1008,6 +1011,23 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
   return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
 }
 
+Error GenericDeviceTy::setupSanitizerEnvironment(GenericPluginTy &Plugin,
+                                                 DeviceImageTy &Image) {
+  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
+  if (!GHandler.isSymbolInImage(*this, Image, "__sanitizer_environment_ptr"))
+    return Plugin::success();
+
+  auto *&SanitizerEnvironment = SanitizerEnvironmentMap[&Image];
+  SanitizerEnvironment = reinterpret_cast<SanitizerEnvironmentTy *>(allocate(
+      sizeof(*SanitizerEnvironment), &SanitizerEnvironment, TARGET_ALLOC_HOST));
+  memset(SanitizerEnvironment, '\0', sizeof(SanitizerEnvironmentTy));
+
+  GlobalTy SanitizerEnvironmentGlobal("__sanitizer_environment_ptr",
+                                      sizeof(SanitizerEnvironment),
+                                      &SanitizerEnvironment);
+  return GHandler.writeGlobalToDevice(*this, Image, SanitizerEnvironmentGlobal);
+}
+
 Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
                                       DeviceImageTy &Image) {
   // The plugin either does not need an RPC server or it is unavailible.
diff --git a/offload/test/sanitizer/kernel_trap.c b/offload/test/sanitizer/kernel_trap.c
index db243001c9056..b9ab6f28ac170 100644
--- a/offload/test/sanitizer/kernel_trap.c
+++ b/offload/test/sanitizer/kernel_trap.c
@@ -1,11 +1,14 @@
 
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -25,19 +28,23 @@ int main(void) {
   {
   }
 #pragma omp target
+  {}
+#pragma omp target teams num_teams(32) thread_limit(128)
   {
-  }
-#pragma omp target
-  {
-    __builtin_trap();
+#pragma omp parallel
+    if (omp_get_team_num() == 17 && omp_get_thread_num() == 42)
+      __builtin_trap();
   }
 #pragma omp target
   {
   }
 }
 // clang-format off
-// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 30 (__omp_offloading_{{.*}}_main_l30)'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 32 (__omp_offloading_{{.*}}_main_l32)'
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// SANIT: Triggered by thread <42,0,0> block <17,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap.c:
diff --git a/offload/test/sanitizer/kernel_trap.cpp b/offload/test/sanitizer/kernel_trap.cpp
index 97ded79c0f194..cef4d9737d0dd 100644
--- a/offload/test/sanitizer/kernel_trap.cpp
+++ b/offload/test/sanitizer/kernel_trap.cpp
@@ -1,10 +1,10 @@
 
 // clang-format off
 // RUN: %libomptarget-compilexx-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NDEBG 
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
 // RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
 // RUN: %libomptarget-compilexx-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
 // RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
 // clang-format on
 
@@ -17,9 +17,9 @@
 // UNSUPPORTED: s390x-ibm-linux-gnu
 // UNSUPPORTED: s390x-ibm-linux-gnu-LTO
 
-#include <omp.h>
+struct S {};
 
-int main(void) {
+template <typename T> void cxx_function_name(int I, T *) {
 
 #pragma omp target
   {}
@@ -32,10 +32,12 @@ int main(void) {
 #pragma omp target
   {}
 }
+
+int main(void) {
+  struct S s;
+  cxx_function_name(1, &s);
+}
+
 // clang-format off
-// CHECK: OFFLOAD ERROR: Kernel 'omp target in main @ 28 (__omp_offloading_{{.*}}_main_l28)'
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
-// TRACE:     launchKernel
-// NDEBG:     main
-// DEBUG:     main {{.*}}kernel_trap.cpp:
+// CHECK: OFFLOAD ERROR: Kernel 'omp target in void cxx_function_name<S>(int, S*) @ [[LINE:[0-9]+]] (__omp_offloading_{{.*}}__Z17cxx_function_nameI1SEviPT__l[[LINE]])'
 // clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c
new file mode 100644
index 0000000000000..379ca8362aa83
--- /dev/null
+++ b/offload/test/sanitizer/kernel_trap_all.c
@@ -0,0 +1,31 @@
+
+// clang-format off
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+int main(void) {
+
+#pragma omp target teams
+  {
+#pragma omp parallel
+    __builtin_trap();
+  }
+}
+// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20)
+// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}}
+// TRACE:     launchKernel
+// NDEBG:     main
+// DEBUG:     main {{.*}}kernel_trap_all.c:
diff --git a/offload/test/sanitizer/kernel_trap_async.c b/offload/test/sanitizer/kernel_trap_async.c
index ee0d772fef9b8..d44689a5ee8d4 100644
--- a/offload/test/sanitizer/kernel_trap_async.c
+++ b/offload/test/sanitizer/kernel_trap_async.c
@@ -1,11 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compileopt-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
-// RUN: %libomptarget-compileopt-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG
-// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,NOSAN
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,NOSAN
+// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG,SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SANIT
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -36,7 +36,9 @@ int main(void) {
 
 // clang-format off
 // CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l30)
-// CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved diagnosis 
+// SANIT: OFFLOAD ERROR: execution interrupted by hardware trap instruction
 // TRACE:     launchKernel
 // DEBUG:     kernel_trap_async.c:
 // clang-format on
diff --git a/offload/test/sanitizer/kernel_trap_many.c b/offload/test/sanitizer/kernel_trap_many.c
index b3bdad9f07b4a..0ca67ec145906 100644
--- a/offload/test/sanitizer/kernel_trap_many.c
+++ b/offload/test/sanitizer/kernel_trap_many.c
@@ -1,9 +1,11 @@
 
 // clang-format off
 // RUN: %libomptarget-compile-generic
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG 
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=24 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,NDEBG,NOSAN
 // RUN: %libomptarget-compile-generic -g
-// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=16 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=TRACE,DEBUG,NOSAN
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT,TRACE,DEBUG
 // clang-format on
 
 // UNSUPPORTED: nvptx64-nvidia-cuda
@@ -24,13 +26,14 @@ int main(void) {
     {
     }
   }
-#pragma omp target
-  {
-    __builtin_trap();
-  }
+#pragma omp target thread_limit(1)
+  { __builtin_trap(); }
 }
-// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l27)
-// TRACE: OFFLOAD ERROR: execution interrupted by hardware trap instruction
+// TRACE: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l29)
+// NOSAN: OFFLOAD ERROR: execution stopped, reason is unknown
+// NOSAN: Compile with '-mllvm -amdgpu-enable-offload-sanitizer' improved
+// diagnosis SANIT: OFFLOAD ERROR: execution interrupted by hardware trap
+// instruction SANIT: Triggered by thread <0,0,0> block <0,0,0> PC 0x{{.*}}
 // TRACE:     launchKernel
 // NDEBG:     main
 // DEBUG:     main {{.*}}kernel_trap_many.c:

>From f91a4e7f848e1fc6e09492972ab50eb99eadac17 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes at jdoerfert.de>
Date: Thu, 25 Jul 2024 20:22:01 -0700
Subject: [PATCH 3/3] [Offload] Sanitize "standalone" unreachable instructions

If an unreachable is reached, the execution state is invalid. If the
sanitizer is enabled, we stop and report it to the user.
---
 .../Instrumentation/OffloadSanitizer.cpp      | 29 +++++++++++++++
 offload/DeviceRTL/src/Sanitizer.cpp           |  4 +++
 offload/include/Shared/Environment.h          |  3 +-
 .../common/include/ErrorReporting.h           |  4 +++
 offload/test/sanitizer/kernel_known_ub.c      | 35 +++++++++++++++++++
 5 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 offload/test/sanitizer/kernel_known_ub.c

diff --git a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
index a24fdc477a063..29f2487696729 100644
--- a/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/OffloadSanitizer.cpp
@@ -42,6 +42,8 @@ class OffloadSanitizerImpl final {
   bool shouldInstrumentFunction(Function &Fn);
   bool instrumentFunction(Function &Fn);
   bool instrumentTrapInstructions(SmallVectorImpl<IntrinsicInst *> &TrapCalls);
+  bool instrumentUnreachableInstructions(
+      SmallVectorImpl<UnreachableInst *> &UnreachableInsts);
 
   FunctionCallee getOrCreateFn(FunctionCallee &FC, StringRef Name, Type *RetTy,
                                ArrayRef<Type *> ArgTys) {
@@ -59,6 +61,13 @@ class OffloadSanitizerImpl final {
                          {/*PC*/ Int64Ty});
   }
 
+  /// void __offload_san_unreachable_info(Int64Ty);
+  FunctionCallee UnreachableInfoFn;
+  FunctionCallee getUnreachableInfoFn() {
+    return getOrCreateFn(UnreachableInfoFn, "__offload_san_unreachable_info",
+                         VoidTy, {/*PC*/ Int64Ty});
+  }
+
   CallInst *createCall(IRBuilder<> &IRB, FunctionCallee Callee,
                        ArrayRef<Value *> Args = std::nullopt,
                        const Twine &Name = "") {
@@ -107,15 +116,34 @@ bool OffloadSanitizerImpl::instrumentTrapInstructions(
   return Changed;
 }
 
+bool OffloadSanitizerImpl::instrumentUnreachableInstructions(
+    SmallVectorImpl<UnreachableInst *> &UnreachableInsts) {
+  bool Changed = false;
+  for (auto *II : UnreachableInsts) {
+    // Skip unreachables after traps since we instrument those as well.
+    if (&II->getParent()->front() != II)
+      if (auto *CI = dyn_cast<CallInst>(II->getPrevNode()))
+        if (CI->getIntrinsicID() == Intrinsic::trap)
+          continue;
+    IRBuilder<> IRB(II);
+    createCall(IRB, getUnreachableInfoFn(), {getPC(IRB)});
+  }
+  return Changed;
+}
+
 bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
   if (!shouldInstrumentFunction(Fn))
     return false;
 
+  SmallVector<UnreachableInst *> UnreachableInsts;
   SmallVector<IntrinsicInst *> TrapCalls;
 
   bool Changed = false;
   for (auto &I : instructions(Fn)) {
     switch (I.getOpcode()) {
+    case Instruction::Unreachable:
+      UnreachableInsts.push_back(cast<UnreachableInst>(&I));
+      break;
     case Instruction::Call: {
       auto &CI = cast<CallInst>(I);
       if (auto *II = dyn_cast<IntrinsicInst>(&CI))
@@ -129,6 +157,7 @@ bool OffloadSanitizerImpl::instrumentFunction(Function &Fn) {
   }
 
   Changed |= instrumentTrapInstructions(TrapCalls);
+  Changed |= instrumentUnreachableInstructions(UnreachableInsts);
 
   return Changed;
 }
diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp
index cf0a983f62395..f2e8deec10bc8 100644
--- a/offload/DeviceRTL/src/Sanitizer.cpp
+++ b/offload/DeviceRTL/src/Sanitizer.cpp
@@ -90,6 +90,10 @@ extern "C" {
 _SAN_ENTRY_ATTRS void __offload_san_trap_info(uint64_t PC) {
   raiseExecutionError(SanitizerEnvironmentTy::TRAP, PC);
 }
+
+_SAN_ENTRY_ATTRS void __offload_san_unreachable_info(uint64_t PC) {
+  raiseExecutionError(SanitizerEnvironmentTy::UNREACHABLE, PC);
+}
 }
 
 #pragma omp end declare target
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index e2fb7109dddce..95e039223a964 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -111,7 +111,8 @@ struct SanitizerEnvironmentTy {
   enum ErrorCodeTy : uint8_t {
     NONE = 0,
     TRAP,
-    LAST = TRAP,
+    UNREACHABLE,
+    LAST = UNREACHABLE,
   } ErrorCode;
 
   /// Flag to indicate the environment has been initialized fully.
diff --git a/offload/plugins-nextgen/common/include/ErrorReporting.h b/offload/plugins-nextgen/common/include/ErrorReporting.h
index 1a23dea323065..f27a6b8647a0d 100644
--- a/offload/plugins-nextgen/common/include/ErrorReporting.h
+++ b/offload/plugins-nextgen/common/include/ErrorReporting.h
@@ -281,6 +281,10 @@ class ErrorReporter {
       case SanitizerEnvironmentTy::TRAP:
         reportError("execution interrupted by hardware trap instruction");
         break;
+      case SanitizerEnvironmentTy::UNREACHABLE:
+        reportError("execution reached an \"unreachable\" state (likely caused "
+                  "by undefined behavior)");
+	break;
       default:
         reportError(
             "execution stopped, reason is unknown due to invalid error code");
diff --git a/offload/test/sanitizer/kernel_known_ub.c b/offload/test/sanitizer/kernel_known_ub.c
new file mode 100644
index 0000000000000..818e28f323e0f
--- /dev/null
+++ b/offload/test/sanitizer/kernel_known_ub.c
@@ -0,0 +1,35 @@
+
+// clang-format off
+// RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %libomptarget-compileopt-generic -g -mllvm -amdgpu-enable-offload-sanitizer
+// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+// RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=SANIT
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <omp.h>
+
+__attribute__((noinline)) void unreachable(volatile int *GoodPtr) {
+  *GoodPtr = 1;
+  __builtin_unreachable();
+}
+
+int main(void) {
+#pragma omp target
+  {
+    volatile int A = 0;
+    unreachable(&A);
+  }
+}
+// SANIT: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l27)
+// SANIT: OFFLOAD ERROR: execution reached an "unreachable" state (likely caused by undefined behavior)
+// SANIT: Triggered by thread <{{.*}},0,0> block <{{.*}},0,0> PC 0x{{.*}}



More information about the llvm-commits mailing list