[llvm] [Offload] Add framework for math conformance tests (PR #149242)

Mon Jul 28 13:59:25 PDT 2025

https://github.com/leandrolcampos updated https://github.com/llvm/llvm-project/pull/149242

>From b749446367cbe191b90656862051b31fe778f22e Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 17 Jul 2025 00:15:00 -0300
Subject: [PATCH 01/21] [Offload] Add framework for math conformance tests

---
 offload/unittests/CMakeLists.txt              |  11 +-
 offload/unittests/Conformance/CMakeLists.txt  |   7 +-
 .../Conformance/device_code/CMakeLists.txt    |   6 +-
 .../Conformance/device_code/LLVMLibm.c        |  30 +++
 .../unittests/Conformance/device_code/sin.c   |   4 -
 .../include/mathtest/DeviceContext.hpp        | 121 +++++++++
 .../include/mathtest/DeviceResources.hpp      | 129 ++++++++++
 .../Conformance/include/mathtest/Dim.hpp      |  42 ++++
 .../include/mathtest/ErrorHandling.hpp        |  29 +++
 .../include/mathtest/ExhaustiveGenerator.hpp  | 139 +++++++++++
 .../include/mathtest/GpuMathTest.hpp          | 159 ++++++++++++
 .../include/mathtest/HostRefChecker.hpp       |  82 ++++++
 .../include/mathtest/IndexedRange.hpp         |  91 +++++++
 .../include/mathtest/InputGenerator.hpp       |  14 ++
 .../Conformance/include/mathtest/Numerics.hpp | 235 ++++++++++++++++++
 .../include/mathtest/OffloadForward.hpp       |  22 ++
 .../Conformance/include/mathtest/Support.hpp  | 138 ++++++++++
 .../include/mathtest/TestResult.hpp           |  70 ++++++
 .../include/mathtest/TestRunner.hpp           | 118 +++++++++
 .../include/mathtest/TypeExtras.hpp           |   9 +
 .../unittests/Conformance/lib/CMakeLists.txt  |   5 +
 .../Conformance/lib/DeviceContext.cpp         | 201 +++++++++++++++
 .../Conformance/lib/DeviceResources.cpp       |  54 ++++
 .../Conformance/lib/ErrorHandling.cpp         |  37 +++
 offload/unittests/Conformance/sin.cpp         |   8 -
 .../Conformance/tests/CMakeLists.txt          |   2 +
 .../Conformance/tests/Hypotf16Test.cpp        |  51 ++++
 .../unittests/Conformance/tests/LogfTest.cpp  |  44 ++++
 28 files changed, 1834 insertions(+), 24 deletions(-)
 create mode 100644 offload/unittests/Conformance/device_code/LLVMLibm.c
 delete mode 100644 offload/unittests/Conformance/device_code/sin.c
 create mode 100644 offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/Dim.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/Numerics.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/Support.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/TestResult.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/TestRunner.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
 create mode 100644 offload/unittests/Conformance/lib/CMakeLists.txt
 create mode 100644 offload/unittests/Conformance/lib/DeviceContext.cpp
 create mode 100644 offload/unittests/Conformance/lib/DeviceResources.cpp
 create mode 100644 offload/unittests/Conformance/lib/ErrorHandling.cpp
 delete mode 100644 offload/unittests/Conformance/sin.cpp
 create mode 100644 offload/unittests/Conformance/tests/CMakeLists.txt
 create mode 100644 offload/unittests/Conformance/tests/Hypotf16Test.cpp
 create mode 100644 offload/unittests/Conformance/tests/LogfTest.cpp

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 388d15f834b1d..bea49387528b0 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -39,9 +39,9 @@ function(add_offload_test_device_code test_filename test_name)
       add_custom_command(
         OUTPUT ${output_file}
         COMMAND ${CMAKE_C_COMPILER}
-        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
-        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
-        -c ${SRC_PATH} -o ${output_file}
+        --target=nvptx64-nvidia-cuda -march=native
+        -stdlib -nogpulib --cuda-path=${CUDA_ROOT} -flto -fno-builtin ${ARGN}
+        ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}
       )
       add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
@@ -62,6 +62,8 @@ function(add_offload_test_device_code test_filename test_name)
       set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
       add_custom_command(
         OUTPUT ${output_file}
+        # TODO(jhuber6): Add and test the '-stdlib' flag here; also consider
+        #                the '-fno-builtin' flag.
         COMMAND ${CMAKE_C_COMPILER}
         --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
         -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
@@ -106,10 +108,9 @@ function(add_conformance_test test_name)
   endif()
 
   add_executable(${target_name} ${files})
-  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} ${test_name}.bin)
+  add_dependencies(${target_name} conformance_device_binaries)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${CONFORMANCE_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
-  target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
   set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
 
   add_custom_target(offload.conformance.${test_name}
diff --git a/offload/unittests/Conformance/CMakeLists.txt b/offload/unittests/Conformance/CMakeLists.txt
index bc3141757372a..ce0421553de05 100644
--- a/offload/unittests/Conformance/CMakeLists.txt
+++ b/offload/unittests/Conformance/CMakeLists.txt
@@ -1,8 +1,7 @@
 add_custom_target(offload.conformance)
 
-set(PLUGINS_TEST_COMMON LLVMOffload LLVMSupport)
-set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common)
+set(PLUGINS_TEST_COMMON MathTest)
 
 add_subdirectory(device_code)
-
-add_conformance_test(sin sin.cpp)
+add_subdirectory(lib)
+add_subdirectory(tests)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 223f04ccfb698..82c6ec9767562 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,4 @@
-# FIXME: Currently missing dependencies to build GPU portion automatically.
-add_offload_test_device_code(sin.c sin)
+add_offload_test_device_code(LLVMLibm.c LLVMLibm)
 
-set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
+add_custom_target(conformance_device_binaries DEPENDS LLVMLibm.bin)
+set(CONFORMANCE_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
new file mode 100644
index 0000000000000..5f436a235eb08
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -0,0 +1,30 @@
+#include <gpuintrin.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __FLT16_MAX__
+#define HAS_FLOAT16
+typedef _Float16 float16;
+#endif
+
+#ifdef HAS_FLOAT16
+__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
+                                 size_t NumElements) {
+  // Flat x-dimension thread index; each thread handles at most one element.
+  const uint32_t BlockBase = __gpu_num_threads_x() * __gpu_block_id_x();
+  const uint32_t Index = BlockBase + __gpu_thread_id_x();
+  if (Index >= NumElements)
+    return;
+  Out[Index] = hypotf16(X[Index], Y[Index]);
+}
+#endif
+
+__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
+  // Flat x-dimension thread index; each thread handles at most one element.
+  const uint32_t BlockBase = __gpu_num_threads_x() * __gpu_block_id_x();
+  const uint32_t Index = BlockBase + __gpu_thread_id_x();
+  if (Index >= NumElements)
+    return;
+  Out[Index] = logf(X[Index]);
+}
diff --git a/offload/unittests/Conformance/device_code/sin.c b/offload/unittests/Conformance/device_code/sin.c
deleted file mode 100644
index e969e60f352a2..0000000000000
--- a/offload/unittests/Conformance/device_code/sin.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include <gpuintrin.h>
-#include <math.h>
-
-__gpu_kernel void kernel(double *out) { *out = sin(*out); }
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
new file mode 100644
index 0000000000000..74ef83ce0c195
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -0,0 +1,121 @@
+#pragma once
+
+#include "mathtest/DeviceResources.hpp"
+#include "mathtest/Dim.hpp"
+#include "mathtest/ErrorHandling.hpp"
+#include "mathtest/Support.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace mathtest {
+
+std::size_t countDevices();
+
+namespace detail {
+
+void allocManagedMemory(ol_device_handle_t DeviceHandle, std::size_t Size,
+                        void **AllocationOut) noexcept;
+} // namespace detail
+
+class DeviceContext {
+  // For simplicity, the current design of this class doesn't have support for
+  // asynchronous operations and all types of memory allocation.
+  //
+  // Other use cases could benefit from operations like enqueued kernel launch
+  // and enqueued memcpy, as well as device and host memory allocation.
+
+public:
+  // TODO: Add a constructor that also takes a 'Provider'.
+  explicit DeviceContext(std::size_t DeviceId = 0);
+  // Allocates room for Size elements of T and wraps it in a ManagedBuffer.
+  template <typename T>
+  ManagedBuffer<T> createManagedBuffer(std::size_t Size) const noexcept {
+    void *UntypedAddress = nullptr;
+    // Size counts elements; the allocator is given the size in bytes.
+    detail::allocManagedMemory(DeviceHandle, Size * sizeof(T), &UntypedAddress);
+    T *TypedAddress = static_cast<T *>(UntypedAddress);
+
+    return ManagedBuffer<T>(TypedAddress, Size);
+  }
+  // Loads a device image identified by Directory, BinaryName and Extension.
+  [[nodiscard]] std::shared_ptr<DeviceImage>
+  loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
+             llvm::StringRef Extension) const;
+  // Same, without an explicit Extension (how one is chosen is not shown here).
+  [[nodiscard]] std::shared_ptr<DeviceImage>
+  loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
+  // Resolves KernelName in Image; Image must belong to this context's device.
+  template <typename KernelSignature>
+  DeviceKernel<KernelSignature>
+  getKernel(const std::shared_ptr<DeviceImage> &Image,
+            llvm::StringRef KernelName) const noexcept {
+    assert(Image && "Image provided to getKernel is null");
+
+    if (Image->DeviceHandle != this->DeviceHandle) {
+      FATAL_ERROR("Image provided to getKernel was created for a different "
+                  "device");
+    }
+
+    ol_symbol_handle_t KernelHandle = nullptr;
+    getKernelImpl(Image->Handle, KernelName, &KernelHandle);
+
+    return DeviceKernel<KernelSignature>(Image, KernelHandle);
+  }
+  // Launches Kernel with Args, which must match KernelSignature after decay.
+  template <typename KernelSignature, typename... ArgTypes>
+  void launchKernel(DeviceKernel<KernelSignature> Kernel, Dim NumGroups,
+                    Dim GroupSize, ArgTypes &&...Args) const noexcept {
+    using ExpectedTypes =
+        typename FunctionTypeTraits<KernelSignature>::ArgTypesTuple;
+    using ProvidedTypes = std::tuple<std::decay_t<ArgTypes>...>;
+
+    static_assert(std::is_same_v<ExpectedTypes, ProvidedTypes>,
+                  "Argument types provided to launchKernel do not match the "
+                  "kernel's signature");
+
+    if (Kernel.Image->DeviceHandle != DeviceHandle) {
+      FATAL_ERROR("Kernel provided to launchKernel was created for a different "
+                  "device");
+    }
+    // A kernel without parameters gets a null argument pointer and size 0.
+    if constexpr (sizeof...(Args) == 0) {
+      launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, nullptr, 0);
+    } else {
+      auto KernelArgs = makeKernelArgsPack(std::forward<ArgTypes>(Args)...);
+      // The pack is handed over by address and size, i.e. as raw bytes.
+      static_assert(
+          (std::is_trivially_copyable_v<std::decay_t<ArgTypes>> && ...),
+          "Argument types provided to launchKernel must be trivially copyable");
+
+      launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, &KernelArgs,
+                       sizeof(KernelArgs));
+    }
+  }
+  // Returns the stored DeviceId member.
+  [[nodiscard]] std::size_t getId() const noexcept { return DeviceId; }
+
+  [[nodiscard]] std::string getName() const;
+
+  [[nodiscard]] std::string getPlatform() const;
+
+private:
+  void getKernelImpl(ol_program_handle_t ProgramHandle,
+                     llvm::StringRef KernelName,
+                     ol_symbol_handle_t *KernelHandle) const noexcept;
+  // Non-template back end of launchKernel; KernelArgs may be null when empty.
+  void launchKernelImpl(ol_symbol_handle_t KernelHandle, const Dim &NumGroups,
+                        const Dim &GroupSize, const void *KernelArgs,
+                        std::size_t KernelArgsSize) const noexcept;
+
+  std::size_t DeviceId;
+  ol_device_handle_t DeviceHandle;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
new file mode 100644
index 0000000000000..51f7662ef548e
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
@@ -0,0 +1,129 @@
+#pragma once
+
+#include "mathtest/OffloadForward.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace mathtest {
+
+class DeviceContext;
+
+namespace detail {
+
+void freeDeviceMemory(void *Address) noexcept;
+} // namespace detail
+
+//===----------------------------------------------------------------------===//
+// ManagedBuffer
+//===----------------------------------------------------------------------===//
+
+// Move-only owner of a device allocation holding Size elements of T.
+// The allocation is released through detail::freeDeviceMemory.
+template <typename T> class [[nodiscard]] ManagedBuffer {
+public:
+  ~ManagedBuffer() noexcept { release(); }
+
+  ManagedBuffer(const ManagedBuffer &) = delete;
+  ManagedBuffer &operator=(const ManagedBuffer &) = delete;
+
+  // Takes over Other's allocation and leaves Other empty.
+  ManagedBuffer(ManagedBuffer &&Other) noexcept
+      : Address(std::exchange(Other.Address, nullptr)),
+        Size(std::exchange(Other.Size, 0)) {}
+
+  // Frees the current allocation, if any, before taking over Other's.
+  ManagedBuffer &operator=(ManagedBuffer &&Other) noexcept {
+    if (this != &Other) {
+      release();
+      Address = std::exchange(Other.Address, nullptr);
+      Size = std::exchange(Other.Size, 0);
+    }
+    return *this;
+  }
+
+  // Pointer to the first element; null after the buffer has been moved from.
+  [[nodiscard]] T *data() noexcept { return Address; }
+
+  [[nodiscard]] const T *data() const noexcept { return Address; }
+
+  // Number of elements (not bytes).
+  [[nodiscard]] std::size_t getSize() const noexcept { return Size; }
+
+  // Views over the whole buffer.
+  [[nodiscard]] operator llvm::MutableArrayRef<T>() noexcept {
+    return llvm::MutableArrayRef<T>(data(), getSize());
+  }
+
+  [[nodiscard]] operator llvm::ArrayRef<T>() const noexcept {
+    return llvm::ArrayRef<T>(data(), getSize());
+  }
+
+private:
+  friend class DeviceContext;
+
+  // Only DeviceContext may wrap a raw allocation.
+  explicit ManagedBuffer(T *Address, std::size_t Size) noexcept
+      : Address(Address), Size(Size) {}
+
+  // Releases the allocation, if any; Address and Size are left untouched.
+  void release() noexcept {
+    if (Address)
+      detail::freeDeviceMemory(Address);
+  }
+
+  T *Address = nullptr;
+  std::size_t Size = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// DeviceImage
+//===----------------------------------------------------------------------===//
+
+class [[nodiscard]] DeviceImage {
+public:
+  ~DeviceImage() noexcept;
+  DeviceImage &operator=(DeviceImage &&Other) noexcept;
+  // Copying is disabled; the move operations are defined out of line.
+  DeviceImage(const DeviceImage &) = delete;
+  DeviceImage &operator=(const DeviceImage &) = delete;
+
+  DeviceImage(DeviceImage &&Other) noexcept;
+
+private:
+  friend class DeviceContext;
+  // Private: only the befriended DeviceContext can create images.
+  explicit DeviceImage(ol_device_handle_t DeviceHandle,
+                       ol_program_handle_t Handle) noexcept;
+  // Device the image was created for, and the program handle itself.
+  ol_device_handle_t DeviceHandle = nullptr;
+  ol_program_handle_t Handle = nullptr;
+};
+
+//===----------------------------------------------------------------------===//
+// DeviceKernel
+//===----------------------------------------------------------------------===//
+
+template <typename KernelSignature> class [[nodiscard]] DeviceKernel {
+public:
+  DeviceKernel() = delete;
+  // Copies share ownership of the image through the shared_ptr member.
+  DeviceKernel(const DeviceKernel &) = default;
+  DeviceKernel &operator=(const DeviceKernel &) = default;
+  DeviceKernel(DeviceKernel &&) noexcept = default;
+  DeviceKernel &operator=(DeviceKernel &&) noexcept = default;
+
+private:
+  friend class DeviceContext;
+  // Private: only the befriended DeviceContext can create kernels.
+  explicit DeviceKernel(std::shared_ptr<DeviceImage> Image,
+                        ol_symbol_handle_t Kernel)
+      : Image(std::move(Image)), Handle(Kernel) {}
+  // Holding the image by shared_ptr keeps it alive as long as the kernel.
+  std::shared_ptr<DeviceImage> Image;
+  ol_symbol_handle_t Handle = nullptr;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
new file mode 100644
index 0000000000000..948c10e94cbb3
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Dim.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+
+namespace mathtest {
+
+// A 1-3 dimensional extent (X, Y, Z); every component must be positive.
+class Dim {
+public:
+  Dim() = delete;
+  // Not explicit: a plain count converts to Dim{Count, 1, 1}.
+  constexpr Dim(uint32_t X, uint32_t Y = 1, uint32_t Z = 1) noexcept
+      : Data{X, Y, Z} {
+    assert(X > 0 && Y > 0 && Z > 0 && "Dimensions must be positive");
+  }
+  // Takes up to three extents; unspecified trailing extents default to 1.
+  constexpr Dim(std::initializer_list<uint32_t> Dimensions) noexcept
+      : Data{1, 1, 1} {
+    assert(Dimensions.size() <= 3 &&
+           "The number of dimensions must be less than or equal to 3");
+    // Bound the copy so builds without assertions cannot write past Data.
+    std::size_t Index = 0;
+    for (uint32_t DimValue : Dimensions)
+      if (Index < 3)
+        Data[Index++] = DimValue;
+    assert(Data[0] > 0 && Data[1] > 0 && Data[2] > 0 &&
+           "Dimensions must be positive");
+  }
+  // Returns the extent along axis Index (0 = X, 1 = Y, 2 = Z).
+  [[nodiscard]] constexpr uint32_t
+  operator[](std::size_t Index) const noexcept {
+    assert(Index < 3 && "Index is out of range");
+    return Data[Index];
+  }
+
+private:
+  uint32_t Data[3];
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
new file mode 100644
index 0000000000000..932aa79e4c902
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "mathtest/OffloadForward.hpp"
+
+#include "llvm/ADT/Twine.h"
+
+#define FATAL_ERROR(Message)                                                   \
+  mathtest::detail::reportFatalError(Message, __FILE__, __LINE__, __func__)
+// OL_CHECK evaluates ResultExpr once and reports any non-OL_SUCCESS result.
+#define OL_CHECK(ResultExpr)                                                   \
+  do {                                                                         \
+    ol_result_t Result = (ResultExpr);                                         \
+    if (Result != OL_SUCCESS) {                                                \
+      mathtest::detail::reportOffloadError(#ResultExpr, Result, __FILE__,      \
+                                           __LINE__, __func__);                \
+    }                                                                          \
+  } while (false)
+
+namespace mathtest {
+namespace detail {
+// Backs FATAL_ERROR; declared [[noreturn]], so control never comes back.
+[[noreturn]] void reportFatalError(const llvm::Twine &Message, const char *File,
+                                   int Line, const char *FuncName);
+// Backs OL_CHECK; receives the stringized expression and its result.
+[[noreturn]] void reportOffloadError(const char *ResultExpr, ol_result_t Result,
+                                     const char *File, int Line,
+                                     const char *FuncName);
+} // namespace detail
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
new file mode 100644
index 0000000000000..1725a5b35f358
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -0,0 +1,139 @@
+#pragma once
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/InputGenerator.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Parallel.h"
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename... InTypes>
+class [[nodiscard]] ExhaustiveGenerator final
+    : public InputGenerator<InTypes...> {
+  static constexpr std::size_t NumInputs = sizeof...(InTypes);
+  static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+
+public:
+  explicit constexpr ExhaustiveGenerator(
+      const IndexedRange<InTypes> &...Ranges) noexcept
+      : RangesTuple(Ranges...) {
+    bool Overflowed = getSizeWithOverflow(Ranges..., Size);
+    // Size is the product of all range sizes, checked for 64-bit overflow.
+    assert(!Overflowed && "The input space size is too large");
+    assert((Size > 0) && "The input space size must be at least 1");
+
+    IndexArrayType DimSizes = {};
+    {
+      std::size_t Index = 0;
+      ((DimSizes[Index++] = Ranges.getSize()), ...);
+    }
+    // Row-major strides: the last input varies fastest.
+    Strides[NumInputs - 1] = 1;
+    if constexpr (NumInputs > 1) {
+      for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index) {
+        Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
+      }
+    }
+  }
+  // Fills Buffers with the next batch; returns its size, or 0 when exhausted.
+  [[nodiscard]] std::size_t
+  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
+    const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
+    const std::size_t BufferSize = BufferSizes[0];
+    assert((BufferSize != 0) && "Buffer size cannot be zero");
+    assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
+                       [&](std::size_t Size) { return Size == BufferSize; }) &&
+           "All input buffers must have the same size");
+    // Atomically reserve [StartFlatIndex, StartFlatIndex + BatchSize).
+    uint64_t StartFlatIndex, BatchSize;
+    while (true) {
+      uint64_t CurrentFlatIndex =
+          FlatIndexGenerator.load(std::memory_order_relaxed);
+      if (CurrentFlatIndex >= Size)
+        return 0;
+
+      BatchSize = std::min<uint64_t>(BufferSize, Size - CurrentFlatIndex);
+      uint64_t NextFlatIndex = CurrentFlatIndex + BatchSize;
+
+      if (FlatIndexGenerator.compare_exchange_weak(
+              CurrentFlatIndex, NextFlatIndex,
+              std::memory_order_acq_rel, // Success
+              std::memory_order_acquire  // Failure
+              )) {
+        StartFlatIndex = CurrentFlatIndex;
+        break;
+      }
+    }
+
+    auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
+    // Every offset of the batch is written by its own parallelFor iteration.
+    llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
+      writeInputs(StartFlatIndex, Offset, BufferPtrsTuple);
+    });
+
+    return static_cast<std::size_t>(BatchSize);
+  }
+
+private:
+  using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
+  using IndexArrayType = std::array<uint64_t, NumInputs>;
+  // Stores the product of the range sizes in Size; returns true on overflow.
+  static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
+                                  uint64_t &Size) noexcept {
+    Size = 1;
+    bool Overflowed = false;
+
+    auto Multiplier = [&](const uint64_t RangeSize) {
+      if (!Overflowed) {
+        Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
+      }
+    };
+
+    (Multiplier(Ranges.getSize()), ...);
+
+    return Overflowed;
+  }
+  // Writes the inputs for flat index StartFlatIndex + Offset at slot Offset.
+  template <typename BufferPtrsTupleType>
+  void writeInputs(uint64_t StartFlatIndex, uint64_t Offset,
+                   BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+    auto NDIndex = getNDIndex(StartFlatIndex + Offset);
+    writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
+  }
+  // Decomposes a flat index into one index per input using Strides.
+  constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
+    IndexArrayType NDIndex;
+
+    for (std::size_t Index = 0; Index < NumInputs; ++Index) {
+      NDIndex[Index] = FlatIndex / Strides[Index];
+      FlatIndex -= NDIndex[Index] * Strides[Index];
+    }
+
+    return NDIndex;
+  }
+  // Recurses over the inputs at compile time, storing one value per buffer.
+  template <std::size_t Index, typename BufferPtrsTupleType>
+  void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
+                       BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+    if constexpr (Index < NumInputs) {
+      const auto &Range = std::get<Index>(RangesTuple);
+      std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
+      writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
+    }
+  }
+
+  uint64_t Size = 1;
+  RangesTupleType RangesTuple;
+  IndexArrayType Strides = {};
+  std::atomic<uint64_t> FlatIndexGenerator = 0;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
new file mode 100644
index 0000000000000..e5d1c6c77f634
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -0,0 +1,159 @@
+#pragma once
+
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/DeviceResources.hpp"
+#include "mathtest/HostRefChecker.hpp"
+#include "mathtest/InputGenerator.hpp"
+#include "mathtest/Support.hpp"
+#include "mathtest/TestResult.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+
+// Runs Func's device kernel on generated inputs; Checker validates results.
+template <auto Func, typename Checker = HostRefChecker<Func>>
+class [[nodiscard]] GpuMathTest final {
+  using FunctionTraits = mathtest::FunctionTraits<Func>;
+  using OutType = typename FunctionTraits::ReturnType;
+  using InTypesTuple = typename FunctionTraits::ArgTypesTuple;
+
+  template <typename... Ts>
+  using PartialResultType = TestResult<OutType, Ts...>;
+  using KernelSignature = KernelSignatureOf_t<Func>;
+
+  template <typename... Ts>
+  using TypeIdentitiesTuple = std::tuple<TypeIdentityOf<Ts>...>;
+  using InTypeIdentitiesTuple =
+      ApplyTupleTypes_t<InTypesTuple, TypeIdentitiesTuple>;
+
+  static constexpr std::size_t DefaultBufferSize =
+      DefaultBufferSizeFor_v<OutType, InTypesTuple>;
+  static constexpr uint32_t DefaultGroupSize = 512;
+
+public:
+  using FunctionConfig = mathtest::FunctionConfig<Func>;
+  using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
+  using GeneratorType = ApplyTupleTypes_t<InTypesTuple, InputGenerator>;
+  // Loads the provider's device binary and resolves Func's kernel in it.
+  explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
+                       llvm::StringRef Provider,
+                       llvm::StringRef DeviceBinsDirectory)
+      : Context(std::move(Context)),
+        Kernel(getKernel(this->Context, Provider, DeviceBinsDirectory)) {}
+  // Drains Generator batch by batch and accumulates the per-batch results.
+  ResultType run(GeneratorType &Generator,
+                 std::size_t BufferSize = DefaultBufferSize,
+                 uint32_t GroupSize = DefaultGroupSize) const noexcept {
+    assert(BufferSize > 0 && "Buffer size must be a positive value");
+    assert(GroupSize > 0 && "Group size must be a positive value");
+
+    auto [InBuffersTuple, OutBuffer] = createBuffers(BufferSize);
+    ResultType FinalResult;
+
+    while (true) {
+      const std::size_t BatchSize = std::apply(
+          [&](auto &...Buffers) { return Generator.fill(Buffers...); },
+          InBuffersTuple);
+
+      if (BatchSize == 0)
+        break;
+
+      const auto BatchResult =
+          processBatch(InBuffersTuple, OutBuffer, BatchSize, GroupSize);
+
+      FinalResult.accumulate(BatchResult);
+    }
+
+    return FinalResult;
+  }
+
+  [[nodiscard]] const DeviceContext &getContext() const noexcept {
+    assert(Context && "Context must not be null");
+    return *Context;
+  }
+
+private:
+  static DeviceKernel<KernelSignature>
+  getKernel(const std::shared_ptr<DeviceContext> &Context,
+            llvm::StringRef Provider,
+            llvm::StringRef DeviceBinsDirectory) noexcept {
+    // Checked here: this runs from the constructor's initializer list.
+    assert(Context && "Context must not be null");
+    llvm::StringRef BinaryName = llvm::StringSwitch<llvm::StringRef>(Provider)
+                                     .Case("llvm-libm", "LLVMLibm")
+                                     .Default("");
+    if (BinaryName.empty()) {
+      FATAL_ERROR(llvm::Twine("Unsupported provider: '") + Provider + "'");
+    }
+
+    const auto Image = Context->loadBinary(DeviceBinsDirectory, BinaryName);
+
+    return Context->getKernel<KernelSignature>(Image,
+                                               FunctionConfig::KernelName);
+  }
+  // Allocates one buffer per input type plus the output buffer.
+  [[nodiscard]] auto createBuffers(std::size_t BufferSize) const {
+    auto InBuffersTuple = std::apply(
+        [&](auto... InTypeIdentities) {
+          return std::make_tuple(
+              Context->createManagedBuffer<
+                  typename decltype(InTypeIdentities)::type>(BufferSize)...);
+        },
+        InTypeIdentitiesTuple{});
+    auto OutBuffer = Context->createManagedBuffer<OutType>(BufferSize);
+
+    return std::make_pair(std::move(InBuffersTuple), std::move(OutBuffer));
+  }
+  // Launches the kernel over BatchSize elements, then checks its output.
+  template <typename InBuffersTupleType>
+  [[nodiscard]] ResultType
+  processBatch(const InBuffersTupleType &InBuffersTuple,
+               ManagedBuffer<OutType> &OutBuffer, std::size_t BatchSize,
+               uint32_t GroupSize) const noexcept {
+    const uint32_t NumGroups = (BatchSize + GroupSize - 1) / GroupSize;
+    const auto KernelArgsTuple = std::apply(
+        [&](const auto &...InBuffers) {
+          return std::make_tuple(InBuffers.data()..., OutBuffer.data(),
+                                 BatchSize);
+        },
+        InBuffersTuple);
+
+    std::apply(
+        [&](const auto &...KernelArgs) {
+          Context->launchKernel(Kernel, NumGroups, GroupSize, KernelArgs...);
+        },
+        KernelArgsTuple);
+
+    return check(InBuffersTuple, OutBuffer, BatchSize);
+  }
+  // Wraps the first BatchSize elements of every buffer and runs Checker.
+  template <typename InBuffersTupleType>
+  [[nodiscard]] static ResultType
+  check(const InBuffersTupleType &InBuffersTuple,
+        const ManagedBuffer<OutType> &OutBuffer,
+        std::size_t BatchSize) noexcept {
+    const auto InViewsTuple = std::apply(
+        [&](auto &...InBuffers) {
+          return std::make_tuple(
+              llvm::ArrayRef(InBuffers.data(), BatchSize)...);
+        },
+        InBuffersTuple);
+    const auto OutView = llvm::ArrayRef<OutType>(OutBuffer.data(), BatchSize);
+
+    return Checker::check(InViewsTuple, OutView);
+  }
+
+  std::shared_ptr<DeviceContext> Context;
+  DeviceKernel<KernelSignature> Kernel;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
new file mode 100644
index 0000000000000..c45137d652df2
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
@@ -0,0 +1,82 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+#include "mathtest/Support.hpp"
+#include "mathtest/TestResult.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/Parallel.h"
+
+#include <cstddef>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+// Checker that scores output values against Func evaluated on the same inputs
+template <auto Func> class HostRefChecker {
+  using FuncTraits = FunctionTraits<Func>;
+  using InTypesTuple = typename FuncTraits::ArgTypesTuple;
+
+  using FuncConfig = FunctionConfig<Func>;
+
+  template <typename... Ts>
+  using BuffersTupleType = std::tuple<llvm::ArrayRef<Ts>...>;
+
+public:
+  using OutType = typename FuncTraits::ReturnType;
+
+private:
+  template <typename... Ts>
+  using PartialResultType = TestResult<OutType, Ts...>;
+
+public:
+  using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
+  using InBuffersTupleType = ApplyTupleTypes_t<InTypesTuple, BuffersTupleType>;
+  // Not instantiable: the class only exposes the static check() below
+  HostRefChecker() = delete;
+  // Scores every index of OutBuffer and returns the combined ResultType
+  static ResultType check(InBuffersTupleType InBuffersTuple,
+                          llvm::ArrayRef<OutType> OutBuffer) noexcept {
+    const std::size_t BufferSize = OutBuffer.size();
+    std::apply(
+        [&](const auto &...InBuffers) {
+          assert(
+              ((InBuffers.size() == BufferSize) && ...) &&
+              "All input buffers must have the same size as the output buffer");
+        },
+        InBuffersTuple);
+
+    assert((BufferSize != 0) && "Buffer size cannot be zero");
+
+    ResultType Init;
+    // Per-index step: evaluate Func on the inputs and measure the ULP distance
+    auto Transform = [&](std::size_t Index) {
+      auto CurrentInputsTuple = std::apply(
+          [&](const auto &...InBuffers) {
+            return std::make_tuple(InBuffers[Index]...);
+          },
+          InBuffersTuple);
+
+      const OutType Actual = OutBuffer[Index];
+      const OutType Expected = std::apply(Func, CurrentInputsTuple);
+
+      const auto UlpDistance = computeUlpDistance(Actual, Expected);
+      const bool IsFailure = UlpDistance > FuncConfig::UlpTolerance;
+
+      return ResultType(UlpDistance, IsFailure,
+                        typename ResultType::TestCase(
+                            std::move(CurrentInputsTuple), Actual, Expected));
+    };
+    // Combine step: merge partial results with ResultType::accumulate
+    auto Reduce = [](ResultType A, const ResultType &B) {
+      A.accumulate(B);
+      return A;
+    };
+    // Apply Transform to each index in [0, BufferSize) and fold with Reduce
+    const auto Indexes = llvm::seq(BufferSize);
+    return llvm::parallelTransformReduce(Indexes.begin(), Indexes.end(), Init,
+                                         Reduce, Transform);
+  }
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
new file mode 100644
index 0000000000000..0f33978c8d30d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
@@ -0,0 +1,91 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+namespace mathtest {
+// Indexable view of the values of T between two bounds, in ascending order
+template <typename T> class [[nodiscard]] IndexedRange {
+  static_assert(IsFloatingPoint_v<T> || std::is_integral_v<T>,
+                "Type T must be an integral or floating-point type");
+  static_assert(sizeof(T) <= sizeof(uint64_t),
+                "Type T must be no wider than uint64_t");
+
+public:
+  constexpr IndexedRange() noexcept
+      : IndexedRange(getMinOrNegInf<T>(), getMaxOrInf<T>(), true) {}
+  // Covers [Begin, End] when Inclusive is true and [Begin, End) otherwise
+  explicit constexpr IndexedRange(T Begin, T End, bool Inclusive) noexcept
+      : MappedBegin(mapToOrderedUnsigned(Begin)),
+        MappedEnd(mapToOrderedUnsigned(End)) {
+    if (Inclusive) {
+      assert((Begin <= End) && "Begin must be less than or equal to End");
+    } else {
+      assert((Begin < End) && "Begin must be less than End");
+      --MappedEnd;
+    }
+    // NOTE(review): (+0.0, -0.0, true) passes the assert yet maps out of order
+    assert(((MappedEnd - MappedBegin) < std::numeric_limits<uint64_t>::max()) &&
+           "The range is too large to index");
+  }
+  // Number of values in the range
+  [[nodiscard]] constexpr uint64_t getSize() const noexcept {
+    return static_cast<uint64_t>(MappedEnd) - MappedBegin + 1;
+  }
+  // Value at position Index, counted from the lower bound
+  [[nodiscard]] constexpr T operator[](uint64_t Index) const noexcept {
+    assert((Index < getSize()) && "Index is out of range");
+
+    StorageType MappedValue = MappedBegin + Index;
+    return mapFromOrderedUnsigned(MappedValue);
+  }
+
+private:
+  using StorageType = StorageTypeOf_t<T>;
+
+  // Linearise T values into an ordered unsigned space:
+  //  * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  //  * The difference |map(a) − map(b)| equals the number of representable
+  //    values between a and b within the same type
+  static constexpr StorageType mapToOrderedUnsigned(T Value) {
+    if constexpr (IsFloatingPoint_v<T>) {
+      StorageType SignMask = FPUtils<T>::SignMask;
+      StorageType Bits = FPUtils<T>::getAsBits(Value);
+      return (Bits & SignMask) ? SignMask - (Bits - SignMask) - 1
+                               : SignMask + Bits;
+    }
+
+    if constexpr (std::is_signed_v<T>) {
+      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      return __builtin_bit_cast(StorageType, Value) ^ SignMask;
+    }
+
+    return Value;
+  }
+  // Inverse of mapToOrderedUnsigned
+  static constexpr T mapFromOrderedUnsigned(StorageType MappedValue) {
+    if constexpr (IsFloatingPoint_v<T>) {
+      StorageType SignMask = FPUtils<T>::SignMask;
+      StorageType Bits = (MappedValue < SignMask)
+                             ? (SignMask - MappedValue) + SignMask - 1
+                             : MappedValue - SignMask;
+
+      return FPUtils<T>::createFromBits(Bits);
+    }
+
+    if constexpr (std::is_signed_v<T>) {
+      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      return __builtin_bit_cast(T, MappedValue ^ SignMask);
+    }
+
+    return MappedValue;
+  }
+
+  StorageType MappedBegin;
+  StorageType MappedEnd;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
new file mode 100644
index 0000000000000..d9365d4b14423
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace mathtest {
+// Interface for input sources: fill() receives one mutable buffer per input type
+template <typename... InTypes> class InputGenerator {
+public:
+  virtual ~InputGenerator() noexcept = default;
+  // NOTE(review): the size_t result is presumably the count written — confirm
+  [[nodiscard]] virtual size_t
+  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept = 0;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
new file mode 100644
index 0000000000000..f43a26974dda7
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -0,0 +1,235 @@
+#pragma once
+
+#include "mathtest/Support.hpp"
+#include "mathtest/TypeExtras.hpp"
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <math.h>
+#include <type_traits>
+
+namespace mathtest {
+
+//===----------------------------------------------------------------------===//
+// Type Traits
+//===----------------------------------------------------------------------===//
+// Primary template: unsigned T maps to itself, signed T to make_unsigned_t<T>
+template <typename T> struct StorageTypeOf {
+private:
+  static constexpr auto getStorageType() noexcept {
+    if constexpr (std::is_unsigned_v<T>) {
+      return TypeIdentityOf<T>{};
+    } else if constexpr (std::is_signed_v<T>) {
+      return TypeIdentityOf<std::make_unsigned_t<T>>{};
+    } else {
+      static_assert(!std::is_same_v<T, T>, "Unsupported type");
+    }
+  }
+
+public:
+  using type = typename decltype(getStorageType())::type;
+};
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct StorageTypeOf<float16> {
+  using type = uint16_t;
+};
+#endif
+
+template <> struct StorageTypeOf<float> {
+  using type = uint32_t;
+};
+
+template <> struct StorageTypeOf<double> {
+  using type = uint64_t;
+};
+
+template <typename T> using StorageTypeOf_t = typename StorageTypeOf<T>::type;
+
+template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct IsFloatingPoint<float16> : std::true_type {};
+#endif
+
+template <typename T>
+inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
+    = IsFloatingPoint<T>::value;
+
+//===----------------------------------------------------------------------===//
+// Bitmask Utilities
+//===----------------------------------------------------------------------===//
+
+// Returns a UIntType with its Count most-significant bits set (0 if Count is 0)
+template <typename UIntType, std::size_t Count>
+[[nodiscard]] constexpr UIntType maskLeadingOnes() noexcept {
+  static_assert(std::is_unsigned_v<UIntType>,
+                "UIntType must be an unsigned integer type");
+  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
+  static_assert(
+      Count <= TotalBits,
+      "Count must be less than or equal to the bit width of UIntType");
+  // Cast back after `~`: types narrower than int promote, yielding int -1
+  constexpr UIntType AllOnes = static_cast<UIntType>(~UIntType(0));
+  return Count == 0 ? UIntType(0) : UIntType(AllOnes << (TotalBits - Count));
+}
+
+// Count least-significant bits set; the cast undoes int promotion of narrow types
+template <typename UIntType, std::size_t Count>
+[[nodiscard]] constexpr UIntType maskTrailingOnes() noexcept {
+  static_assert(std::is_unsigned_v<UIntType>,
+                "UIntType must be an unsigned integer type");
+  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
+  static_assert(
+      Count <= TotalBits,
+      "Count must be less than or equal to the bit width of UIntType");
+  constexpr UIntType AllOnes = static_cast<UIntType>(~UIntType(0));
+  return Count == 0 ? UIntType(0) : UIntType(AllOnes >> (TotalBits - Count));
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Utilities
+//===----------------------------------------------------------------------===//
+
+template <typename FloatType> struct FPLayout;
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct FPLayout<float16> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 5;
+  static constexpr std::size_t FractionLen = 10;
+};
+#endif
+
+template <> struct FPLayout<float> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 8;
+  static constexpr std::size_t FractionLen = 23;
+};
+
+template <> struct FPLayout<double> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 11;
+  static constexpr std::size_t FractionLen = 52;
+};
+// Bit-level helpers for FloatType, built on the field widths in its FPLayout
+template <typename FloatType> struct FPUtils : public FPLayout<FloatType> {
+  using FPLayout = ::mathtest::FPLayout<FloatType>; // GCC: -Wchanges-meaning
+  using StorageType = StorageTypeOf_t<FloatType>;
+  using FPLayout::ExponentLen;
+  using FPLayout::FractionLen;
+  using FPLayout::SignLen;
+  // Selects the sign bit, located just above the exponent and fraction fields
+  static constexpr StorageType SignMask =
+      maskTrailingOnes<StorageType, SignLen>() << (ExponentLen + FractionLen);
+
+  FPUtils() = delete;
+
+  [[nodiscard]] static constexpr FloatType
+  createFromBits(StorageType Bits) noexcept {
+    return __builtin_bit_cast(FloatType, Bits);
+  }
+
+  [[nodiscard]] static constexpr StorageType
+  getAsBits(FloatType Value) noexcept {
+    return __builtin_bit_cast(StorageType, Value);
+  }
+
+  [[nodiscard]] static constexpr bool isNaN(FloatType Value) noexcept {
+    return __builtin_isnan(Value);
+  }
+
+  [[nodiscard]] static constexpr bool getSignBit(FloatType Value) noexcept {
+    return getAsBits(Value) & SignMask;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Numeric Functions
+//===----------------------------------------------------------------------===//
+
+// Yields -infinity when T is a floating-point type that has one; otherwise
+// std::numeric_limits<T>::lowest()
+template <typename T> [[nodiscard]] constexpr T getMinOrNegInf() noexcept {
+  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
+  using Limits = std::numeric_limits<T>;
+  if constexpr (std::is_floating_point_v<T> && Limits::has_infinity)
+    return -Limits::infinity();
+  else
+    return Limits::lowest();
+}
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> [[nodiscard]] constexpr float16 getMinOrNegInf<float16>() noexcept {
+  using StorageType = StorageTypeOf_t<float16>;
+
+  return __builtin_bit_cast(float16, static_cast<StorageType>(0xFC00U));
+}
+#endif
+
+// Yields +infinity when T is a floating-point type that has one; otherwise
+// std::numeric_limits<T>::max()
+template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
+  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
+  using Limits = std::numeric_limits<T>;
+  if constexpr (std::is_floating_point_v<T> && Limits::has_infinity)
+    return Limits::infinity();
+  else
+    return Limits::max();
+}
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> [[nodiscard]] constexpr float16 getMaxOrInf<float16>() noexcept {
+  using StorageType = StorageTypeOf_t<float16>;
+
+  return __builtin_bit_cast(float16, static_cast<StorageType>(0x7C00U));
+}
+#endif
+
+template <typename FloatType>
+[[nodiscard]] uint64_t computeUlpDistance(FloatType X, FloatType Y) noexcept {
+  static_assert(IsFloatingPoint_v<FloatType>,
+                "FloatType must be a floating-point type");
+  using FPUtils = FPUtils<FloatType>;
+  using StorageType = typename FPUtils::StorageType;
+  // Equal operands are 0 ULPs apart, except for the signed-zero pair below
+  if (X == Y) {
+    if (FPUtils::getSignBit(X) != FPUtils::getSignBit(Y)) [[unlikely]] {
+      // When X == Y, different sign bits imply that X and Y are +0.0 and -0.0
+      // (in any order). Since we want to treat them as unequal in the context
+      // of accuracy testing of mathematical functions, we return the smallest
+      // non-zero value
+      return 1;
+    }
+    return 0;
+  }
+  // From here on X != Y, which is also the case whenever either one is NaN
+  const bool XIsNaN = FPUtils::isNaN(X);
+  const bool YIsNaN = FPUtils::isNaN(Y);
+  // Two NaNs are treated as a match; a lone NaN as the largest distance
+  if (XIsNaN && YIsNaN) {
+    return 0;
+  }
+  if (XIsNaN || YIsNaN) {
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  constexpr StorageType SignMask = FPUtils::SignMask;
+
+  // Linearise FloatType values into an ordered unsigned space:
+  //  * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  //  * The difference |map(a) − map(b)| equals the number of std::nextafter
+  //    steps between a and b within the same type
+  auto MapToOrderedUnsigned = [](FloatType Value) {
+    const StorageType Bits = FPUtils::getAsBits(Value);
+    return (Bits & SignMask) ? SignMask - (Bits - SignMask) : SignMask + Bits;
+  };
+
+  const StorageType MappedX = MapToOrderedUnsigned(X);
+  const StorageType MappedY = MapToOrderedUnsigned(Y);
+  return static_cast<uint64_t>(MappedX > MappedY ? MappedX - MappedY
+                                                 : MappedY - MappedX);
+}
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
new file mode 100644
index 0000000000000..099b86af2929d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ol_error_struct_t;
+typedef const struct ol_error_struct_t *ol_result_t; // forward-declared only
+#define OL_SUCCESS (static_cast<ol_result_t>(nullptr))
+
+struct ol_device_impl_t;
+typedef struct ol_device_impl_t *ol_device_handle_t;
+
+struct ol_program_impl_t;
+typedef struct ol_program_impl_t *ol_program_handle_t;
+
+struct ol_symbol_impl_t;
+typedef struct ol_symbol_impl_t *ol_symbol_handle_t;
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/offload/unittests/Conformance/include/mathtest/Support.hpp b/offload/unittests/Conformance/include/mathtest/Support.hpp
new file mode 100644
index 0000000000000..e2b41f9dec9ab
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Support.hpp
@@ -0,0 +1,138 @@
+#pragma once
+
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace mathtest {
+
+//===----------------------------------------------------------------------===//
+// Function & Type Traits
+//===----------------------------------------------------------------------===//
+
+namespace detail {
+
+template <typename T> struct FunctionTraitsImpl;
+
+template <typename RetType, typename... ArgTypes>
+struct FunctionTraitsImpl<RetType(ArgTypes...)> {
+  using ReturnType = RetType;
+  using ArgTypesTuple = std::tuple<ArgTypes...>;
+};
+
+template <typename RetType, typename... ArgTypes>
+struct FunctionTraitsImpl<RetType(ArgTypes...) noexcept>
+    : FunctionTraitsImpl<RetType(ArgTypes...)> {};
+
+template <typename FuncType>
+struct FunctionTraitsImpl<FuncType *> : FunctionTraitsImpl<FuncType> {};
+} // namespace detail
+
+template <auto Func>
+using FunctionTraits = detail::FunctionTraitsImpl<
+    std::remove_pointer_t<std::decay_t<decltype(Func)>>>;
+
+template <typename FuncType>
+using FunctionTypeTraits = detail::FunctionTraitsImpl<FuncType>;
+
+template <typename T> struct TypeIdentityOf {
+  using type = T;
+};
+
+template <typename TupleTypes, template <typename...> class Template>
+struct ApplyTupleTypes;
+
+template <template <typename...> class Template, typename... Ts>
+struct ApplyTupleTypes<std::tuple<Ts...>, Template> {
+  using type = Template<Ts...>;
+};
+
+template <typename TupleTypes, template <typename...> class Template>
+using ApplyTupleTypes_t = typename ApplyTupleTypes<TupleTypes, Template>::type;
+
+namespace detail {
+
+template <typename T> struct KernelSignatureOfImpl;
+
+template <typename RetType, typename... ArgTypes>
+struct KernelSignatureOfImpl<RetType(ArgTypes...)> {
+  using type = void(const std::decay_t<ArgTypes> *..., RetType *, std::size_t);
+};
+
+template <typename RetType, typename... ArgTypes>
+struct KernelSignatureOfImpl<RetType(ArgTypes...) noexcept>
+    : KernelSignatureOfImpl<RetType(ArgTypes...)> {};
+} // namespace detail
+
+template <auto Func>
+using KernelSignatureOf = detail::KernelSignatureOfImpl<
+    std::remove_pointer_t<std::decay_t<decltype(Func)>>>;
+
+template <auto Func>
+using KernelSignatureOf_t = typename KernelSignatureOf<Func>::type;
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Packing
+//===----------------------------------------------------------------------===//
+
+template <typename... ArgTypes> struct KernelArgsPack;
+
+template <typename ArgType> struct KernelArgsPack<ArgType> {
+  std::decay_t<ArgType> Arg;
+
+  constexpr KernelArgsPack(ArgType &&Arg) : Arg(std::forward<ArgType>(Arg)) {}
+};
+
+template <typename ArgType0, typename ArgType1, typename... ArgTypes>
+struct KernelArgsPack<ArgType0, ArgType1, ArgTypes...> {
+  std::decay_t<ArgType0> Arg0;
+  KernelArgsPack<ArgType1, ArgTypes...> Args;
+
+  constexpr KernelArgsPack(ArgType0 &&Arg0, ArgType1 &&Arg1, ArgTypes &&...Args)
+      : Arg0(std::forward<ArgType0>(Arg0)),
+        Args(std::forward<ArgType1>(Arg1), std::forward<ArgTypes>(Args)...) {}
+};
+
+template <typename... ArgTypes>
+KernelArgsPack<ArgTypes...> makeKernelArgsPack(ArgTypes &&...Args) {
+  return KernelArgsPack<ArgTypes...>(std::forward<ArgTypes>(Args)...);
+}
+
+//===----------------------------------------------------------------------===//
+// Configuration Helpers
+//===----------------------------------------------------------------------===//
+
+template <auto Func> struct FunctionConfig;
+
+namespace detail {
+
+// Divides a fixed 512 MiB budget by the combined size of one element of each
+// type in BufferTypes
+template <typename... BufferTypes>
+static constexpr std::size_t getDefaultBufferSize() {
+  static_assert(sizeof...(BufferTypes) > 0,
+                "At least one buffer type must be provided");
+  constexpr std::size_t BudgetInBytes = 512ULL << 20; // 512 MiB
+  constexpr std::size_t BytesPerElementTuple = (sizeof(BufferTypes) + ...);
+  static_assert(BytesPerElementTuple > 0,
+                "Cannot calculate buffer size for empty types");
+
+  return BudgetInBytes / BytesPerElementTuple;
+}
+} // namespace detail
+
+template <typename BufferType, typename BufferTupleTypes>
+struct DefaultBufferSizeFor;
+
+template <typename BufferType, typename... BufferTypes>
+struct DefaultBufferSizeFor<BufferType, std::tuple<BufferTypes...>> {
+  static constexpr std::size_t value // NOLINT(readability-identifier-naming)
+      = detail::getDefaultBufferSize<BufferType, BufferTypes...>();
+};
+
+template <typename OutType, typename InTypesTuple>
+inline constexpr std::size_t
+    DefaultBufferSizeFor_v // NOLINT(readability-identifier-naming)
+    = DefaultBufferSizeFor<OutType, InTypesTuple>::value;
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/TestResult.hpp b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
new file mode 100644
index 0000000000000..cdb4f2fa09fa1
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+// Summary of checked test cases; partial results are merged via accumulate()
+template <typename OutType, typename... InTypes>
+class [[nodiscard]] TestResult {
+public:
+  struct [[nodiscard]] TestCase {
+    std::tuple<InTypes...> Inputs;
+    OutType Actual;
+    OutType Expected;
+
+    explicit constexpr TestCase(std::tuple<InTypes...> &&Inputs, OutType Actual,
+                                OutType Expected) noexcept
+        : Inputs(std::move(Inputs)), Actual(std::move(Actual)),
+          Expected(std::move(Expected)) {}
+  };
+  // Empty result: all counters are zero and there is no failing case
+  TestResult() = default;
+  // Result of a single test case; Case is stored only if IsFailure is true
+  explicit TestResult(uint64_t UlpDistance, bool IsFailure,
+                      TestCase &&Case) noexcept
+      : MaxUlpDistance(UlpDistance), FailureCount(IsFailure ? 1 : 0),
+        TestCaseCount(1) {
+    if (IsFailure) {
+      WorstFailingCase.emplace(std::move(Case));
+    }
+  }
+  // Sums the counters; takes Other's max distance and worst case if larger
+  void accumulate(const TestResult &Other) noexcept {
+    if (Other.MaxUlpDistance > MaxUlpDistance) {
+      MaxUlpDistance = Other.MaxUlpDistance;
+      WorstFailingCase = Other.WorstFailingCase;
+    }
+
+    FailureCount += Other.FailureCount;
+    TestCaseCount += Other.TestCaseCount;
+  }
+
+  [[nodiscard]] bool hasPassed() const noexcept { return FailureCount == 0; }
+
+  [[nodiscard]] uint64_t getMaxUlpDistance() const noexcept {
+    return MaxUlpDistance;
+  }
+
+  [[nodiscard]] uint64_t getFailureCount() const noexcept {
+    return FailureCount;
+  }
+
+  [[nodiscard]] uint64_t getTestCaseCount() const noexcept {
+    return TestCaseCount;
+  }
+
+  [[nodiscard]] const std::optional<TestCase> &
+  getWorstFailingCase() const noexcept {
+    return WorstFailingCase;
+  }
+
+private:
+  uint64_t MaxUlpDistance = 0;
+  uint64_t FailureCount = 0;
+  uint64_t TestCaseCount = 0;
+  std::optional<TestCase> WorstFailingCase;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
new file mode 100644
index 0000000000000..764642647e84b
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -0,0 +1,118 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <chrono>
+#include <tuple>
+
+namespace mathtest {
+namespace detail {
+// Prints Value; floating-point values are followed by their bits in hex
+template <typename T>
+void printValue(llvm::raw_ostream &OS, const T &Value) noexcept {
+  if constexpr (IsFloatingPoint_v<T>) {
+    using FPUtils = FPUtils<T>;
+    // Types narrower than float are converted to float before printing
+    if constexpr (sizeof(T) < sizeof(float)) {
+      OS << float(Value);
+    } else {
+      OS << Value;
+    }
+
+    OS << llvm::formatv(" (0x{0})",
+                        llvm::Twine::utohexstr(FPUtils::getAsBits(Value)));
+  } else {
+    OS << Value;
+  }
+}
+// Prints the tuple's elements in order via printValue, separated by ", "
+template <typename... Ts>
+void printValues(llvm::raw_ostream &OS,
+                 const std::tuple<Ts...> &ValuesTuple) noexcept {
+  std::apply(
+      [&OS](const auto &...Values) {
+        bool IsFirst = true;
+        auto Print = [&](const auto &Value) {
+          if (!IsFirst) {
+            OS << ", ";
+          }
+          printValue(OS, Value);
+          IsFirst = false;
+        };
+        (Print(Values), ...);
+      },
+      ValuesTuple);
+}
+// Prints the inputs, actual value, and expected value of TestCase, one per line
+template <typename TestCaseType>
+void printWorstFailingCase(llvm::raw_ostream &OS,
+                           const TestCaseType &TestCase) noexcept {
+  OS << "--- Worst Failing Case ---\n";
+  OS << llvm::formatv("  {0,-14} : ", "Input(s)");
+  printValues(OS, TestCase.Inputs);
+  OS << "\n";
+
+  OS << llvm::formatv("  {0,-14} : ", "Actual");
+  printValue(OS, TestCase.Actual);
+  OS << "\n";
+
+  OS << llvm::formatv("  {0,-14} : ", "Expected");
+  printValue(OS, TestCase.Expected);
+  OS << "\n";
+}
+// Prints a summary of Result for Test, plus the worst failing case if present
+template <typename TestType, typename ResultType>
+void printReport(const TestType &Test, const ResultType &Result,
+                 const std::chrono::steady_clock::duration &Duration) noexcept {
+  using FunctionConfig = typename TestType::FunctionConfig;
+
+  const bool Passed = Result.hasPassed();
+  const auto ElapsedMilliseconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
+
+  llvm::errs() << llvm::formatv("=== Test Report for '{0}' === \n",
+                                FunctionConfig::Name);
+  llvm::errs() << llvm::formatv("{0,-17}: {1} ({2})\n", "Device",
+                                Test.getContext().getName(),
+                                Test.getContext().getPlatform());
+  llvm::errs() << llvm::formatv("{0,-17}: {1} ms\n", "Elapsed time",
+                                ElapsedMilliseconds);
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "ULP tolerance",
+                                FunctionConfig::UlpTolerance);
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Max ULP distance",
+                                Result.getMaxUlpDistance());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Test cases",
+                                Result.getTestCaseCount());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Failures",
+                                Result.getFailureCount());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Status",
+                                Passed ? "PASSED" : "FAILED");
+  // Bind by reference: copying the optional would copy the stored test case
+  if (const auto &Worst = Result.getWorstFailingCase()) {
+    printWorstFailingCase(llvm::errs(), Worst.value());
+  }
+
+  llvm::errs().flush();
+}
+} // namespace detail
+// Times Test.run(Generator), prints the report, and returns whether it passed
+template <typename TestType>
+[[nodiscard]] bool
+runTest(const TestType &Test,
+        typename TestType::GeneratorType &Generator) noexcept {
+  const auto StartTime = std::chrono::steady_clock::now();
+
+  auto Result = Test.run(Generator);
+
+  const auto EndTime = std::chrono::steady_clock::now();
+  const auto Duration = EndTime - StartTime;
+
+  detail::printReport(Test, Result, Duration);
+
+  return Result.hasPassed();
+}
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
new file mode 100644
index 0000000000000..3242349ce6b4d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+namespace mathtest {
+
+#ifdef __FLT16_MAX__
+#define MATHTEST_HAS_FLOAT16
+typedef _Float16 float16;
+#endif
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/lib/CMakeLists.txt b/offload/unittests/Conformance/lib/CMakeLists.txt
new file mode 100644
index 0000000000000..a0402a54fbadf
--- /dev/null
+++ b/offload/unittests/Conformance/lib/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_library(MathTest STATIC DeviceContext.cpp DeviceResources.cpp ErrorHandling.cpp)
+
+target_include_directories(MathTest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+target_compile_options(MathTest PUBLIC -fno-rtti)
+target_link_libraries(MathTest PUBLIC LLVMOffload LLVMSupport LLVMDemangle)
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
new file mode 100644
index 0000000000000..a0ca8bab3ddf9
--- /dev/null
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -0,0 +1,201 @@
+#include "mathtest/DeviceContext.hpp"
+
+#include "mathtest/ErrorHandling.hpp"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+#include <OffloadAPI.h>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using namespace mathtest;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// The static 'Wrapper' instance ensures olInit() is called once at program
+// startup and olShutDown() is called once at program termination
+struct OffloadInitWrapper {
+  OffloadInitWrapper() { OL_CHECK(olInit()); }
+  ~OffloadInitWrapper() { OL_CHECK(olShutDown()); }
+};
+static OffloadInitWrapper Wrapper{};
+// Returns the backend of the platform that DeviceHandle belongs to
+[[nodiscard]] ol_platform_backend_t
+getBackend(ol_device_handle_t DeviceHandle) noexcept {
+  ol_platform_handle_t Platform = nullptr;
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
+                           sizeof(Platform), &Platform));
+  ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
+  OL_CHECK(olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
+                             sizeof(Backend), &Backend));
+  return Backend;
+}
+// Returns the list of non-host devices; it is built once, on the first call
+const std::vector<ol_device_handle_t> &getDevices() {
+  // Thread-safe initialization of a static local variable
+  static std::vector<ol_device_handle_t> Devices =
+      []() -> std::vector<ol_device_handle_t> {
+    std::vector<ol_device_handle_t> TmpDevices;
+
+    // Discovers all devices that are not the host
+    const auto *const ResultFromIterate = olIterateDevices(
+        [](ol_device_handle_t DeviceHandle, void *Data) {
+          if (getBackend(DeviceHandle) != OL_PLATFORM_BACKEND_HOST) {
+            static_cast<std::vector<ol_device_handle_t> *>(Data)->push_back(
+                DeviceHandle);
+          }
+          return true;
+        },
+        &TmpDevices);
+
+    OL_CHECK(ResultFromIterate);
+
+    return TmpDevices;
+  }();
+
+  return Devices;
+}
+} // namespace
+
+std::size_t mathtest::countDevices() { return getDevices().size(); }
+
+void detail::allocManagedMemory(ol_device_handle_t DeviceHandle,
+                                std::size_t Size,
+                                void **AllocationOut) noexcept {
+  OL_CHECK(
+      olMemAlloc(DeviceHandle, OL_ALLOC_TYPE_MANAGED, Size, AllocationOut));
+}
+
+//===----------------------------------------------------------------------===//
+// DeviceContext
+//===----------------------------------------------------------------------===//
+// Selects the device at index DeviceId in getDevices(); FATAL_ERROR otherwise
+DeviceContext::DeviceContext(std::size_t DeviceId)
+    : DeviceId(DeviceId), DeviceHandle(nullptr) {
+  const auto &Devices = getDevices();
+
+  if (DeviceId >= Devices.size()) {
+    FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) + ", but only " +
+                llvm::Twine(Devices.size()) + " devices are available");
+  }
+
+  DeviceHandle = Devices[DeviceId];
+}
+// Reads Directory/BinaryName+Extension and creates a device program from it
+[[nodiscard]] std::shared_ptr<DeviceImage>
+DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
+                          llvm::StringRef Extension) const {
+  llvm::SmallString<128> FullPath(Directory);
+  llvm::sys::path::append(FullPath, llvm::Twine(BinaryName) + Extension);
+
+  // For simplicity, this implementation intentionally reads the binary from
+  // disk on every call.
+  //
+  // Other use cases could benefit from a global, thread-safe cache to avoid
+  // redundant file I/O and GPU program creation.
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
+      llvm::MemoryBuffer::getFile(FullPath);
+  if (std::error_code ErrorCode = FileOrErr.getError()) {
+    FATAL_ERROR(llvm::Twine("Failed to read device binary file '") + FullPath +
+                "': " + ErrorCode.message());
+  }
+  std::unique_ptr<llvm::MemoryBuffer> &BinaryData = *FileOrErr;
+
+  ol_program_handle_t ProgramHandle = nullptr;
+  OL_CHECK(olCreateProgram(DeviceHandle, BinaryData->getBufferStart(),
+                           BinaryData->getBufferSize(), &ProgramHandle));
+
+  return std::shared_ptr<DeviceImage>(
+      new DeviceImage(DeviceHandle, ProgramHandle));
+}
+// Picks the binary extension from the device backend, then loads the binary
+[[nodiscard]] std::shared_ptr<DeviceImage>
+DeviceContext::loadBinary(llvm::StringRef Directory,
+                          llvm::StringRef BinaryName) const {
+  llvm::StringRef Extension;
+  // Any non-host backend can reach here, so the default case must fail loudly
+  switch (getBackend(DeviceHandle)) {
+  case OL_PLATFORM_BACKEND_AMDGPU:
+    Extension = ".amdgpu.bin";
+    break;
+  case OL_PLATFORM_BACKEND_CUDA:
+    Extension = ".nvptx64.bin";
+    break;
+  default:
+    FATAL_ERROR(llvm::Twine("Unsupported backend to infer binary extension"));
+  }
+
+  return loadBinary(Directory, BinaryName, Extension);
+}
+
+void DeviceContext::getKernelImpl(
+    ol_program_handle_t ProgramHandle, llvm::StringRef KernelName,
+    ol_symbol_handle_t *KernelHandle) const noexcept {
+  llvm::SmallString<32> KernelNameBuffer(KernelName);
+  OL_CHECK(olGetSymbol(ProgramHandle, KernelNameBuffer.c_str(),
+                       OL_SYMBOL_KIND_KERNEL, KernelHandle));
+}
+
+void DeviceContext::launchKernelImpl(
+    ol_symbol_handle_t KernelHandle, const Dim &NumGroups, const Dim &GroupSize,
+    const void *KernelArgs, std::size_t KernelArgsSize) const noexcept {
+  ol_kernel_launch_size_args_t LaunchArgs;
+  LaunchArgs.Dimensions = 3; // It seems this field is not used anywhere.
+                             // Defaulting to the safest value
+  LaunchArgs.NumGroups = {NumGroups[0], NumGroups[1], NumGroups[2]};
+  LaunchArgs.GroupSize = {GroupSize[0], GroupSize[1], GroupSize[2]};
+  LaunchArgs.DynSharedMemory = 0;
+
+  OL_CHECK(olLaunchKernel(nullptr, DeviceHandle, KernelHandle, KernelArgs,
+                          KernelArgsSize, &LaunchArgs, nullptr));
+}
+// Returns the device name, or an empty string when its reported size is zero
+[[nodiscard]] std::string DeviceContext::getName() const {
+  std::size_t PropSize = 0;
+  OL_CHECK(olGetDeviceInfoSize(DeviceHandle, OL_DEVICE_INFO_NAME, &PropSize));
+
+  if (PropSize == 0) {
+    return "";
+  }
+  // The value is read into a PropSize-byte string and then trimmed by one
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_NAME, PropSize,
+                           PropValue.data()));
+  PropValue.pop_back(); // Remove the null terminator
+
+  return PropValue;
+}
+// Returns the name of the device's platform, or "" when its size is zero
+[[nodiscard]] std::string DeviceContext::getPlatform() const {
+  ol_platform_handle_t PlatformHandle = nullptr;
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
+                           sizeof(ol_platform_handle_t), &PlatformHandle));
+
+  std::size_t PropSize = 0;
+  OL_CHECK(
+      olGetPlatformInfoSize(PlatformHandle, OL_PLATFORM_INFO_NAME, &PropSize));
+
+  if (PropSize == 0) {
+    return "";
+  }
+  // The value is read into a PropSize-byte string and then trimmed by one
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_NAME, PropSize,
+                             PropValue.data()));
+  PropValue.pop_back(); // Remove the null terminator
+
+  return PropValue;
+}
diff --git a/offload/unittests/Conformance/lib/DeviceResources.cpp b/offload/unittests/Conformance/lib/DeviceResources.cpp
new file mode 100644
index 0000000000000..5d1c94dca7677
--- /dev/null
+++ b/offload/unittests/Conformance/lib/DeviceResources.cpp
@@ -0,0 +1,54 @@
+#include "mathtest/DeviceResources.hpp"
+
+#include "mathtest/ErrorHandling.hpp"
+
+#include <OffloadAPI.h>
+
+using namespace mathtest;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+// Releases Address through olMemFree; a null Address is ignored.
+void detail::freeDeviceMemory(void *Address) noexcept {
+  if (Address) {
+    OL_CHECK(olMemFree(Address));
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// DeviceImage
+//===----------------------------------------------------------------------===//
+// Destroys the held program, if any; a null Handle means nothing to release.
+DeviceImage::~DeviceImage() noexcept {
+  if (Handle) {
+    OL_CHECK(olDestroyProgram(Handle));
+  }
+}
+// Move assignment: releases the current program, then takes over Other's.
+DeviceImage &DeviceImage::operator=(DeviceImage &&Other) noexcept {
+  if (this == &Other)
+    return *this;
+  // Destroy the program held so far; its handle is overwritten below.
+  if (Handle) {
+    OL_CHECK(olDestroyProgram(Handle));
+  }
+
+  DeviceHandle = Other.DeviceHandle;
+  Handle = Other.Handle;
+  // Null out the source so its destructor does not destroy the program too.
+  Other.DeviceHandle = nullptr;
+  Other.Handle = nullptr;
+
+  return *this;
+}
+// Move constructor: adopts Other's handles and leaves Other's members null.
+DeviceImage::DeviceImage(DeviceImage &&Other) noexcept
+    : DeviceHandle(Other.DeviceHandle), Handle(Other.Handle) {
+  Other.DeviceHandle = nullptr;
+  Other.Handle = nullptr;
+}
+// Stores both handles; Handle is later passed to olDestroyProgram on cleanup.
+DeviceImage::DeviceImage(ol_device_handle_t DeviceHandle,
+                         ol_program_handle_t Handle) noexcept
+    : DeviceHandle(DeviceHandle), Handle(Handle) {}
diff --git a/offload/unittests/Conformance/lib/ErrorHandling.cpp b/offload/unittests/Conformance/lib/ErrorHandling.cpp
new file mode 100644
index 0000000000000..0f85260bc7926
--- /dev/null
+++ b/offload/unittests/Conformance/lib/ErrorHandling.cpp
@@ -0,0 +1,37 @@
+#include "mathtest/ErrorHandling.hpp"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <OffloadAPI.h>
+
+using namespace mathtest;
+// Reports Message with its origin through llvm::report_fatal_error.
+[[noreturn]] void detail::reportFatalError(const llvm::Twine &Message,
+                                           const char *File, int Line,
+                                           const char *FuncName) {
+  // clang-format off
+  llvm::report_fatal_error(
+      llvm::Twine("Fatal error in '") + FuncName +
+          "' at " + File + ":" + llvm::Twine(Line) +
+          "\n  Message: " + Message,
+      /*gen_crash_diag=*/false);
+  // clang-format on
+}
+// Reports a failed OL_CHECK; Result is dereferenced, so it must be non-null.
+[[noreturn]] void detail::reportOffloadError(const char *ResultExpr,
+                                             ol_result_t Result,
+                                             const char *File, int Line,
+                                             const char *FuncName) {
+  // clang-format off
+  llvm::report_fatal_error(
+      llvm::Twine("OL_CHECK failed") +
+          "\n  Location: " + File + ":" + llvm::Twine(Line) +
+          "\n  Function: " + FuncName +
+          "\n  Expression: " + ResultExpr +
+          "\n  Error code: " + llvm::Twine(Result->Code) +
+          "\n  Details: " +
+          (Result->Details ? Result->Details : "No details provided"),
+      /*gen_crash_diag=*/false);
+  // clang-format on
+}
diff --git a/offload/unittests/Conformance/sin.cpp b/offload/unittests/Conformance/sin.cpp
deleted file mode 100644
index 9e15690a9e9d7..0000000000000
--- a/offload/unittests/Conformance/sin.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <OffloadAPI.h>
-#include <math.h>
-
-llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
-
-int main() { llvm::errs() << sin(0.0) << "\n"; }
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
new file mode 100644
index 0000000000000..b5da56f46fc05
--- /dev/null
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_conformance_test(Hypotf16Test Hypotf16Test.cpp)
+add_conformance_test(LogfTest LogfTest.cpp)
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
new file mode 100644
index 0000000000000..8c6b5054e2d4c
--- /dev/null
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -0,0 +1,51 @@
+#include "mathtest/TypeExtras.hpp"
+// NOTE(review): no main() is defined without MATHTEST_HAS_FLOAT16; confirm.
+#ifdef MATHTEST_HAS_FLOAT16
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/GpuMathTest.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+#include <memory>
+
+using namespace mathtest;
+
+extern "C" {
+// Declared by hand; NOTE(review): presumably <math.h> lacks it — confirm.
+float16 hypotf16(float16, float16);
+}
+
+namespace mathtest {
+// Configuration for hypotf16: its name, its kernel name, and ULP tolerance.
+template <> struct FunctionConfig<hypotf16> {
+  static constexpr llvm::StringRef Name = "hypotf16";
+  static constexpr llvm::StringRef KernelName = "hypotf16Kernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 69 (Full Profile), Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 2;
+};
+} // namespace mathtest
+// Builds the hypotf16 test and its generator; exits with runTest's verdict.
+int main() {
+  // TODO: Add command-line arguments parsing for test configuration.
+  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
+  const llvm::StringRef Provider = "llvm-libm";
+  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+
+  GpuMathTest<hypotf16> Hypotf16Test(Context, Provider, DeviceBinsDirectory);
+  // NOTE(review): default ranges presumably span all float16 values; confirm.
+  IndexedRange<float16> RangeX;
+  IndexedRange<float16> RangeY;
+  ExhaustiveGenerator<float16, float16> Generator(RangeX, RangeY);
+
+  const auto Passed = runTest(Hypotf16Test, Generator);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+#endif // MATHTEST_HAS_FLOAT16
diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
new file mode 100644
index 0000000000000..1af5e844ccdb1
--- /dev/null
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -0,0 +1,44 @@
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/GpuMathTest.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <limits>
+#include <math.h>
+#include <memory>
+
+namespace mathtest {
+// Configuration for logf: its name, its kernel name, and ULP tolerance.
+template <> struct FunctionConfig<logf> {
+  static constexpr llvm::StringRef Name = "logf";
+  static constexpr llvm::StringRef KernelName = "logfKernel";
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 3;
+};
+} // namespace mathtest
+// Builds the logf test and its generator; exits with runTest's verdict.
+int main() {
+  using namespace mathtest;
+
+  // TODO: Add command-line arguments parsing for test configuration.
+  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
+  const llvm::StringRef Provider = "llvm-libm";
+  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+
+  GpuMathTest<logf> LogfTest(Context, Provider, DeviceBinsDirectory);
+  // The range runs from 0.0f to +infinity and is marked inclusive.
+  IndexedRange<float> Range(/*Begin=*/0.0f,
+                            /*End=*/std::numeric_limits<float>::infinity(),
+                            /*Inclusive=*/true);
+  ExhaustiveGenerator<float> Generator(Range);
+
+  const auto Passed = runTest(LogfTest, Generator);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
\ No newline at end of file

>From d781aecb5026491f20f8f4d580c04b6ff403ba22 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 17 Jul 2025 01:08:40 -0300
Subject: [PATCH 02/21] Add trailing newline

---
 offload/unittests/Conformance/tests/LogfTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
index 1af5e844ccdb1..1c866720843ff 100644
--- a/offload/unittests/Conformance/tests/LogfTest.cpp
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -41,4 +41,4 @@ int main() {
   const auto Passed = runTest(LogfTest, Generator);
 
   return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
-}
\ No newline at end of file
+}

>From fa16a4d158d77ab20c7af34c097222790d77b1fd Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 17 Jul 2025 01:15:44 -0300
Subject: [PATCH 03/21] Pass correct GPU architecture to device compiler

---
 offload/unittests/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index bea49387528b0..ad64f800da014 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -39,7 +39,7 @@ function(add_offload_test_device_code test_filename test_name)
       add_custom_command(
         OUTPUT ${output_file}
         COMMAND ${CMAKE_C_COMPILER}
-        --target=nvptx64-nvidia-cuda -march=native
+        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
         -stdlib -nogpulib --cuda-path=${CUDA_ROOT} -flto -fno-builtin ${ARGN}
         ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}

>From 386b179aa80e26bf64be6fa23d8e9abca01fe436 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 17 Jul 2025 01:35:15 -0300
Subject: [PATCH 04/21] Annotate `#endif` directives for clarity

---
 offload/unittests/Conformance/device_code/LLVMLibm.c   |  4 ++--
 .../Conformance/include/mathtest/Numerics.hpp          | 10 +++++-----
 .../Conformance/include/mathtest/OffloadForward.hpp    |  4 ++--
 .../Conformance/include/mathtest/TypeExtras.hpp        |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
index 5f436a235eb08..ad08344b1c538 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.c
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -6,7 +6,7 @@
 #ifdef __FLT16_MAX__
 #define HAS_FLOAT16
 typedef _Float16 float16;
-#endif
+#endif // __FLT16_MAX__
 
 #ifdef HAS_FLOAT16
 __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
@@ -18,7 +18,7 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
     Out[Index] = hypotf16(X[Index], Y[Index]);
   }
 }
-#endif
+#endif // HAS_FLOAT16
 
 __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
   uint32_t Index =
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index f43a26974dda7..3957b2432b12e 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -36,7 +36,7 @@ template <typename T> struct StorageTypeOf {
 template <> struct StorageTypeOf<float16> {
   using type = uint16_t;
 };
-#endif
+#endif // MATHTEST_HAS_FLOAT16
 
 template <> struct StorageTypeOf<float> {
   using type = uint32_t;
@@ -52,7 +52,7 @@ template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
 
 #ifdef MATHTEST_HAS_FLOAT16
 template <> struct IsFloatingPoint<float16> : std::true_type {};
-#endif
+#endif // MATHTEST_HAS_FLOAT16
 
 template <typename T>
 inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
@@ -101,7 +101,7 @@ template <> struct FPLayout<float16> {
   static constexpr std::size_t ExponentLen = 5;
   static constexpr std::size_t FractionLen = 10;
 };
-#endif
+#endif // MATHTEST_HAS_FLOAT16
 
 template <> struct FPLayout<float> {
   static constexpr std::size_t SignLen = 1;
@@ -167,7 +167,7 @@ template <> [[nodiscard]] constexpr float16 getMinOrNegInf<float16>() noexcept {
 
   return __builtin_bit_cast(float16, static_cast<StorageType>(0xFC00U));
 }
-#endif
+#endif // MATHTEST_HAS_FLOAT16
 
 template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
   static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
@@ -186,7 +186,7 @@ template <> [[nodiscard]] constexpr float16 getMaxOrInf<float16>() noexcept {
 
   return __builtin_bit_cast(float16, static_cast<StorageType>(0x7C00U));
 }
-#endif
+#endif // MATHTEST_HAS_FLOAT16
 
 template <typename FloatType>
 [[nodiscard]] uint64_t computeUlpDistance(FloatType X, FloatType Y) noexcept {
diff --git a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
index 099b86af2929d..9609bd19f06b2 100644
--- a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
+++ b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
@@ -2,7 +2,7 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif
+#endif // __cplusplus
 
 struct ol_error_struct_t;
 typedef const ol_error_struct_t *ol_result_t;
@@ -19,4 +19,4 @@ typedef struct ol_symbol_impl_t *ol_symbol_handle_t;
 
 #ifdef __cplusplus
 }
-#endif
+#endif // __cplusplus
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
index 3242349ce6b4d..bfd58be5050ea 100644
--- a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -5,5 +5,5 @@ namespace mathtest {
 #ifdef __FLT16_MAX__
 #define MATHTEST_HAS_FLOAT16
 typedef _Float16 float16;
-#endif
+#endif // __FLT16_MAX__
 } // namespace mathtest

>From 6d5d2143e2a54977333b12ca90135558dda9cc44 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Fri, 18 Jul 2025 14:59:58 -0300
Subject: [PATCH 05/21] Rename aliases for consistency

---
 .../Conformance/include/mathtest/HostRefChecker.hpp    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
index c45137d652df2..a0083b1d1cedf 100644
--- a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
+++ b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
@@ -15,16 +15,16 @@
 namespace mathtest {
 
 template <auto Func> class HostRefChecker {
-  using FuncTraits = FunctionTraits<Func>;
-  using InTypesTuple = typename FuncTraits::ArgTypesTuple;
+  using FunctionTraits = FunctionTraits<Func>;
+  using InTypesTuple = typename FunctionTraits::ArgTypesTuple;
 
-  using FuncConfig = FunctionConfig<Func>;
+  using FunctionConfig = FunctionConfig<Func>;
 
   template <typename... Ts>
   using BuffersTupleType = std::tuple<llvm::ArrayRef<Ts>...>;
 
 public:
-  using OutType = typename FuncTraits::ReturnType;
+  using OutType = typename FunctionTraits::ReturnType;
 
 private:
   template <typename... Ts>
@@ -62,7 +62,7 @@ template <auto Func> class HostRefChecker {
       const OutType Expected = std::apply(Func, CurrentInputsTuple);
 
       const auto UlpDistance = computeUlpDistance(Actual, Expected);
-      const bool IsFailure = UlpDistance > FuncConfig::UlpTolerance;
+      const bool IsFailure = UlpDistance > FunctionConfig::UlpTolerance;
 
       return ResultType(UlpDistance, IsFailure,
                         typename ResultType::TestCase(

>From 0a1282ae14abbf72725e1457af99a619d2f9a331 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Fri, 18 Jul 2025 16:51:49 -0300
Subject: [PATCH 06/21] Replace `#pragma once` with standard include guards

---
 .../Conformance/include/mathtest/DeviceContext.hpp         | 7 +++++--
 .../Conformance/include/mathtest/DeviceResources.hpp       | 5 ++++-
 offload/unittests/Conformance/include/mathtest/Dim.hpp     | 5 ++++-
 .../Conformance/include/mathtest/ErrorHandling.hpp         | 5 ++++-
 .../Conformance/include/mathtest/ExhaustiveGenerator.hpp   | 5 ++++-
 .../unittests/Conformance/include/mathtest/GpuMathTest.hpp | 5 ++++-
 .../Conformance/include/mathtest/HostRefChecker.hpp        | 5 ++++-
 .../Conformance/include/mathtest/IndexedRange.hpp          | 5 ++++-
 .../Conformance/include/mathtest/InputGenerator.hpp        | 5 ++++-
 .../unittests/Conformance/include/mathtest/Numerics.hpp    | 5 ++++-
 .../Conformance/include/mathtest/OffloadForward.hpp        | 5 ++++-
 offload/unittests/Conformance/include/mathtest/Support.hpp | 5 ++++-
 .../unittests/Conformance/include/mathtest/TestResult.hpp  | 5 ++++-
 .../unittests/Conformance/include/mathtest/TestRunner.hpp  | 5 ++++-
 .../unittests/Conformance/include/mathtest/TypeExtras.hpp  | 5 ++++-
 15 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index 74ef83ce0c195..2e891cfcc3193 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_DEVICECONTEXT_HPP
+#define MATHTEST_DEVICECONTEXT_HPP
 
 #include "mathtest/DeviceResources.hpp"
 #include "mathtest/Dim.hpp"
@@ -33,7 +34,7 @@ class DeviceContext {
   // and enqueued memcpy, as well as device and host memory allocation.
 
 public:
-  // TODO: Add a constructor that also takes a 'Provider'.
+  // TODO: Add a constructor that also takes a 'Platform'.
   explicit DeviceContext(std::size_t DeviceId = 0);
 
   template <typename T>
@@ -119,3 +120,5 @@ class DeviceContext {
   ol_device_handle_t DeviceHandle;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_DEVICECONTEXT_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
index 51f7662ef548e..2d7130bd56f62 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_DEVICERESOURCES_HPP
+#define MATHTEST_DEVICERESOURCES_HPP
 
 #include "mathtest/OffloadForward.hpp"
 
@@ -127,3 +128,5 @@ template <typename KernelSignature> class [[nodiscard]] DeviceKernel {
   ol_symbol_handle_t Handle = nullptr;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_DEVICERESOURCES_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
index 948c10e94cbb3..1927d45255632 100644
--- a/offload/unittests/Conformance/include/mathtest/Dim.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Dim.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_DIM_HPP
+#define MATHTEST_DIM_HPP
 
 #include <cassert>
 #include <cstddef>
@@ -40,3 +41,5 @@ class Dim {
   uint32_t Data[3];
 };
 } // namespace mathtest
+
+#endif // MATHTEST_DIM_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
index 932aa79e4c902..813cdd21f4157 100644
--- a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_ERRORHANDLING_HPP
+#define MATHTEST_ERRORHANDLING_HPP
 
 #include "mathtest/OffloadForward.hpp"
 
@@ -27,3 +28,5 @@ namespace detail {
                                      const char *FuncName);
 } // namespace detail
 } // namespace mathtest
+
+#endif // MATHTEST_ERRORHANDLING_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index 1725a5b35f358..626565d085347 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_EXHAUSTIVEGENERATOR_HPP
+#define MATHTEST_EXHAUSTIVEGENERATOR_HPP
 
 #include "mathtest/IndexedRange.hpp"
 #include "mathtest/InputGenerator.hpp"
@@ -137,3 +138,5 @@ class [[nodiscard]] ExhaustiveGenerator final
   std::atomic<uint64_t> FlatIndexGenerator = 0;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_EXHAUSTIVEGENERATOR_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index e5d1c6c77f634..62b23d37bcc24 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_GPUMATHTEST_HPP
+#define MATHTEST_GPUMATHTEST_HPP
 
 #include "mathtest/DeviceContext.hpp"
 #include "mathtest/DeviceResources.hpp"
@@ -157,3 +158,5 @@ class [[nodiscard]] GpuMathTest final {
   DeviceKernel<KernelSignature> Kernel;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_GPUMATHTEST_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
index a0083b1d1cedf..f94f57ed5df16 100644
--- a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
+++ b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_HOSTREFCHECKER_HPP
+#define MATHTEST_HOSTREFCHECKER_HPP
 
 #include "mathtest/Numerics.hpp"
 #include "mathtest/Support.hpp"
@@ -80,3 +81,5 @@ template <auto Func> class HostRefChecker {
   }
 };
 } // namespace mathtest
+
+#endif // MATHTEST_HOSTREFCHECKER_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
index 0f33978c8d30d..7c856ea51aa8f 100644
--- a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
+++ b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_INDEXEDRANGE_HPP
+#define MATHTEST_INDEXEDRANGE_HPP
 
 #include "mathtest/Numerics.hpp"
 
@@ -89,3 +90,5 @@ template <typename T> class [[nodiscard]] IndexedRange {
   StorageType MappedEnd;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_INDEXEDRANGE_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
index d9365d4b14423..0362441399b5e 100644
--- a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_INPUTGENERATOR_HPP
+#define MATHTEST_INPUTGENERATOR_HPP
 
 #include "llvm/ADT/ArrayRef.h"
 
@@ -12,3 +13,5 @@ template <typename... InTypes> class InputGenerator {
   fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept = 0;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_INPUTGENERATOR_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index 3957b2432b12e..83f358fe08ad8 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_NUMERICS_HPP
+#define MATHTEST_NUMERICS_HPP
 
 #include "mathtest/Support.hpp"
 #include "mathtest/TypeExtras.hpp"
@@ -233,3 +234,5 @@ template <typename FloatType>
                                                  : MappedY - MappedX);
 }
 } // namespace mathtest
+
+#endif // MATHTEST_NUMERICS_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
index 9609bd19f06b2..831e5e3b44070 100644
--- a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
+++ b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_OFFLOADFORWARD_HPP
+#define MATHTEST_OFFLOADFORWARD_HPP
 
 #ifdef __cplusplus
 extern "C" {
@@ -20,3 +21,5 @@ typedef struct ol_symbol_impl_t *ol_symbol_handle_t;
 #ifdef __cplusplus
 }
 #endif // __cplusplus
+
+#endif // MATHTEST_OFFLOADFORWARD_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/Support.hpp b/offload/unittests/Conformance/include/mathtest/Support.hpp
index e2b41f9dec9ab..9249af822c1b1 100644
--- a/offload/unittests/Conformance/include/mathtest/Support.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Support.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_SUPPORT_HPP
+#define MATHTEST_SUPPORT_HPP
 
 #include <cstddef>
 #include <tuple>
@@ -136,3 +137,5 @@ inline constexpr std::size_t
     DefaultBufferSizeFor_v // NOLINT(readability-identifier-naming)
     = DefaultBufferSizeFor<OutType, InTypesTuple>::value;
 } // namespace mathtest
+
+#endif // MATHTEST_SUPPORT_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/TestResult.hpp b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
index cdb4f2fa09fa1..ffaa7b7862b0e 100644
--- a/offload/unittests/Conformance/include/mathtest/TestResult.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_TESTRESULT_HPP
+#define MATHTEST_TESTRESULT_HPP
 
 #include <cstdint>
 #include <optional>
@@ -68,3 +69,5 @@ class [[nodiscard]] TestResult {
   std::optional<TestCase> WorstFailingCase;
 };
 } // namespace mathtest
+
+#endif // MATHTEST_TESTRESULT_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 764642647e84b..4a80bc3e181ac 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_TESTRUNNER_HPP
+#define MATHTEST_TESTRUNNER_HPP
 
 #include "mathtest/Numerics.hpp"
 
@@ -116,3 +117,5 @@ runTest(const TestType &Test,
   return Result.hasPassed();
 }
 } // namespace mathtest
+
+#endif // MATHTEST_TESTRUNNER_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
index bfd58be5050ea..18642ec9172d8 100644
--- a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef MATHTEST_TYPEEXTRAS_HPP
+#define MATHTEST_TYPEEXTRAS_HPP
 
 namespace mathtest {
 
@@ -7,3 +8,5 @@ namespace mathtest {
 typedef _Float16 float16;
 #endif // __FLT16_MAX__
 } // namespace mathtest
+
+#endif // MATHTEST_TYPEEXTRAS_HPP

>From 6861827713942f503f437b3208e74ca3fc8ecd12 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Fri, 18 Jul 2025 21:50:55 -0300
Subject: [PATCH 07/21] Omit braces for simple single-statement blocks

---
 .../Conformance/device_code/LLVMLibm.c        |  6 ++----
 .../include/mathtest/DeviceContext.hpp        |  6 ++----
 .../include/mathtest/DeviceResources.hpp      |  6 ++----
 .../Conformance/include/mathtest/Dim.hpp      |  3 +--
 .../include/mathtest/ErrorHandling.hpp        |  4 ++--
 .../include/mathtest/ExhaustiveGenerator.hpp  | 15 +++++--------
 .../include/mathtest/GpuMathTest.hpp          |  3 +--
 .../Conformance/include/mathtest/Numerics.hpp | 21 +++++++------------
 .../include/mathtest/TestResult.hpp           |  3 +--
 .../include/mathtest/TestRunner.hpp           | 11 ++++------
 .../Conformance/lib/DeviceContext.cpp         | 17 +++++++--------
 .../Conformance/lib/DeviceResources.cpp       |  9 +++-----
 12 files changed, 38 insertions(+), 66 deletions(-)

diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
index ad08344b1c538..83db152144cbe 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.c
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -14,9 +14,8 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
   uint32_t Index =
       __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
 
-  if (Index < NumElements) {
+  if (Index < NumElements)
     Out[Index] = hypotf16(X[Index], Y[Index]);
-  }
 }
 #endif // HAS_FLOAT16
 
@@ -24,7 +23,6 @@ __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
   uint32_t Index =
       __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
 
-  if (Index < NumElements) {
+  if (Index < NumElements)
     Out[Index] = logf(X[Index]);
-  }
 }
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index 2e891cfcc3193..ca0bfe20296c0 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -60,10 +60,9 @@ class DeviceContext {
             llvm::StringRef KernelName) const noexcept {
     assert(Image && "Image provided to getKernel is null");
 
-    if (Image->DeviceHandle != this->DeviceHandle) {
+    if (Image->DeviceHandle != this->DeviceHandle)
       FATAL_ERROR("Image provided to getKernel was created for a different "
                   "device");
-    }
 
     ol_symbol_handle_t KernelHandle = nullptr;
     getKernelImpl(Image->Handle, KernelName, &KernelHandle);
@@ -82,10 +81,9 @@ class DeviceContext {
                   "Argument types provided to launchKernel do not match the "
                   "kernel's signature");
 
-    if (Kernel.Image->DeviceHandle != DeviceHandle) {
+    if (Kernel.Image->DeviceHandle != DeviceHandle)
       FATAL_ERROR("Kernel provided to launchKernel was created for a different "
                   "device");
-    }
 
     if constexpr (sizeof...(Args) == 0) {
       launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, nullptr, 0);
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
index 2d7130bd56f62..e00c80565ca9a 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
@@ -25,9 +25,8 @@ void freeDeviceMemory(void *Address) noexcept;
 template <typename T> class [[nodiscard]] ManagedBuffer {
 public:
   ~ManagedBuffer() noexcept {
-    if (Address) {
+    if (Address)
       detail::freeDeviceMemory(Address);
-    }
   }
 
   ManagedBuffer(const ManagedBuffer &) = delete;
@@ -43,9 +42,8 @@ template <typename T> class [[nodiscard]] ManagedBuffer {
     if (this == &Other)
       return *this;
 
-    if (Address) {
+    if (Address)
       detail::freeDeviceMemory(Address);
-    }
 
     Address = Other.Address;
     Size = Other.Size;
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
index 1927d45255632..17553a548f940 100644
--- a/offload/unittests/Conformance/include/mathtest/Dim.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Dim.hpp
@@ -23,9 +23,8 @@ class Dim {
            "The number of dimensions must be less than or equal to 3");
 
     std::size_t Index = 0;
-    for (uint32_t DimValue : Dimensions) {
+    for (uint32_t DimValue : Dimensions)
       Data[Index++] = DimValue;
-    }
 
     assert(Data[0] > 0 && Data[1] > 0 && Data[2] > 0 &&
            "Dimensions must be positive");
diff --git a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
index 813cdd21f4157..329ebe4238319 100644
--- a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
@@ -11,10 +11,10 @@
 #define OL_CHECK(ResultExpr)                                                   \
   do {                                                                         \
     ol_result_t Result = (ResultExpr);                                         \
-    if (Result != OL_SUCCESS) {                                                \
+    if (Result != OL_SUCCESS)                                                  \
       mathtest::detail::reportOffloadError(#ResultExpr, Result, __FILE__,      \
                                            __LINE__, __func__);                \
-    }                                                                          \
+                                                                               \
   } while (false)
 
 namespace mathtest {
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index 626565d085347..73025af4c535e 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -33,17 +33,13 @@ class [[nodiscard]] ExhaustiveGenerator final
     assert((Size > 0) && "The input space size must be at least 1");
 
     IndexArrayType DimSizes = {};
-    {
-      std::size_t Index = 0;
-      ((DimSizes[Index++] = Ranges.getSize()), ...);
-    }
+    std::size_t DimIndex = 0;
+    ((DimSizes[DimIndex++] = Ranges.getSize()), ...);
 
     Strides[NumInputs - 1] = 1;
-    if constexpr (NumInputs > 1) {
-      for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index) {
+    if constexpr (NumInputs > 1)
+      for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
         Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
-      }
-    }
   }
 
   [[nodiscard]] std::size_t
@@ -94,9 +90,8 @@ class [[nodiscard]] ExhaustiveGenerator final
     bool Overflowed = false;
 
     auto Multiplier = [&](const uint64_t RangeSize) {
-      if (!Overflowed) {
+      if (!Overflowed)
         Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
-      }
     };
 
     (Multiplier(Ranges.getSize()), ...);
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index 62b23d37bcc24..f7f0df8c19e41 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -93,9 +93,8 @@ class [[nodiscard]] GpuMathTest final {
                                      .Case("llvm-libm", "LLVMLibm")
                                      .Default("");
 
-    if (BinaryName.empty()) {
+    if (BinaryName.empty())
       FATAL_ERROR(llvm::Twine("Unsupported provider: '") + Provider + "'");
-    }
 
     const auto Image = Context->loadBinary(DeviceBinsDirectory, BinaryName);
 
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index 83f358fe08ad8..9f9e3b820b010 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -20,13 +20,12 @@ namespace mathtest {
 template <typename T> struct StorageTypeOf {
 private:
   static constexpr auto getStorageType() noexcept {
-    if constexpr (std::is_unsigned_v<T>) {
+    if constexpr (std::is_unsigned_v<T>)
       return TypeIdentityOf<T>{};
-    } else if constexpr (std::is_signed_v<T>) {
+    else if constexpr (std::is_signed_v<T>)
       return TypeIdentityOf<std::make_unsigned_t<T>>{};
-    } else {
+    else
       static_assert(!std::is_same_v<T, T>, "Unsupported type");
-    }
   }
 
 public:
@@ -74,7 +73,6 @@ template <typename UIntType, std::size_t Count>
       "Count must be less than or equal to the bit width of UIntType");
 
   return Count == 0 ? UIntType(0) : (~UIntType(0) << (TotalBits - Count));
-  ;
 }
 
 template <typename UIntType, std::size_t Count>
@@ -155,9 +153,8 @@ template <typename T> [[nodiscard]] constexpr T getMinOrNegInf() noexcept {
   static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
 
   if constexpr (std::is_floating_point_v<T> &&
-                std::numeric_limits<T>::has_infinity) {
+                std::numeric_limits<T>::has_infinity)
     return -std::numeric_limits<T>::infinity();
-  }
 
   return std::numeric_limits<T>::lowest();
 }
@@ -174,9 +171,8 @@ template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
   static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
 
   if constexpr (std::is_floating_point_v<T> &&
-                std::numeric_limits<T>::has_infinity) {
+                std::numeric_limits<T>::has_infinity)
     return std::numeric_limits<T>::infinity();
-  }
 
   return std::numeric_limits<T>::max();
 }
@@ -210,12 +206,11 @@ template <typename FloatType>
   const bool XIsNaN = FPUtils::isNaN(X);
   const bool YIsNaN = FPUtils::isNaN(Y);
 
-  if (XIsNaN && YIsNaN) {
+  if (XIsNaN && YIsNaN)
     return 0;
-  }
-  if (XIsNaN || YIsNaN) {
+
+  if (XIsNaN || YIsNaN)
     return std::numeric_limits<uint64_t>::max();
-  }
 
   constexpr StorageType SignMask = FPUtils::SignMask;
 
diff --git a/offload/unittests/Conformance/include/mathtest/TestResult.hpp b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
index ffaa7b7862b0e..be19969b706d9 100644
--- a/offload/unittests/Conformance/include/mathtest/TestResult.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
@@ -28,9 +28,8 @@ class [[nodiscard]] TestResult {
                       TestCase &&Case) noexcept
       : MaxUlpDistance(UlpDistance), FailureCount(IsFailure ? 1 : 0),
         TestCaseCount(1) {
-    if (IsFailure) {
+    if (IsFailure)
       WorstFailingCase.emplace(std::move(Case));
-    }
   }
 
   void accumulate(const TestResult &Other) noexcept {
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 4a80bc3e181ac..45074919f720d 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -18,11 +18,10 @@ void printValue(llvm::raw_ostream &OS, const T &Value) noexcept {
   if constexpr (IsFloatingPoint_v<T>) {
     using FPUtils = FPUtils<T>;
 
-    if constexpr (sizeof(T) < sizeof(float)) {
+    if constexpr (sizeof(T) < sizeof(float))
       OS << float(Value);
-    } else {
+    else
       OS << Value;
-    }
 
     OS << llvm::formatv(" (0x{0})",
                         llvm::Twine::utohexstr(FPUtils::getAsBits(Value)));
@@ -38,9 +37,8 @@ void printValues(llvm::raw_ostream &OS,
       [&OS](const auto &...Values) {
         bool IsFirst = true;
         auto Print = [&](const auto &Value) {
-          if (!IsFirst) {
+          if (!IsFirst)
             OS << ", ";
-          }
           printValue(OS, Value);
           IsFirst = false;
         };
@@ -93,9 +91,8 @@ void printReport(const TestType &Test, const ResultType &Result,
   llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Status",
                                 Passed ? "PASSED" : "FAILED");
 
-  if (auto Worst = Result.getWorstFailingCase()) {
+  if (auto Worst = Result.getWorstFailingCase())
     printWorstFailingCase(llvm::errs(), Worst.value());
-  }
 
   llvm::errs().flush();
 }
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index a0ca8bab3ddf9..90ce75c723186 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -52,10 +52,10 @@ const std::vector<ol_device_handle_t> &getDevices() {
     // Discovers all devices that are not the host
     const auto *const ResultFromIterate = olIterateDevices(
         [](ol_device_handle_t DeviceHandle, void *Data) {
-          if (getBackend(DeviceHandle) != OL_PLATFORM_BACKEND_HOST) {
+          if (getBackend(DeviceHandle) != OL_PLATFORM_BACKEND_HOST)
             static_cast<std::vector<ol_device_handle_t> *>(Data)->push_back(
                 DeviceHandle);
-          }
+
           return true;
         },
         &TmpDevices);
@@ -86,10 +86,9 @@ DeviceContext::DeviceContext(std::size_t DeviceId)
     : DeviceId(DeviceId), DeviceHandle(nullptr) {
   const auto &Devices = getDevices();
 
-  if (DeviceId >= Devices.size()) {
+  if (DeviceId >= Devices.size())
     FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) + ", but only " +
                 llvm::Twine(Devices.size()) + " devices are available");
-  }
 
   DeviceHandle = Devices[DeviceId];
 }
@@ -108,10 +107,10 @@ DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
 
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
       llvm::MemoryBuffer::getFile(FullPath);
-  if (std::error_code ErrorCode = FileOrErr.getError()) {
+  if (std::error_code ErrorCode = FileOrErr.getError())
     FATAL_ERROR(llvm::Twine("Failed to read device binary file '") + FullPath +
                 "': " + ErrorCode.message());
-  }
+
   std::unique_ptr<llvm::MemoryBuffer> &BinaryData = *FileOrErr;
 
   ol_program_handle_t ProgramHandle = nullptr;
@@ -167,9 +166,8 @@ void DeviceContext::launchKernelImpl(
   std::size_t PropSize = 0;
   OL_CHECK(olGetDeviceInfoSize(DeviceHandle, OL_DEVICE_INFO_NAME, &PropSize));
 
-  if (PropSize == 0) {
+  if (PropSize == 0)
     return "";
-  }
 
   std::string PropValue(PropSize, '\0');
   OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_NAME, PropSize,
@@ -188,9 +186,8 @@ void DeviceContext::launchKernelImpl(
   OL_CHECK(
       olGetPlatformInfoSize(PlatformHandle, OL_PLATFORM_INFO_NAME, &PropSize));
 
-  if (PropSize == 0) {
+  if (PropSize == 0)
     return "";
-  }
 
   std::string PropValue(PropSize, '\0');
   OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_NAME, PropSize,
diff --git a/offload/unittests/Conformance/lib/DeviceResources.cpp b/offload/unittests/Conformance/lib/DeviceResources.cpp
index 5d1c94dca7677..bb91634f6dbb7 100644
--- a/offload/unittests/Conformance/lib/DeviceResources.cpp
+++ b/offload/unittests/Conformance/lib/DeviceResources.cpp
@@ -11,9 +11,8 @@ using namespace mathtest;
 //===----------------------------------------------------------------------===//
 
 void detail::freeDeviceMemory(void *Address) noexcept {
-  if (Address) {
+  if (Address)
     OL_CHECK(olMemFree(Address));
-  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -21,18 +20,16 @@ void detail::freeDeviceMemory(void *Address) noexcept {
 //===----------------------------------------------------------------------===//
 
 DeviceImage::~DeviceImage() noexcept {
-  if (Handle) {
+  if (Handle)
     OL_CHECK(olDestroyProgram(Handle));
-  }
 }
 
 DeviceImage &DeviceImage::operator=(DeviceImage &&Other) noexcept {
   if (this == &Other)
     return *this;
 
-  if (Handle) {
+  if (Handle)
     OL_CHECK(olDestroyProgram(Handle));
-  }
 
   DeviceHandle = Other.DeviceHandle;
   Handle = Other.Handle;

>From d8ddf5e46c7c37b00ccc6ab635e13542a2efb624 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Sat, 19 Jul 2025 21:18:48 -0300
Subject: [PATCH 08/21] Align AMDGPU device compilation flags with NVPTX

---
 offload/unittests/CMakeLists.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index ad64f800da014..66f40adb21600 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -62,11 +62,10 @@ function(add_offload_test_device_code test_filename test_name)
       set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
       add_custom_command(
         OUTPUT ${output_file}
-        # TODO(jhuber6): Add and test the '-stdlib' flag here; also consider
-        #                the '-fno-builtin' flag.
         COMMAND ${CMAKE_C_COMPILER}
         --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-        -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
+        -stdlib -nogpulib -flto -fno-builtin ${ARGN}
+        ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}
       )
       add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})

>From e58a25cd752717a0803f8032bcf2ddd7e559aa19 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Sun, 20 Jul 2025 14:00:06 -0300
Subject: [PATCH 09/21] Add platform-aware constructor to `DeviceContext`

---
 .../include/mathtest/DeviceContext.hpp        |  17 +-
 .../Conformance/lib/DeviceContext.cpp         | 176 +++++++++++++-----
 2 files changed, 133 insertions(+), 60 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index ca0bfe20296c0..76b5b49b12e28 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -6,19 +6,19 @@
 #include "mathtest/ErrorHandling.hpp"
 #include "mathtest/Support.hpp"
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 
 #include <cassert>
 #include <cstddef>
 #include <memory>
-#include <string>
 #include <tuple>
 #include <type_traits>
 #include <utility>
 
 namespace mathtest {
 
-std::size_t countDevices();
+const llvm::SetVector<llvm::StringRef> &getPlatforms();
 
 namespace detail {
 
@@ -34,8 +34,9 @@ class DeviceContext {
   // and enqueued memcpy, as well as device and host memory allocation.
 
 public:
-  // TODO: Add a constructor that also takes a 'Platform'.
-  explicit DeviceContext(std::size_t DeviceId = 0);
+  explicit DeviceContext(std::size_t GlobalDeviceId = 0);
+
+  explicit DeviceContext(llvm::StringRef Platform, std::size_t DeviceId = 0);
 
   template <typename T>
   ManagedBuffer<T> createManagedBuffer(std::size_t Size) const noexcept {
@@ -99,11 +100,9 @@ class DeviceContext {
     }
   }
 
-  [[nodiscard]] std::size_t getId() const noexcept { return DeviceId; }
-
-  [[nodiscard]] std::string getName() const;
+  [[nodiscard]] llvm::StringRef getName() const;
 
-  [[nodiscard]] std::string getPlatform() const;
+  [[nodiscard]] llvm::StringRef getPlatform() const;
 
 private:
   void getKernelImpl(ol_program_handle_t ProgramHandle,
@@ -114,7 +113,7 @@ class DeviceContext {
                         const Dim &GroupSize, const void *KernelArgs,
                         std::size_t KernelArgsSize) const noexcept;
 
-  std::size_t DeviceId;
+  std::size_t GlobalDeviceId;
   ol_device_handle_t DeviceHandle;
 };
 } // namespace mathtest
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index 90ce75c723186..f14309739ff17 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -2,7 +2,10 @@
 
 #include "mathtest/ErrorHandling.hpp"
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
@@ -12,6 +15,7 @@
 #include <OffloadAPI.h>
 #include <cstddef>
 #include <memory>
+#include <optional>
 #include <string>
 #include <system_error>
 #include <vector>
@@ -32,29 +36,78 @@ struct OffloadInitWrapper {
 };
 static OffloadInitWrapper Wrapper{};
 
-[[nodiscard]] ol_platform_backend_t
-getBackend(ol_device_handle_t DeviceHandle) noexcept {
-  ol_platform_handle_t Platform;
+[[nodiscard]] std::string getDeviceName(ol_device_handle_t DeviceHandle) { // Fetches the OL_DEVICE_INFO_NAME property of DeviceHandle as a std::string
+  std::size_t PropSize = 0;
+  OL_CHECK(olGetDeviceInfoSize(DeviceHandle, OL_DEVICE_INFO_NAME, &PropSize)); // Ask for the size first so the buffer below can be sized to match
+
+  if (PropSize == 0)
+    return ""; // Also keeps pop_back() below from running on an empty string
+
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_NAME, PropSize,
+                           PropValue.data())); // Second query fills the PropSize-byte buffer
+  PropValue.pop_back(); // Remove the null terminator
+
+  return PropValue;
+}
+
+[[nodiscard]] ol_platform_handle_t
+getDevicePlatform(ol_device_handle_t DeviceHandle) noexcept { // Reads the OL_DEVICE_INFO_PLATFORM property of DeviceHandle
+  ol_platform_handle_t PlatformHandle = nullptr;
   OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
-                           sizeof(Platform), &Platform));
+                           sizeof(PlatformHandle), &PlatformHandle));
+  return PlatformHandle; // Handle written by the query above
+}
+
+[[nodiscard]] std::string getPlatformName(ol_platform_handle_t PlatformHandle) { // Fetches the OL_PLATFORM_INFO_NAME property of PlatformHandle, lower-cased
+  std::size_t PropSize = 0;
+  OL_CHECK(
+      olGetPlatformInfoSize(PlatformHandle, OL_PLATFORM_INFO_NAME, &PropSize)); // Ask for the size first so the buffer below can be sized to match
+
+  if (PropSize == 0)
+    return ""; // Also keeps pop_back() below from running on an empty string
+
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_NAME, PropSize,
+                             PropValue.data())); // Second query fills the PropSize-byte buffer
+  PropValue.pop_back(); // Remove the null terminator
+
+  return llvm::StringRef(PropValue).lower(); // lower() yields a new std::string; PropValue itself is not modified
+}
+
+[[nodiscard]] ol_platform_backend_t
+getPlatformBackend(ol_platform_handle_t PlatformHandle) noexcept { // Reads the OL_PLATFORM_INFO_BACKEND property of PlatformHandle
   ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
-  OL_CHECK(olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
+  OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_BACKEND,
                              sizeof(Backend), &Backend));
   return Backend;
 }
 
-const std::vector<ol_device_handle_t> &getDevices() {
+struct Device { // One entry per device recorded by getDevices()
+  ol_device_handle_t Handle; // Handle received in the olIterateDevices callback
+  std::string Name; // Result of getDeviceName() for this handle
+  std::string Platform; // Result of getPlatformName(), which lower-cases the name
+  ol_platform_backend_t Backend; // Result of getPlatformBackend(); getDevices() only records non-host backends
+};
+
+const std::vector<Device> &getDevices() {
   // Thread-safe initialization of a static local variable
-  static std::vector<ol_device_handle_t> Devices =
-      []() -> std::vector<ol_device_handle_t> {
-    std::vector<ol_device_handle_t> TmpDevices;
+  static auto Devices = []() {
+    std::vector<Device> TmpDevices;
 
     // Discovers all devices that are not the host
     const auto *const ResultFromIterate = olIterateDevices(
         [](ol_device_handle_t DeviceHandle, void *Data) {
-          if (getBackend(DeviceHandle) != OL_PLATFORM_BACKEND_HOST)
-            static_cast<std::vector<ol_device_handle_t> *>(Data)->push_back(
-                DeviceHandle);
+          ol_platform_handle_t PlatformHandle = getDevicePlatform(DeviceHandle);
+          ol_platform_backend_t Backend = getPlatformBackend(PlatformHandle);
+
+          if (Backend != OL_PLATFORM_BACKEND_HOST) {
+            auto Name = getDeviceName(DeviceHandle);
+            auto Platform = getPlatformName(PlatformHandle);
+
+            static_cast<std::vector<Device> *>(Data)->push_back(
+                {DeviceHandle, Name, Platform, Backend});
+          }
 
           return true;
         },
@@ -69,7 +122,19 @@ const std::vector<ol_device_handle_t> &getDevices() {
 }
 } // namespace
 
-std::size_t mathtest::countDevices() { return getDevices().size(); }
+const llvm::SetVector<llvm::StringRef> &mathtest::getPlatforms() { // Returns the set of platform names found among the devices from getDevices()
+  // Thread-safe initialization of a static local variable
+  static auto Platforms = []() {
+    llvm::SetVector<llvm::StringRef> TmpPlatforms;
+
+    for (const auto &Device : getDevices())
+      TmpPlatforms.insert(Device.Platform); // Stored as a StringRef to the std::string held in the getDevices() result, not as a copy
+
+    return TmpPlatforms;
+  }();
+
+  return Platforms;
+}
 
 void detail::allocManagedMemory(ol_device_handle_t DeviceHandle,
                                 std::size_t Size,
@@ -82,15 +147,50 @@ void detail::allocManagedMemory(ol_device_handle_t DeviceHandle,
 // DeviceContext
 //===----------------------------------------------------------------------===//
 
-DeviceContext::DeviceContext(std::size_t DeviceId)
-    : DeviceId(DeviceId), DeviceHandle(nullptr) {
+DeviceContext::DeviceContext(std::size_t GlobalDeviceId) // Selects a device by its index in the vector returned by getDevices()
+    : GlobalDeviceId(GlobalDeviceId), DeviceHandle(nullptr) {
+  const auto &Devices = getDevices();
+
+  if (GlobalDeviceId >= Devices.size()) // Bounds check before the index below
+    FATAL_ERROR("Invalid GlobalDeviceId: " + llvm::Twine(GlobalDeviceId) +
+                ", but the number of available devices is " +
+                llvm::Twine(Devices.size())); // NOTE(review): the index below is only safe if FATAL_ERROR does not return - confirm
+
+  DeviceHandle = Devices[GlobalDeviceId].Handle;
+}
+
+DeviceContext::DeviceContext(llvm::StringRef Platform, std::size_t DeviceId) // Selects the DeviceId-th device whose platform name matches Platform
+    : DeviceHandle(nullptr) { // GlobalDeviceId is assigned at the end of the body, after the lookup
+  std::string NormalizedPlatform = Platform.lower(); // Device platform names come from getPlatformName(), which lower-cases them
+  const auto &Platforms = getPlatforms();
+
+  if (!Platforms.contains(NormalizedPlatform))
+    FATAL_ERROR("There is no platform that matches with '" +
+                llvm::Twine(Platform) +
+                "'. Available platforms are: " + llvm::join(Platforms, ", "));
+
   const auto &Devices = getDevices();
 
-  if (DeviceId >= Devices.size())
-    FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) + ", but only " +
-                llvm::Twine(Devices.size()) + " devices are available");
+  std::optional<std::size_t> FoundGlobalDeviceId; // Index into Devices of the selected device, once found
+  std::size_t MatchCount = 0; // Number of devices on the requested platform seen so far
+
+  for (std::size_t Index = 0; Index < Devices.size(); ++Index) {
+    if (Devices[Index].Platform == NormalizedPlatform) {
+      if (MatchCount == DeviceId) { // This is the DeviceId-th match on the platform
+        FoundGlobalDeviceId = Index;
+        break;
+      }
+      MatchCount++;
+    }
+  }
+
+  if (!FoundGlobalDeviceId.has_value()) // No break happened, so MatchCount counts every device on the platform
+    FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) +
+                ", but the number of available devices on '" + Platform +
+                "' is " + llvm::Twine(MatchCount));
 
-  DeviceHandle = Devices[DeviceId];
+  GlobalDeviceId = FoundGlobalDeviceId.value();
+  DeviceHandle = Devices[GlobalDeviceId].Handle;
 }
 
 [[nodiscard]] std::shared_ptr<DeviceImage>
@@ -124,9 +224,10 @@ DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
 [[nodiscard]] std::shared_ptr<DeviceImage>
 DeviceContext::loadBinary(llvm::StringRef Directory,
                           llvm::StringRef BinaryName) const {
+  auto Backend = getDevices()[GlobalDeviceId].Backend;
   llvm::StringRef Extension;
 
-  switch (getBackend(DeviceHandle)) {
+  switch (Backend) {
   case OL_PLATFORM_BACKEND_AMDGPU:
     Extension = ".amdgpu.bin";
     break;
@@ -162,37 +263,10 @@ void DeviceContext::launchKernelImpl(
                           KernelArgsSize, &LaunchArgs, nullptr));
 }
 
-[[nodiscard]] std::string DeviceContext::getName() const {
-  std::size_t PropSize = 0;
-  OL_CHECK(olGetDeviceInfoSize(DeviceHandle, OL_DEVICE_INFO_NAME, &PropSize));
-
-  if (PropSize == 0)
-    return "";
-
-  std::string PropValue(PropSize, '\0');
-  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_NAME, PropSize,
-                           PropValue.data()));
-  PropValue.pop_back(); // Remove the null terminator
-
-  return PropValue;
+[[nodiscard]] llvm::StringRef DeviceContext::getName() const { // Device name recorded for this context's entry in getDevices()
+  return getDevices()[GlobalDeviceId].Name; // StringRef to the std::string stored in that entry, not a copy
 }
 
-[[nodiscard]] std::string DeviceContext::getPlatform() const {
-  ol_platform_handle_t PlatformHandle = nullptr;
-  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
-                           sizeof(ol_platform_handle_t), &PlatformHandle));
-
-  std::size_t PropSize = 0;
-  OL_CHECK(
-      olGetPlatformInfoSize(PlatformHandle, OL_PLATFORM_INFO_NAME, &PropSize));
-
-  if (PropSize == 0)
-    return "";
-
-  std::string PropValue(PropSize, '\0');
-  OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_NAME, PropSize,
-                             PropValue.data()));
-  PropValue.pop_back(); // Remove the null terminator
-
-  return PropValue;
+[[nodiscard]] llvm::StringRef DeviceContext::getPlatform() const { // Platform name recorded for this context's entry in getDevices()
+  return getDevices()[GlobalDeviceId].Platform; // StringRef to the std::string stored in that entry, not a copy
 }

>From b11054b175c8a236622a07ae7b12adde81293e50 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Sun, 20 Jul 2025 18:43:23 -0300
Subject: [PATCH 10/21] Generate tests based on `Provider` and `Platform`

---
 offload/unittests/CMakeLists.txt              | 58 ++++++++++++++-----
 offload/unittests/Conformance/CMakeLists.txt  | 18 ++++++
 .../Conformance/device_code/CMakeLists.txt    |  5 +-
 .../include/mathtest/GpuMathTest.hpp          | 17 +++---
 .../include/mathtest/TestRunner.hpp           |  5 +-
 .../Conformance/tests/CMakeLists.txt          |  4 +-
 .../Conformance/tests/Hypotf16Test.cpp        | 10 ++--
 .../unittests/Conformance/tests/LogfTest.cpp  | 10 ++--
 8 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 66f40adb21600..95c08310145b7 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -97,26 +97,56 @@ function(add_offload_unittest test_dirname)
 endfunction()
 
 function(add_conformance_test test_name)
-  set(target_name "${test_name}.conformance")
-
-  list(TRANSFORM ARGN PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/" OUTPUT_VARIABLE files)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs "SOURCES;PROVIDERS")
+  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # Parses the SOURCES and PROVIDERS lists into ARG_SOURCES and ARG_PROVIDERS
 
+  if(NOT ARG_SOURCES)
+    message(WARNING "Conformance test '${test_name}' must specify at least one source file")
+    return()
+  endif()
+  if(NOT ARG_PROVIDERS)
+    message(WARNING "Conformance test '${test_name}' must specify at least one provider")
+    return()
+  endif()
   if(NOT TARGET libc)
     message(WARNING "Cannot run conformance tests without the LLVM C library")
     return()
   endif()
 
-  add_executable(${target_name} ${files})
-  add_dependencies(${target_name} conformance_device_binaries)
-  target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${CONFORMANCE_TEST_DEVICE_CODE_PATH}")
-  target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
-  set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
-
-  add_custom_target(offload.conformance.${test_name}
-    COMMAND $<TARGET_FILE:${target_name}>
-    DEPENDS ${target_name}
-    COMMENT "Running conformance test ${test_name}")
-  add_dependencies(offload.conformance offload.conformance.${test_name})
+  list(TRANSFORM ARG_SOURCES PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/" OUTPUT_VARIABLE files)
+  add_custom_target(offload.conformance.${test_name}) # Aggregate target; every provider/platform variant of this test is attached to it below
+
+  foreach(provider IN LISTS ARG_PROVIDERS)
+    set(candidate_platforms ${OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_${provider}}) # Platforms listed for this provider in the per-provider variable
+    add_custom_target(offload.conformance.${test_name}.${provider}) # Aggregate target for this provider's platform variants
+
+    foreach(platform IN LISTS candidate_platforms)
+      if(platform IN_LIST OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS) # Platforms not in the available list get no targets
+        set(target_name "${test_name}.${provider}.${platform}.conformance")
+        set(target_call "offload.conformance.${test_name}.${provider}.${platform}")
+
+        add_executable(${target_name} ${files})
+        add_dependencies(${target_name} ${provider}.bin)
+        target_compile_definitions(${target_name}
+          PRIVATE PROVIDER="${provider}"
+          PRIVATE PLATFORM="${platform}"
+          PRIVATE DEVICE_BINARY_DIR="${OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR}")
+        target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
+        set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
+
+        add_custom_target(${target_call}
+          COMMAND $<TARGET_FILE:${target_name}>
+          DEPENDS ${target_name}
+          COMMENT "Running conformance test '${test_name}' with '${provider}' on '${platform}'")
+
+        add_dependencies(offload.conformance ${target_call})
+        add_dependencies(offload.conformance.${test_name} ${target_call})
+        add_dependencies(offload.conformance.${test_name}.${provider} ${target_call})
+      endif()
+    endforeach()
+  endforeach()
 endfunction()
 
 set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH "" CACHE STRING
diff --git a/offload/unittests/Conformance/CMakeLists.txt b/offload/unittests/Conformance/CMakeLists.txt
index ce0421553de05..762f25fea2d31 100644
--- a/offload/unittests/Conformance/CMakeLists.txt
+++ b/offload/unittests/Conformance/CMakeLists.txt
@@ -2,6 +2,24 @@ add_custom_target(offload.conformance)
 
 set(PLUGINS_TEST_COMMON MathTest)
 
+set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_llvm-libm "cuda;amdgpu")
+set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_cuda-math "cuda")
+set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_hip-math "amdgpu")
+
+set(OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "")
+
+if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+  find_package(CUDAToolkit QUIET)
+  if(PLATFORM_HAS_NVPTX AND CUDAToolkit_FOUND)
+    list(APPEND OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "cuda")
+  endif()
+endif()
+if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+  if(PLATFORM_HAS_AMDGPU)
+    list(APPEND OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "amdgpu")
+  endif()
+endif()
+
 add_subdirectory(device_code)
 add_subdirectory(lib)
 add_subdirectory(tests)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 82c6ec9767562..9dbfd9953805a 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,3 @@
-add_offload_test_device_code(LLVMLibm.c LLVMLibm)
+add_offload_test_device_code(LLVMLibm.c llvm-libm)
 
-add_custom_target(conformance_device_binaries DEPENDS LLVMLibm.bin)
-set(CONFORMANCE_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
+set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index f7f0df8c19e41..b2f1932071592 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -9,8 +9,8 @@
 #include "mathtest/TestResult.hpp"
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
 
 #include <cassert>
 #include <cstddef>
@@ -47,9 +47,9 @@ class [[nodiscard]] GpuMathTest final {
 
   explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
                        llvm::StringRef Provider,
-                       llvm::StringRef DeviceBinsDirectory)
+                       llvm::StringRef DeviceBinaryDir)
       : Context(std::move(Context)),
-        Kernel(getKernel(this->Context, Provider, DeviceBinsDirectory)) {
+        Kernel(getKernel(this->Context, Provider, DeviceBinaryDir)) {
     assert(this->Context && "Context must not be null");
   }
 
@@ -88,15 +88,14 @@ class [[nodiscard]] GpuMathTest final {
   static DeviceKernel<KernelSignature>
   getKernel(const std::shared_ptr<DeviceContext> &Context,
             llvm::StringRef Provider,
-            llvm::StringRef DeviceBinsDirectory) noexcept {
-    llvm::StringRef BinaryName = llvm::StringSwitch<llvm::StringRef>(Provider)
-                                     .Case("llvm-libm", "LLVMLibm")
-                                     .Default("");
+            llvm::StringRef DeviceBinaryDir) noexcept {
+    constexpr llvm::StringRef ValidProviders[] = {"llvm-libm"};
 
-    if (BinaryName.empty())
+    if (llvm::find(ValidProviders, Provider) == std::end(ValidProviders))
       FATAL_ERROR(llvm::Twine("Unsupported provider: '") + Provider + "'");
 
-    const auto Image = Context->loadBinary(DeviceBinsDirectory, BinaryName);
+    llvm::StringRef BinaryName = Provider;
+    const auto Image = Context->loadBinary(DeviceBinaryDir, BinaryName);
 
     return Context->getKernel<KernelSignature>(Image,
                                                FunctionConfig::KernelName);
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 45074919f720d..079e3bcde52e7 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -75,9 +75,8 @@ void printReport(const TestType &Test, const ResultType &Result,
 
   llvm::errs() << llvm::formatv("=== Test Report for '{0}' === \n",
                                 FunctionConfig::Name);
-  llvm::errs() << llvm::formatv("{0,-17}: {1} ({2})\n", "Device",
-                                Test.getContext().getName(),
-                                Test.getContext().getPlatform());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Device",
+                                Test.getContext().getName());
   llvm::errs() << llvm::formatv("{0,-17}: {1} ms\n", "Elapsed time",
                                 ElapsedMilliseconds);
   llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "ULP tolerance",
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
index b5da56f46fc05..8423ab2365ad3 100644
--- a/offload/unittests/Conformance/tests/CMakeLists.txt
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@@ -1,2 +1,2 @@
-add_conformance_test(Hypotf16Test Hypotf16Test.cpp)
-add_conformance_test(LogfTest LogfTest.cpp)
+add_conformance_test(hypotf16 SOURCES Hypotf16Test.cpp PROVIDERS llvm-libm)
+add_conformance_test(logf SOURCES LogfTest.cpp PROVIDERS llvm-libm)
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
index 8c6b5054e2d4c..370aab1283f49 100644
--- a/offload/unittests/Conformance/tests/Hypotf16Test.cpp
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -33,12 +33,12 @@ template <> struct FunctionConfig<hypotf16> {
 } // namespace mathtest
 
 int main() {
-  // TODO: Add command-line arguments parsing for test configuration.
-  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
-  const llvm::StringRef Provider = "llvm-libm";
-  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+  const llvm::StringRef Platform = PLATFORM;
+  auto Context = std::make_shared<DeviceContext>(Platform, /*DeviceId=*/0);
 
-  GpuMathTest<hypotf16> Hypotf16Test(Context, Provider, DeviceBinsDirectory);
+  const llvm::StringRef Provider = PROVIDER;
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  GpuMathTest<hypotf16> Hypotf16Test(Context, Provider, DeviceBinaryDir);
 
   IndexedRange<float16> RangeX;
   IndexedRange<float16> RangeY;
diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
index 1c866720843ff..8a3f8fa0b2389 100644
--- a/offload/unittests/Conformance/tests/LogfTest.cpp
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -26,12 +26,12 @@ template <> struct FunctionConfig<logf> {
 int main() {
   using namespace mathtest;
 
-  // TODO: Add command-line arguments parsing for test configuration.
-  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
-  const llvm::StringRef Provider = "llvm-libm";
-  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+  const llvm::StringRef Platform = PLATFORM;
+  auto Context = std::make_shared<DeviceContext>(Platform, /*DeviceId=*/0);
 
-  GpuMathTest<logf> LogfTest(Context, Provider, DeviceBinsDirectory);
+  const llvm::StringRef Provider = PROVIDER;
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  GpuMathTest<logf> LogfTest(Context, Provider, DeviceBinaryDir);
 
   IndexedRange<float> Range(/*Begin=*/0.0f,
                             /*End=*/std::numeric_limits<float>::infinity(),

>From 6b726d7f9fcdaaf4dc43519710481d594a6b6bf3 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Mon, 21 Jul 2025 13:02:12 -0300
Subject: [PATCH 11/21] Remove conditional compilation guards for `_Float16`

---
 offload/unittests/Conformance/device_code/LLVMLibm.c   |  5 -----
 .../Conformance/include/mathtest/Numerics.hpp          | 10 ----------
 .../Conformance/include/mathtest/TypeExtras.hpp        |  5 +----
 offload/unittests/Conformance/tests/Hypotf16Test.cpp   |  2 --
 4 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
index 83db152144cbe..23227b45c0e5c 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.c
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -3,12 +3,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#ifdef __FLT16_MAX__
-#define HAS_FLOAT16
 typedef _Float16 float16;
-#endif // __FLT16_MAX__
 
-#ifdef HAS_FLOAT16
 __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
                                  size_t NumElements) {
   uint32_t Index =
@@ -17,7 +13,6 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
   if (Index < NumElements)
     Out[Index] = hypotf16(X[Index], Y[Index]);
 }
-#endif // HAS_FLOAT16
 
 __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
   uint32_t Index =
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index 9f9e3b820b010..5cc34f730934d 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -32,11 +32,9 @@ template <typename T> struct StorageTypeOf {
   using type = typename decltype(getStorageType())::type;
 };
 
-#ifdef MATHTEST_HAS_FLOAT16
 template <> struct StorageTypeOf<float16> {
   using type = uint16_t;
 };
-#endif // MATHTEST_HAS_FLOAT16
 
 template <> struct StorageTypeOf<float> {
   using type = uint32_t;
@@ -50,9 +48,7 @@ template <typename T> using StorageTypeOf_t = typename StorageTypeOf<T>::type;
 
 template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
 
-#ifdef MATHTEST_HAS_FLOAT16
 template <> struct IsFloatingPoint<float16> : std::true_type {};
-#endif // MATHTEST_HAS_FLOAT16
 
 template <typename T>
 inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
@@ -94,13 +90,11 @@ template <typename UIntType, std::size_t Count>
 
 template <typename FloatType> struct FPLayout;
 
-#ifdef MATHTEST_HAS_FLOAT16
 template <> struct FPLayout<float16> {
   static constexpr std::size_t SignLen = 1;
   static constexpr std::size_t ExponentLen = 5;
   static constexpr std::size_t FractionLen = 10;
 };
-#endif // MATHTEST_HAS_FLOAT16
 
 template <> struct FPLayout<float> {
   static constexpr std::size_t SignLen = 1;
@@ -159,13 +153,11 @@ template <typename T> [[nodiscard]] constexpr T getMinOrNegInf() noexcept {
   return std::numeric_limits<T>::lowest();
 }
 
-#ifdef MATHTEST_HAS_FLOAT16
 template <> [[nodiscard]] constexpr float16 getMinOrNegInf<float16>() noexcept {
   using StorageType = StorageTypeOf_t<float16>;
 
   return __builtin_bit_cast(float16, static_cast<StorageType>(0xFC00U));
 }
-#endif // MATHTEST_HAS_FLOAT16
 
 template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
   static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
@@ -177,13 +169,11 @@ template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
   return std::numeric_limits<T>::max();
 }
 
-#ifdef MATHTEST_HAS_FLOAT16
 template <> [[nodiscard]] constexpr float16 getMaxOrInf<float16>() noexcept {
   using StorageType = StorageTypeOf_t<float16>;
 
   return __builtin_bit_cast(float16, static_cast<StorageType>(0x7C00U));
 }
-#endif // MATHTEST_HAS_FLOAT16
 
 template <typename FloatType>
 [[nodiscard]] uint64_t computeUlpDistance(FloatType X, FloatType Y) noexcept {
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
index 18642ec9172d8..7214cd969ce29 100644
--- a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -3,10 +3,7 @@
 
 namespace mathtest {
 
-#ifdef __FLT16_MAX__
-#define MATHTEST_HAS_FLOAT16
-typedef _Float16 float16;
-#endif // __FLT16_MAX__
+using float16 = _Float16;
 } // namespace mathtest
 
 #endif // MATHTEST_TYPEEXTRAS_HPP
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
index 370aab1283f49..376c6a8bdb8b7 100644
--- a/offload/unittests/Conformance/tests/Hypotf16Test.cpp
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -1,6 +1,5 @@
 #include "mathtest/TypeExtras.hpp"
 
-#ifdef MATHTEST_HAS_FLOAT16
 #include "mathtest/DeviceContext.hpp"
 #include "mathtest/ExhaustiveGenerator.hpp"
 #include "mathtest/GpuMathTest.hpp"
@@ -48,4 +47,3 @@ int main() {
 
   return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
 }
-#endif // MATHTEST_HAS_FLOAT16

>From a73757748f85e12c658ab494f23c91dd00453b31 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 10:51:17 -0300
Subject: [PATCH 12/21] Simplify implementation using libc's FPBits utility

---
 .../include/mathtest/IndexedRange.hpp         |  24 ++-
 .../Conformance/include/mathtest/Numerics.hpp | 202 +++++-------------
 .../include/mathtest/TestRunner.hpp           |   5 +-
 .../unittests/Conformance/lib/CMakeLists.txt  |   4 +-
 4 files changed, 74 insertions(+), 161 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
index 7c856ea51aa8f..bdfa3d20ca1de 100644
--- a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
+++ b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
@@ -3,6 +3,8 @@
 
 #include "mathtest/Numerics.hpp"
 
+#include "llvm/Support/MathExtras.h"
+
 #include <cassert>
 #include <cstdint>
 #include <limits>
@@ -54,14 +56,14 @@ template <typename T> class [[nodiscard]] IndexedRange {
   //    values between a and b within the same type
   static constexpr StorageType mapToOrderedUnsigned(T Value) {
     if constexpr (IsFloatingPoint_v<T>) {
-      StorageType SignMask = FPUtils<T>::SignMask;
-      StorageType Bits = FPUtils<T>::getAsBits(Value);
-      return (Bits & SignMask) ? SignMask - (Bits - SignMask) - 1
-                               : SignMask + Bits;
+      constexpr StorageType SignMask = FPBits<T>::SIGN_MASK;
+      StorageType Unsigned = FPBits<T>(Value).uintval();
+      return (Unsigned & SignMask) ? SignMask - (Unsigned - SignMask) - 1
+                                   : SignMask + Unsigned;
     }
 
     if constexpr (std::is_signed_v<T>) {
-      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      constexpr StorageType SignMask = llvm::maskLeadingOnes<StorageType>(1);
       return __builtin_bit_cast(StorageType, Value) ^ SignMask;
     }
 
@@ -70,16 +72,16 @@ template <typename T> class [[nodiscard]] IndexedRange {
 
   static constexpr T mapFromOrderedUnsigned(StorageType MappedValue) {
     if constexpr (IsFloatingPoint_v<T>) {
-      StorageType SignMask = FPUtils<T>::SignMask;
-      StorageType Bits = (MappedValue < SignMask)
-                             ? (SignMask - MappedValue) + SignMask - 1
-                             : MappedValue - SignMask;
+      constexpr StorageType SignMask = FPBits<T>::SIGN_MASK;
+      StorageType Unsigned = (MappedValue < SignMask)
+                                 ? (SignMask - MappedValue) + SignMask - 1
+                                 : MappedValue - SignMask;
 
-      return FPUtils<T>::createFromBits(Bits);
+      return FPBits<T>(Unsigned).get_val();
     }
 
     if constexpr (std::is_signed_v<T>) {
-      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      constexpr StorageType SignMask = llvm::maskLeadingOnes<StorageType>(1);
       return __builtin_bit_cast(T, MappedValue ^ SignMask);
     }
 
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index 5cc34f730934d..bd5ece09f882d 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -4,8 +4,11 @@
 #include "mathtest/Support.hpp"
 #include "mathtest/TypeExtras.hpp"
 
+// These headers are in the shared LLVM-libc header library
+#include "shared/fp_bits.h"
+#include "shared/sign.h"
+
 #include <climits>
-#include <cstddef>
 #include <cstdint>
 #include <limits>
 #include <math.h>
@@ -13,14 +16,29 @@
 
 namespace mathtest {
 
+template <typename FloatType>
+using FPBits = LIBC_NAMESPACE::shared::FPBits<FloatType>;
+
+using Sign = LIBC_NAMESPACE::shared::Sign;
+
 //===----------------------------------------------------------------------===//
 // Type Traits
 //===----------------------------------------------------------------------===//
 
+template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
+
+template <> struct IsFloatingPoint<float16> : std::true_type {};
+
+template <typename T>
+inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
+    = IsFloatingPoint<T>::value;
+
 template <typename T> struct StorageTypeOf {
 private:
   static constexpr auto getStorageType() noexcept {
-    if constexpr (std::is_unsigned_v<T>)
+    if constexpr (IsFloatingPoint_v<T>)
+      return TypeIdentityOf<typename FPBits<T>::StorageType>{};
+    else if constexpr (std::is_unsigned_v<T>)
       return TypeIdentityOf<T>{};
     else if constexpr (std::is_signed_v<T>)
       return TypeIdentityOf<std::make_unsigned_t<T>>{};
@@ -32,158 +50,48 @@ template <typename T> struct StorageTypeOf {
   using type = typename decltype(getStorageType())::type;
 };
 
-template <> struct StorageTypeOf<float16> {
-  using type = uint16_t;
-};
-
-template <> struct StorageTypeOf<float> {
-  using type = uint32_t;
-};
-
-template <> struct StorageTypeOf<double> {
-  using type = uint64_t;
-};
-
 template <typename T> using StorageTypeOf_t = typename StorageTypeOf<T>::type;
 
-template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
-
-template <> struct IsFloatingPoint<float16> : std::true_type {};
-
-template <typename T>
-inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
-    = IsFloatingPoint<T>::value;
-
-//===----------------------------------------------------------------------===//
-// Bitmask Utilities
-//===----------------------------------------------------------------------===//
-
-template <typename UIntType, std::size_t Count>
-[[nodiscard]] constexpr UIntType maskLeadingOnes() noexcept {
-  static_assert(std::is_unsigned_v<UIntType>,
-                "UIntType must be an unsigned integer type");
-
-  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
-  static_assert(
-      Count <= TotalBits,
-      "Count must be less than or equal to the bit width of UIntType");
-
-  return Count == 0 ? UIntType(0) : (~UIntType(0) << (TotalBits - Count));
-}
-
-template <typename UIntType, std::size_t Count>
-[[nodiscard]] constexpr UIntType maskTrailingOnes() noexcept {
-  static_assert(std::is_unsigned_v<UIntType>,
-                "UIntType must be an unsigned integer type");
-
-  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
-  static_assert(
-      Count <= TotalBits,
-      "Count must be less than or equal to the bit width of UIntType");
-
-  return Count == 0 ? UIntType(0) : (~UIntType(0) >> (TotalBits - Count));
-}
-
-//===----------------------------------------------------------------------===//
-// Floating-Point Utilities
-//===----------------------------------------------------------------------===//
-
-template <typename FloatType> struct FPLayout;
-
-template <> struct FPLayout<float16> {
-  static constexpr std::size_t SignLen = 1;
-  static constexpr std::size_t ExponentLen = 5;
-  static constexpr std::size_t FractionLen = 10;
-};
-
-template <> struct FPLayout<float> {
-  static constexpr std::size_t SignLen = 1;
-  static constexpr std::size_t ExponentLen = 8;
-  static constexpr std::size_t FractionLen = 23;
-};
-
-template <> struct FPLayout<double> {
-  static constexpr std::size_t SignLen = 1;
-  static constexpr std::size_t ExponentLen = 11;
-  static constexpr std::size_t FractionLen = 52;
-};
-
-template <typename FloatType> struct FPUtils : public FPLayout<FloatType> {
-  using FPLayout = FPLayout<FloatType>;
-  using StorageType = StorageTypeOf_t<FloatType>;
-  using FPLayout::ExponentLen;
-  using FPLayout::FractionLen;
-  using FPLayout::SignLen;
-
-  static constexpr StorageType SignMask =
-      maskTrailingOnes<StorageType, SignLen>() << (ExponentLen + FractionLen);
-
-  FPUtils() = delete;
-
-  [[nodiscard]] static constexpr FloatType
-  createFromBits(StorageType Bits) noexcept {
-    return __builtin_bit_cast(FloatType, Bits);
-  }
-
-  [[nodiscard]] static constexpr StorageType
-  getAsBits(FloatType Value) noexcept {
-    return __builtin_bit_cast(StorageType, Value);
-  }
-
-  [[nodiscard]] static constexpr bool isNaN(FloatType Value) noexcept {
-    return __builtin_isnan(Value);
-  }
-
-  [[nodiscard]] static constexpr bool getSignBit(FloatType Value) noexcept {
-    return getAsBits(Value) & SignMask;
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // Numeric Functions
 //===----------------------------------------------------------------------===//
 
 template <typename T> [[nodiscard]] constexpr T getMinOrNegInf() noexcept {
-  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
-
-  if constexpr (std::is_floating_point_v<T> &&
-                std::numeric_limits<T>::has_infinity)
-    return -std::numeric_limits<T>::infinity();
-
-  return std::numeric_limits<T>::lowest();
-}
-
-template <> [[nodiscard]] constexpr float16 getMinOrNegInf<float16>() noexcept {
-  using StorageType = StorageTypeOf_t<float16>;
-
-  return __builtin_bit_cast(float16, static_cast<StorageType>(0xFC00U));
+  if constexpr (IsFloatingPoint_v<T>) {
+    // All currently supported floating-point types have infinity
+    return FPBits<T>::inf(Sign::NEG).get_val();
+  } else {
+    static_assert(std::is_integral_v<T>,
+                  "Type T must be an integral or floating-point type");
+
+    return std::numeric_limits<T>::lowest();
+  }
 }
 
 template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
-  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
-
-  if constexpr (std::is_floating_point_v<T> &&
-                std::numeric_limits<T>::has_infinity)
-    return std::numeric_limits<T>::infinity();
-
-  return std::numeric_limits<T>::max();
-}
-
-template <> [[nodiscard]] constexpr float16 getMaxOrInf<float16>() noexcept {
-  using StorageType = StorageTypeOf_t<float16>;
-
-  return __builtin_bit_cast(float16, static_cast<StorageType>(0x7C00U));
+  if constexpr (IsFloatingPoint_v<T>) {
+    // All currently supported floating-point types have infinity
+    return FPBits<T>::inf(Sign::POS).get_val();
+  } else {
+    static_assert(std::is_integral_v<T>,
+                  "Type T must be an integral or floating-point type");
+
+    return std::numeric_limits<T>::max();
+  }
 }
 
 template <typename FloatType>
 [[nodiscard]] uint64_t computeUlpDistance(FloatType X, FloatType Y) noexcept {
   static_assert(IsFloatingPoint_v<FloatType>,
                 "FloatType must be a floating-point type");
-  using FPUtils = FPUtils<FloatType>;
-  using StorageType = typename FPUtils::StorageType;
+  using FPBits = FPBits<FloatType>;
+  using StorageType = typename FPBits::StorageType;
+
+  const FPBits XBits(X);
+  const FPBits YBits(Y);
 
   if (X == Y) {
-    if (FPUtils::getSignBit(X) != FPUtils::getSignBit(Y)) [[unlikely]] {
+    if (XBits.sign() != YBits.sign()) [[unlikely]] {
       // When X == Y, different sign bits imply that X and Y are +0.0 and -0.0
       // (in any order). Since we want to treat them as unequal in the context
       // of accuracy testing of mathematical functions, we return the smallest
@@ -193,8 +101,8 @@ template <typename FloatType>
     return 0;
   }
 
-  const bool XIsNaN = FPUtils::isNaN(X);
-  const bool YIsNaN = FPUtils::isNaN(Y);
+  const bool XIsNaN = XBits.is_nan();
+  const bool YIsNaN = YBits.is_nan();
 
   if (XIsNaN && YIsNaN)
     return 0;
@@ -202,19 +110,21 @@ template <typename FloatType>
   if (XIsNaN || YIsNaN)
     return std::numeric_limits<uint64_t>::max();
 
-  constexpr StorageType SignMask = FPUtils::SignMask;
+  constexpr StorageType SignMask = FPBits::SIGN_MASK;
 
-  // Linearise FloatType values into an ordered unsigned space:
-  //  * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  // Linearise FloatType values into an ordered unsigned space. Let a and b
+  // be bits(x), bits(y), respectively, where x and y are FloatType values.
+  //  * The mapping is monotonic: x >= y if, and only if, map(a) >= map(b)
   //  * The difference |map(a) − map(b)| equals the number of std::nextafter
   //    steps between a and b within the same type
-  auto MapToOrderedUnsigned = [](FloatType Value) {
-    const StorageType Bits = FPUtils::getAsBits(Value);
-    return (Bits & SignMask) ? SignMask - (Bits - SignMask) : SignMask + Bits;
+  auto MapToOrderedUnsigned = [](FPBits Bits) {
+    const StorageType Unsigned = Bits.uintval();
+    return (Unsigned & SignMask) ? SignMask - (Unsigned - SignMask)
+                                 : SignMask + Unsigned;
   };
 
-  const StorageType MappedX = MapToOrderedUnsigned(X);
-  const StorageType MappedY = MapToOrderedUnsigned(Y);
+  const StorageType MappedX = MapToOrderedUnsigned(XBits);
+  const StorageType MappedY = MapToOrderedUnsigned(YBits);
   return static_cast<uint64_t>(MappedX > MappedY ? MappedX - MappedY
                                                  : MappedY - MappedX);
 }
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 079e3bcde52e7..e041abbe4b950 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -16,15 +16,14 @@ namespace detail {
 template <typename T>
 void printValue(llvm::raw_ostream &OS, const T &Value) noexcept {
   if constexpr (IsFloatingPoint_v<T>) {
-    using FPUtils = FPUtils<T>;
 
     if constexpr (sizeof(T) < sizeof(float))
       OS << float(Value);
     else
       OS << Value;
 
-    OS << llvm::formatv(" (0x{0})",
-                        llvm::Twine::utohexstr(FPUtils::getAsBits(Value)));
+    const FPBits<T> Bits(Value);
+    OS << llvm::formatv(" (0x{0})", llvm::Twine::utohexstr(Bits.uintval()));
   } else {
     OS << Value;
   }
diff --git a/offload/unittests/Conformance/lib/CMakeLists.txt b/offload/unittests/Conformance/lib/CMakeLists.txt
index a0402a54fbadf..adf2fa3604cb4 100644
--- a/offload/unittests/Conformance/lib/CMakeLists.txt
+++ b/offload/unittests/Conformance/lib/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_library(MathTest STATIC DeviceContext.cpp DeviceResources.cpp ErrorHandling.cpp)
 
+include(FindLibcCommonUtils)
+
 target_include_directories(MathTest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
 target_compile_options(MathTest PUBLIC -fno-rtti)
-target_link_libraries(MathTest PUBLIC LLVMOffload LLVMSupport LLVMDemangle)
+target_link_libraries(MathTest PUBLIC LLVMOffload LLVMSupport LLVMDemangle llvm-libc-common-utilities)

>From 2a6789a6bc7a9b47f126796f51d6b50fb5bc601b Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 17:03:36 -0300
Subject: [PATCH 13/21] Add file headers

---
 .../include/mathtest/DeviceContext.hpp        | 17 ++++++++++++++++
 .../include/mathtest/DeviceResources.hpp      | 14 +++++++++++++
 .../Conformance/include/mathtest/Dim.hpp      | 17 ++++++++++++++++
 .../include/mathtest/ErrorHandling.hpp        | 14 +++++++++++++
 .../include/mathtest/ExhaustiveGenerator.hpp  | 15 ++++++++++++++
 .../include/mathtest/GpuMathTest.hpp          | 15 ++++++++++++++
 .../include/mathtest/HostRefChecker.hpp       | 15 ++++++++++++++
 .../include/mathtest/IndexedRange.hpp         | 18 +++++++++++++++--
 .../include/mathtest/InputGenerator.hpp       | 14 +++++++++++++
 .../Conformance/include/mathtest/Numerics.hpp | 20 ++++++++++++++++---
 .../include/mathtest/OffloadForward.hpp       | 14 +++++++++++++
 .../Conformance/include/mathtest/Support.hpp  | 14 +++++++++++++
 .../include/mathtest/TestResult.hpp           | 14 +++++++++++++
 .../include/mathtest/TestRunner.hpp           | 14 +++++++++++++
 .../include/mathtest/TypeExtras.hpp           | 14 +++++++++++++
 .../Conformance/lib/DeviceContext.cpp         | 16 ++++++++++++++-
 .../Conformance/lib/DeviceResources.cpp       | 14 +++++++++++++
 .../Conformance/lib/ErrorHandling.cpp         | 14 +++++++++++++
 .../Conformance/tests/Hypotf16Test.cpp        | 14 ++++++++++++-
 .../unittests/Conformance/tests/LogfTest.cpp  | 13 ++++++++++++
 20 files changed, 293 insertions(+), 7 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index 76b5b49b12e28..e7a71486ccf68 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -1,3 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the DeviceContext class, which serves
+/// as the high-level interface to a particular device (GPU).
+///
+/// This class provides methods for allocating buffers, loading binaries, and
+/// getting and launching kernels on the device.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_DEVICECONTEXT_HPP
 #define MATHTEST_DEVICECONTEXT_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
index e00c80565ca9a..860448afa3a01 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of wrappers that manage device resources
+/// like buffers, binaries, and kernels.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_DEVICERESOURCES_HPP
 #define MATHTEST_DEVICERESOURCES_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
index 17553a548f940..890bf95e0d861 100644
--- a/offload/unittests/Conformance/include/mathtest/Dim.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Dim.hpp
@@ -1,3 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the Dim class, which is used to
+/// represent 1D, 2D, or 3D dimensions.
+///
+/// This class is used by DeviceContext to specify the number of thread groups
+/// (NumGroups) and the size of each group (GroupSize) for a kernel launch.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_DIM_HPP
 #define MATHTEST_DIM_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
index 329ebe4238319..7ec276227acc3 100644
--- a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of error handling macros for reporting
+/// fatal error conditions and validating Offload API calls.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_ERRORHANDLING_HPP
 #define MATHTEST_ERRORHANDLING_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index 73025af4c535e..1143cbe775b54 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -1,3 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the ExhaustiveGenerator class, a
+/// concrete input generator that exhaustively creates inputs from a given
+/// sequence of ranges.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_EXHAUSTIVEGENERATOR_HPP
 #define MATHTEST_EXHAUSTIVEGENERATOR_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index b2f1932071592..accdb6a908c17 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -1,3 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the GpuMathTest class, a test harness
+/// that orchestrates running a math function on a device (GPU) and verifying
+/// its results.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_GPUMATHTEST_HPP
 #define MATHTEST_GPUMATHTEST_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
index f94f57ed5df16..488aefda67ef4 100644
--- a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
+++ b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
@@ -1,3 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the HostRefChecker class, which
+/// verifies the results of a device computation against a reference
+/// implementation on the host.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_HOSTREFCHECKER_HPP
 #define MATHTEST_HOSTREFCHECKER_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
index bdfa3d20ca1de..24aa00f6c6d7d 100644
--- a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
+++ b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the IndexedRange class, which provides
+/// an indexable view over a contiguous range of numeric values.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_INDEXEDRANGE_HPP
 #define MATHTEST_INDEXEDRANGE_HPP
 
@@ -51,9 +65,9 @@ template <typename T> class [[nodiscard]] IndexedRange {
   using StorageType = StorageTypeOf_t<T>;
 
   // Linearise T values into an ordered unsigned space:
-  //  * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  //  * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b).
   //  * The difference |map(a) − map(b)| equals the number of representable
-  //    values between a and b within the same type
+  //    values between a and b within the same type.
   static constexpr StorageType mapToOrderedUnsigned(T Value) {
     if constexpr (IsFloatingPoint_v<T>) {
       constexpr StorageType SignMask = FPBits<T>::SIGN_MASK;
diff --git a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
index 0362441399b5e..ab1c32899d6b5 100644
--- a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the InputGenerator class, which defines
+/// the abstract interface for classes that generate math test inputs.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_INPUTGENERATOR_HPP
 #define MATHTEST_INPUTGENERATOR_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index bd5ece09f882d..322ca4747d435 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of numerical utilities, including
+/// functions to compute ULP distance and traits for floating-point types.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_NUMERICS_HPP
 #define MATHTEST_NUMERICS_HPP
 
@@ -95,7 +109,7 @@ template <typename FloatType>
       // When X == Y, different sign bits imply that X and Y are +0.0 and -0.0
       // (in any order). Since we want to treat them as unequal in the context
       // of accuracy testing of mathematical functions, we return the smallest
-      // non-zero value
+      // non-zero value.
       return 1;
     }
     return 0;
@@ -114,9 +128,9 @@ template <typename FloatType>
 
   // Linearise FloatType values into an ordered unsigned space. Let a and b
   // be bits(x), bits(y), respectively, where x and y are FloatType values.
-  //  * The mapping is monotonic: x >= y if, and only if, map(a) >= map(b)
+  //  * The mapping is monotonic: x >= y if, and only if, map(a) >= map(b).
   //  * The difference |map(a) − map(b)| equals the number of std::nextafter
-  //    steps between a and b within the same type
+  //    steps between a and b within the same type.
   auto MapToOrderedUnsigned = [](FPBits Bits) {
     const StorageType Unsigned = Bits.uintval();
     return (Unsigned & SignMask) ? SignMask - (Unsigned - SignMask)
diff --git a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
index 831e5e3b44070..788989a0d4211 100644
--- a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
+++ b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains forward declarations for the opaque types and handles
+/// used by the Offload API.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_OFFLOADFORWARD_HPP
 #define MATHTEST_OFFLOADFORWARD_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/Support.hpp b/offload/unittests/Conformance/include/mathtest/Support.hpp
index 9249af822c1b1..2d3dceef2a230 100644
--- a/offload/unittests/Conformance/include/mathtest/Support.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Support.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of various metaprogramming helpers and
+/// support utilities for the math test framework.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_SUPPORT_HPP
 #define MATHTEST_SUPPORT_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/TestResult.hpp b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
index be19969b706d9..303ef4d36ee3f 100644
--- a/offload/unittests/Conformance/include/mathtest/TestResult.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the TestResult class, which aggregates
+/// and stores the results of a math test run.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_TESTRESULT_HPP
 #define MATHTEST_TESTRESULT_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index e041abbe4b950..958e1fdb85ff6 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the runTest function, which executes a
+/// test instance and prints a formatted report of the results.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_TESTRUNNER_HPP
 #define MATHTEST_TESTRUNNER_HPP
 
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
index 7214cd969ce29..9991a44f52944 100644
--- a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of type aliases for extended
+/// floating-point types.
+///
+//===----------------------------------------------------------------------===//
+
 #ifndef MATHTEST_TYPEEXTRAS_HPP
 #define MATHTEST_TYPEEXTRAS_HPP
 
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index f14309739ff17..267e822c7dc77 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of helpers and non-template member
+/// functions for the DeviceContext class.
+///
+//===----------------------------------------------------------------------===//
+
 #include "mathtest/DeviceContext.hpp"
 
 #include "mathtest/ErrorHandling.hpp"
@@ -254,7 +268,7 @@ void DeviceContext::launchKernelImpl(
     const void *KernelArgs, std::size_t KernelArgsSize) const noexcept {
   ol_kernel_launch_size_args_t LaunchArgs;
   LaunchArgs.Dimensions = 3; // It seems this field is not used anywhere.
-                             // Defaulting to the safest value
+                             // Defaulting to the safest value.
   LaunchArgs.NumGroups = {NumGroups[0], NumGroups[1], NumGroups[2]};
   LaunchArgs.GroupSize = {GroupSize[0], GroupSize[1], GroupSize[2]};
   LaunchArgs.DynSharedMemory = 0;
diff --git a/offload/unittests/Conformance/lib/DeviceResources.cpp b/offload/unittests/Conformance/lib/DeviceResources.cpp
index bb91634f6dbb7..d1c7b90e751e6 100644
--- a/offload/unittests/Conformance/lib/DeviceResources.cpp
+++ b/offload/unittests/Conformance/lib/DeviceResources.cpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of helpers and non-template member
+/// functions for the device resource classes.
+///
+//===----------------------------------------------------------------------===//
+
 #include "mathtest/DeviceResources.hpp"
 
 #include "mathtest/ErrorHandling.hpp"
diff --git a/offload/unittests/Conformance/lib/ErrorHandling.cpp b/offload/unittests/Conformance/lib/ErrorHandling.cpp
index 0f85260bc7926..f757087fc904a 100644
--- a/offload/unittests/Conformance/lib/ErrorHandling.cpp
+++ b/offload/unittests/Conformance/lib/ErrorHandling.cpp
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the helper functions for the error
+/// handling macros.
+///
+//===----------------------------------------------------------------------===//
+
 #include "mathtest/ErrorHandling.hpp"
 
 #include "llvm/ADT/Twine.h"
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
index 376c6a8bdb8b7..13926d7193ecc 100644
--- a/offload/unittests/Conformance/tests/Hypotf16Test.cpp
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -1,10 +1,22 @@
-#include "mathtest/TypeExtras.hpp"
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the hypotf16 function.
+///
+//===----------------------------------------------------------------------===//
 
 #include "mathtest/DeviceContext.hpp"
 #include "mathtest/ExhaustiveGenerator.hpp"
 #include "mathtest/GpuMathTest.hpp"
 #include "mathtest/IndexedRange.hpp"
 #include "mathtest/TestRunner.hpp"
+#include "mathtest/TypeExtras.hpp"
 
 #include "llvm/ADT/StringRef.h"
 
diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
index 8a3f8fa0b2389..2f348c621742e 100644
--- a/offload/unittests/Conformance/tests/LogfTest.cpp
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -1,3 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the logf function.
+///
+//===----------------------------------------------------------------------===//
+
 #include "mathtest/DeviceContext.hpp"
 #include "mathtest/ExhaustiveGenerator.hpp"
 #include "mathtest/GpuMathTest.hpp"

>From e67a20e12575bf710dcfcfec8521bf473b68a71f Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 17:26:14 -0300
Subject: [PATCH 14/21] Refine wording in Numerics.hpp file header

---
 offload/unittests/Conformance/include/mathtest/Numerics.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
index 322ca4747d435..8b0e900a4c823 100644
--- a/offload/unittests/Conformance/include/mathtest/Numerics.hpp
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains the definition of numerical utilities, including
-/// functions to compute ULP distance and traits for floating-point types.
+/// This file contains the definition of numeric utilities, including functions
+/// to compute ULP distance and traits for floating-point types.
 ///
 //===----------------------------------------------------------------------===//
 

>From e1ac85856c235a11aa67ef27dc2e7bca26f4cf6f Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 21:05:40 -0300
Subject: [PATCH 15/21] Simplify implementation of `ExhaustiveGenerator` for
 single-threaded use

---
 .../include/mathtest/ExhaustiveGenerator.hpp  | 34 ++++++-------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index 1143cbe775b54..e7de3dee6428d 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -24,7 +24,6 @@
 
 #include <algorithm>
 #include <array>
-#include <atomic>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -66,30 +65,17 @@ class [[nodiscard]] ExhaustiveGenerator final
                        [&](std::size_t Size) { return Size == BufferSize; }) &&
            "All input buffers must have the same size");
 
-    uint64_t StartFlatIndex, BatchSize;
-    while (true) {
-      uint64_t CurrentFlatIndex =
-          FlatIndexGenerator.load(std::memory_order_relaxed);
-      if (CurrentFlatIndex >= Size)
-        return 0;
-
-      BatchSize = std::min<uint64_t>(BufferSize, Size - CurrentFlatIndex);
-      uint64_t NextFlatIndex = CurrentFlatIndex + BatchSize;
-
-      if (FlatIndexGenerator.compare_exchange_weak(
-              CurrentFlatIndex, NextFlatIndex,
-              std::memory_order_acq_rel, // Success
-              std::memory_order_acquire  // Failure
-              )) {
-        StartFlatIndex = CurrentFlatIndex;
-        break;
-      }
-    }
+    if (NextFlatIndex >= Size)
+      return 0;
+
+    const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
+    const auto CurrentFlatIndex = NextFlatIndex;
+    NextFlatIndex += BatchSize;
 
     auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
 
     llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
-      writeInputs(StartFlatIndex, Offset, BufferPtrsTuple);
+      writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
     });
 
     return static_cast<std::size_t>(BatchSize);
@@ -115,9 +101,9 @@ class [[nodiscard]] ExhaustiveGenerator final
   }
 
   template <typename BufferPtrsTupleType>
-  void writeInputs(uint64_t StartFlatIndex, uint64_t Offset,
+  void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
                    BufferPtrsTupleType BufferPtrsTuple) const noexcept {
-    auto NDIndex = getNDIndex(StartFlatIndex + Offset);
+    auto NDIndex = getNDIndex(CurrentFlatIndex + Offset);
     writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
   }
 
@@ -145,7 +131,7 @@ class [[nodiscard]] ExhaustiveGenerator final
   uint64_t Size = 1;
   RangesTupleType RangesTuple;
   IndexArrayType Strides = {};
-  std::atomic<uint64_t> FlatIndexGenerator = 0;
+  uint64_t NextFlatIndex = 0;
 };
 } // namespace mathtest
 

>From a3a1fc729e29c0c4cae4ca7531d1a4d19b65272b Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 22:17:14 -0300
Subject: [PATCH 16/21] Add file header

---
 .../unittests/Conformance/device_code/LLVMLibm.c   | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
index 23227b45c0e5c..fe5196a539455 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.c
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -1,3 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the device kernels that wrap the
+/// math functions from the llvm-libm provider.
+///
+//===----------------------------------------------------------------------===//
+
 #include <gpuintrin.h>
 #include <math.h>
 #include <stddef.h>

>From 786921273c4a9aad2b324ab1387ab46f9c232595 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 23 Jul 2025 22:59:55 -0300
Subject: [PATCH 17/21] Improve test report and API safety in GpuMathTest

---
 .../Conformance/include/mathtest/GpuMathTest.hpp      | 11 +++++++----
 .../Conformance/include/mathtest/TestRunner.hpp       | 10 +++++++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index accdb6a908c17..b15ac0449cbea 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -31,6 +31,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
+#include <string>
 #include <tuple>
 #include <utility>
 
@@ -63,7 +64,7 @@ class [[nodiscard]] GpuMathTest final {
   explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
                        llvm::StringRef Provider,
                        llvm::StringRef DeviceBinaryDir)
-      : Context(std::move(Context)),
+      : Context(std::move(Context)), Provider(Provider),
         Kernel(getKernel(this->Context, Provider, DeviceBinaryDir)) {
     assert(this->Context && "Context must not be null");
   }
@@ -94,11 +95,12 @@ class [[nodiscard]] GpuMathTest final {
     return FinalResult;
   }
 
-  [[nodiscard]] const DeviceContext &getContext() const noexcept {
-    assert(Context && "Context must not be null");
-    return *Context;
+  [[nodiscard]] std::shared_ptr<DeviceContext> getContext() const noexcept {
+    return Context;
   }
 
+  [[nodiscard]] std::string getProvider() const noexcept { return Provider; }
+
 private:
   static DeviceKernel<KernelSignature>
   getKernel(const std::shared_ptr<DeviceContext> &Context,
@@ -168,6 +170,7 @@ class [[nodiscard]] GpuMathTest final {
   }
 
   std::shared_ptr<DeviceContext> Context;
+  std::string Provider;
   DeviceKernel<KernelSignature> Kernel;
 };
 } // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 958e1fdb85ff6..3e1f184ef4366 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -82,14 +82,18 @@ void printReport(const TestType &Test, const ResultType &Result,
                  const std::chrono::steady_clock::duration &Duration) noexcept {
   using FunctionConfig = typename TestType::FunctionConfig;
 
-  const bool Passed = Result.hasPassed();
+  const auto Context = Test.getContext();
   const auto ElapsedMilliseconds =
       std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
+  const bool Passed = Result.hasPassed();
 
   llvm::errs() << llvm::formatv("=== Test Report for '{0}' === \n",
                                 FunctionConfig::Name);
-  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Device",
-                                Test.getContext().getName());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Provider",
+                                Test.getProvider());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Platform",
+                                Context->getPlatform());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Device", Context->getName());
   llvm::errs() << llvm::formatv("{0,-17}: {1} ms\n", "Elapsed time",
                                 ElapsedMilliseconds);
   llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "ULP tolerance",

>From 4a5128a292c3f17fa5265ed907904852c116b4f4 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 24 Jul 2025 09:42:42 -0300
Subject: [PATCH 18/21] Move some device compile flags to the call site

---
 offload/unittests/CMakeLists.txt                         | 5 ++---
 offload/unittests/Conformance/device_code/CMakeLists.txt | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 95c08310145b7..e1cc8f1e2f6b2 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -40,7 +40,7 @@ function(add_offload_test_device_code test_filename test_name)
         OUTPUT ${output_file}
         COMMAND ${CMAKE_C_COMPILER}
         --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
-        -stdlib -nogpulib --cuda-path=${CUDA_ROOT} -flto -fno-builtin ${ARGN}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
         ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}
       )
@@ -64,8 +64,7 @@ function(add_offload_test_device_code test_filename test_name)
         OUTPUT ${output_file}
         COMMAND ${CMAKE_C_COMPILER}
         --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-        -stdlib -nogpulib -flto -fno-builtin ${ARGN}
-        ${SRC_PATH} -o ${output_file}
+        -nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}
       )
       add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 9dbfd9953805a..1ea6816aaa8dd 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_offload_test_device_code(LLVMLibm.c llvm-libm)
+add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin)
 
 set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

>From 8acd4585978a2edcc7028370cd1ce30cf98d99f0 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 24 Jul 2025 21:21:07 -0300
Subject: [PATCH 19/21] Add non-fatal tryLoadBinary and tryGetKernel methods

---
 .../include/mathtest/DeviceContext.hpp        |  51 ++++++---
 .../Conformance/lib/DeviceContext.cpp         | 103 ++++++++++++------
 2 files changed, 107 insertions(+), 47 deletions(-)

diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index e7a71486ccf68..d90b9510b0ef0 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -25,10 +25,12 @@
 
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
 
 #include <cassert>
 #include <cstddef>
 #include <memory>
+#include <optional>
 #include <tuple>
 #include <type_traits>
 #include <utility>
@@ -65,27 +67,47 @@ class DeviceContext {
     return ManagedBuffer<T>(TypedAddress, Size);
   }
 
-  [[nodiscard]] std::shared_ptr<DeviceImage>
-  loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
-             llvm::StringRef Extension) const;
-
   [[nodiscard]] std::shared_ptr<DeviceImage>
   loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
 
+  [[nodiscard]] std::optional<std::shared_ptr<DeviceImage>>
+  tryLoadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
+
   template <typename KernelSignature>
   DeviceKernel<KernelSignature>
   getKernel(const std::shared_ptr<DeviceImage> &Image,
             llvm::StringRef KernelName) const noexcept {
     assert(Image && "Image provided to getKernel is null");
 
-    if (Image->DeviceHandle != this->DeviceHandle)
+    if (Image->DeviceHandle != DeviceHandle)
       FATAL_ERROR("Image provided to getKernel was created for a different "
                   "device");
 
-    ol_symbol_handle_t KernelHandle = nullptr;
-    getKernelImpl(Image->Handle, KernelName, &KernelHandle);
+    auto KernelHandleOrErr = getKernelImpl(Image->Handle, KernelName);
+
+    if (auto Err = KernelHandleOrErr.takeError())
+      FATAL_ERROR(llvm::toString(std::move(Err)));
+
+    return DeviceKernel<KernelSignature>(Image, *KernelHandleOrErr);
+  }
+
+  template <typename KernelSignature>
+  [[nodiscard]] std::optional<DeviceKernel<KernelSignature>>
+  tryGetKernel(const std::shared_ptr<DeviceImage> &Image,
+               llvm::StringRef KernelName) const noexcept {
+    assert(Image && "Image provided to tryGetKernel is null");
+
+    if (Image->DeviceHandle != DeviceHandle)
+      return std::nullopt; // Image was created for a different device.
+
+    auto KernelHandleOrErr = getKernelImpl(Image->Handle, KernelName);
 
-    return DeviceKernel<KernelSignature>(Image, KernelHandle);
+    if (auto Err = KernelHandleOrErr.takeError()) {
+      llvm::consumeError(std::move(Err)); // Non-fatal: drop the error.
+      return std::nullopt;
+    }
+
+    return DeviceKernel<KernelSignature>(Image, *KernelHandleOrErr);
   }
 
   template <typename KernelSignature, typename... ArgTypes>
@@ -117,14 +139,17 @@ class DeviceContext {
     }
   }
 
-  [[nodiscard]] llvm::StringRef getName() const;
+  [[nodiscard]] llvm::StringRef getName() const noexcept;
 
-  [[nodiscard]] llvm::StringRef getPlatform() const;
+  [[nodiscard]] llvm::StringRef getPlatform() const noexcept;
 
 private:
-  void getKernelImpl(ol_program_handle_t ProgramHandle,
-                     llvm::StringRef KernelName,
-                     ol_symbol_handle_t *KernelHandle) const noexcept;
+  [[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
+  loadBinaryImpl(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
+
+  [[nodiscard]] llvm::Expected<ol_symbol_handle_t>
+  getKernelImpl(ol_program_handle_t ProgramHandle,
+                llvm::StringRef KernelName) const noexcept;
 
   void launchKernelImpl(ol_symbol_handle_t KernelHandle, const Dim &NumGroups,
                         const Dim &GroupSize, const void *KernelArgs,
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index 267e822c7dc77..613a3dc575a17 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -198,38 +199,55 @@ DeviceContext::DeviceContext(llvm::StringRef Platform, std::size_t DeviceId)
     }
   }
 
-  if (!FoundGlobalDeviceId.has_value())
+  if (!FoundGlobalDeviceId)
     FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) +
                 ", but the number of available devices on '" + Platform +
                 "' is " + llvm::Twine(MatchCount));
 
-  GlobalDeviceId = FoundGlobalDeviceId.value();
+  GlobalDeviceId = *FoundGlobalDeviceId;
   DeviceHandle = Devices[GlobalDeviceId].Handle;
 }
 
-[[nodiscard]] std::shared_ptr<DeviceImage>
-DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
-                          llvm::StringRef Extension) const {
+[[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
+DeviceContext::loadBinaryImpl(llvm::StringRef Directory,
+                              llvm::StringRef BinaryName) const {
+  auto Backend = getDevices()[GlobalDeviceId].Backend;
+  llvm::StringRef Extension;
+
+  switch (Backend) {
+  case OL_PLATFORM_BACKEND_AMDGPU:
+    Extension = ".amdgpu.bin";
+    break;
+  case OL_PLATFORM_BACKEND_CUDA:
+    Extension = ".nvptx64.bin";
+    break;
+  default:
+    llvm_unreachable("Unsupported backend to infer binary extension");
+  }
+
   llvm::SmallString<128> FullPath(Directory);
   llvm::sys::path::append(FullPath, llvm::Twine(BinaryName) + Extension);
 
-  // For simplicity, this implementation intentionally reads the binary from
-  // disk on every call.
-  //
-  // Other use cases could benefit from a global, thread-safe cache to avoid
-  // redundant file I/O and GPU program creation.
-
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
       llvm::MemoryBuffer::getFile(FullPath);
+
   if (std::error_code ErrorCode = FileOrErr.getError())
-    FATAL_ERROR(llvm::Twine("Failed to read device binary file '") + FullPath +
-                "': " + ErrorCode.message());
+    return llvm::errorCodeToError(ErrorCode);
 
   std::unique_ptr<llvm::MemoryBuffer> &BinaryData = *FileOrErr;
 
   ol_program_handle_t ProgramHandle = nullptr;
-  OL_CHECK(olCreateProgram(DeviceHandle, BinaryData->getBufferStart(),
-                           BinaryData->getBufferSize(), &ProgramHandle));
+  const ol_result_t OlResult =
+      olCreateProgram(DeviceHandle, BinaryData->getBufferStart(),
+                      BinaryData->getBufferSize(), &ProgramHandle);
+
+  if (OlResult != OL_SUCCESS) {
+    llvm::StringRef Details =
+        OlResult->Details ? OlResult->Details : "No details provided";
+
+    return llvm::createStringError(llvm::Twine(Details) + " (Code " +
+                                   llvm::Twine(OlResult->Code) + ")");
+  }
 
   return std::shared_ptr<DeviceImage>(
       new DeviceImage(DeviceHandle, ProgramHandle));
@@ -238,29 +256,46 @@ DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
 [[nodiscard]] std::shared_ptr<DeviceImage>
 DeviceContext::loadBinary(llvm::StringRef Directory,
                           llvm::StringRef BinaryName) const {
-  auto Backend = getDevices()[GlobalDeviceId].Backend;
-  llvm::StringRef Extension;
+  auto ImageOrErr = loadBinaryImpl(Directory, BinaryName);
 
-  switch (Backend) {
-  case OL_PLATFORM_BACKEND_AMDGPU:
-    Extension = ".amdgpu.bin";
-    break;
-  case OL_PLATFORM_BACKEND_CUDA:
-    Extension = ".nvptx64.bin";
-    break;
-  default:
-    llvm_unreachable("Unsupported backend to infer binary extension");
+  if (auto Err = ImageOrErr.takeError())
+    FATAL_ERROR(llvm::toString(std::move(Err)));
+
+  return std::move(*ImageOrErr);
+}
+
+[[nodiscard]] std::optional<std::shared_ptr<DeviceImage>>
+DeviceContext::tryLoadBinary(llvm::StringRef Directory,
+                             llvm::StringRef BinaryName) const {
+  auto ImageOrErr = loadBinaryImpl(Directory, BinaryName);
+
+  if (auto Err = ImageOrErr.takeError()) {
+    llvm::consumeError(std::move(Err));
+    return std::nullopt;
   }
 
-  return loadBinary(Directory, BinaryName, Extension);
+  return std::move(*ImageOrErr);
 }
 
-void DeviceContext::getKernelImpl(
-    ol_program_handle_t ProgramHandle, llvm::StringRef KernelName,
-    ol_symbol_handle_t *KernelHandle) const noexcept {
+[[nodiscard]] llvm::Expected<ol_symbol_handle_t>
+DeviceContext::getKernelImpl(ol_program_handle_t ProgramHandle,
+                             llvm::StringRef KernelName) const noexcept {
+  ol_symbol_handle_t KernelHandle = nullptr;
   llvm::SmallString<32> KernelNameBuffer(KernelName);
-  OL_CHECK(olGetSymbol(ProgramHandle, KernelNameBuffer.c_str(),
-                       OL_SYMBOL_KIND_KERNEL, KernelHandle));
+
+  const ol_result_t OlResult =
+      olGetSymbol(ProgramHandle, KernelNameBuffer.c_str(),
+                  OL_SYMBOL_KIND_KERNEL, &KernelHandle);
+
+  if (OlResult != OL_SUCCESS) {
+    llvm::StringRef Details =
+        OlResult->Details ? OlResult->Details : "No details provided";
+
+    return llvm::createStringError(llvm::Twine(Details) + " (Code " +
+                                   llvm::Twine(OlResult->Code) + ")");
+  }
+
+  return KernelHandle;
 }
 
 void DeviceContext::launchKernelImpl(
@@ -277,10 +312,10 @@ void DeviceContext::launchKernelImpl(
                           KernelArgsSize, &LaunchArgs, nullptr));
 }
 
-[[nodiscard]] llvm::StringRef DeviceContext::getName() const {
+[[nodiscard]] llvm::StringRef DeviceContext::getName() const noexcept {
   return getDevices()[GlobalDeviceId].Name;
 }
 
-[[nodiscard]] llvm::StringRef DeviceContext::getPlatform() const {
+[[nodiscard]] llvm::StringRef DeviceContext::getPlatform() const noexcept {
   return getDevices()[GlobalDeviceId].Platform;
 }

>From 724e2b6af48ba2ca4997eec66fd46184b865a2e2 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 24 Jul 2025 22:40:08 -0300
Subject: [PATCH 20/21] Remove the Dim class

---
 .../include/mathtest/DeviceContext.hpp        | 10 +--
 .../Conformance/include/mathtest/Dim.hpp      | 61 -------------------
 .../Conformance/lib/DeviceContext.cpp         | 10 +--
 3 files changed, 10 insertions(+), 71 deletions(-)
 delete mode 100644 offload/unittests/Conformance/include/mathtest/Dim.hpp

diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index d90b9510b0ef0..b104df780b0d6 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -19,7 +19,6 @@
 #define MATHTEST_DEVICECONTEXT_HPP
 
 #include "mathtest/DeviceResources.hpp"
-#include "mathtest/Dim.hpp"
 #include "mathtest/ErrorHandling.hpp"
 #include "mathtest/Support.hpp"
 
@@ -29,6 +28,7 @@
 
 #include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <memory>
 #include <optional>
 #include <tuple>
@@ -111,8 +111,8 @@ class DeviceContext {
   }
 
   template <typename KernelSignature, typename... ArgTypes>
-  void launchKernel(DeviceKernel<KernelSignature> Kernel, Dim NumGroups,
-                    Dim GroupSize, ArgTypes &&...Args) const noexcept {
+  void launchKernel(DeviceKernel<KernelSignature> Kernel, uint32_t NumGroups,
+                    uint32_t GroupSize, ArgTypes &&...Args) const noexcept {
     using ExpectedTypes =
         typename FunctionTypeTraits<KernelSignature>::ArgTypesTuple;
     using ProvidedTypes = std::tuple<std::decay_t<ArgTypes>...>;
@@ -151,8 +151,8 @@ class DeviceContext {
   getKernelImpl(ol_program_handle_t ProgramHandle,
                 llvm::StringRef KernelName) const noexcept;
 
-  void launchKernelImpl(ol_symbol_handle_t KernelHandle, const Dim &NumGroups,
-                        const Dim &GroupSize, const void *KernelArgs,
+  void launchKernelImpl(ol_symbol_handle_t KernelHandle, uint32_t NumGroups,
+                        uint32_t GroupSize, const void *KernelArgs,
                         std::size_t KernelArgsSize) const noexcept;
 
   std::size_t GlobalDeviceId;
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
deleted file mode 100644
index 890bf95e0d861..0000000000000
--- a/offload/unittests/Conformance/include/mathtest/Dim.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the definition of the Dim class, which is used to
-/// represent 1D, 2D, or 3D dimensions.
-///
-/// This class is used by DeviceContext to specify the number of thread groups
-/// (NumGroups) and the size of each group (GroupSize) for a kernel launch.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef MATHTEST_DIM_HPP
-#define MATHTEST_DIM_HPP
-
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <initializer_list>
-
-namespace mathtest {
-
-class Dim {
-public:
-  Dim() = delete;
-
-  constexpr Dim(uint32_t X, uint32_t Y = 1, uint32_t Z = 1) noexcept
-      : Data{X, Y, Z} {
-    assert(X > 0 && Y > 0 && Z > 0 && "Dimensions must be positive");
-  }
-
-  constexpr Dim(std::initializer_list<uint32_t> Dimensions) noexcept
-      : Data{1, 1, 1} {
-    assert(Dimensions.size() <= 3 &&
-           "The number of dimensions must be less than or equal to 3");
-
-    std::size_t Index = 0;
-    for (uint32_t DimValue : Dimensions)
-      Data[Index++] = DimValue;
-
-    assert(Data[0] > 0 && Data[1] > 0 && Data[2] > 0 &&
-           "Dimensions must be positive");
-  }
-
-  [[nodiscard]] constexpr uint32_t
-  operator[](std::size_t Index) const noexcept {
-    assert(Index < 3 && "Index is out of range");
-    return Data[Index];
-  }
-
-private:
-  uint32_t Data[3];
-};
-} // namespace mathtest
-
-#endif // MATHTEST_DIM_HPP
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index 613a3dc575a17..023440f2bde2f 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -29,6 +29,7 @@
 
 #include <OffloadAPI.h>
 #include <cstddef>
+#include <cstdint>
 #include <memory>
 #include <optional>
 #include <string>
@@ -299,13 +300,12 @@ DeviceContext::getKernelImpl(ol_program_handle_t ProgramHandle,
 }
 
 void DeviceContext::launchKernelImpl(
-    ol_symbol_handle_t KernelHandle, const Dim &NumGroups, const Dim &GroupSize,
+    ol_symbol_handle_t KernelHandle, uint32_t NumGroups, uint32_t GroupSize,
     const void *KernelArgs, std::size_t KernelArgsSize) const noexcept {
   ol_kernel_launch_size_args_t LaunchArgs;
-  LaunchArgs.Dimensions = 3; // It seems this field is not used anywhere.
-                             // Defaulting to the safest value.
-  LaunchArgs.NumGroups = {NumGroups[0], NumGroups[1], NumGroups[2]};
-  LaunchArgs.GroupSize = {GroupSize[0], GroupSize[1], GroupSize[2]};
+  LaunchArgs.Dimensions = 1;
+  LaunchArgs.NumGroups = {NumGroups, 1, 1};
+  LaunchArgs.GroupSize = {GroupSize, 1, 1};
   LaunchArgs.DynSharedMemory = 0;
 
   OL_CHECK(olLaunchKernel(nullptr, DeviceHandle, KernelHandle, KernelArgs,

>From 3d08837f276f3c0feb9d308a56c8f64a746facb2 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Mon, 28 Jul 2025 06:57:36 -0300
Subject: [PATCH 21/21] Add support for the `--test-configs` command-line
 option

---
 offload/unittests/CMakeLists.txt              | 58 +++--------
 offload/unittests/Conformance/CMakeLists.txt  | 18 ----
 .../Conformance/device_code/CMakeLists.txt    |  1 +
 .../include/mathtest/CommandLine.hpp          | 94 ++++++++++++++++++
 .../include/mathtest/CommandLineExtras.hpp    | 38 +++++++
 .../include/mathtest/DeviceContext.hpp        | 48 +++------
 .../include/mathtest/ExhaustiveGenerator.hpp  |  2 +
 .../include/mathtest/GpuMathTest.hpp          | 46 +++++----
 .../include/mathtest/InputGenerator.hpp       |  2 +
 .../include/mathtest/TestConfig.hpp           | 38 +++++++
 .../include/mathtest/TestRunner.hpp           | 99 ++++++++++++++++---
 .../unittests/Conformance/lib/CMakeLists.txt  | 12 ++-
 .../Conformance/lib/CommandLineExtras.cpp     | 45 +++++++++
 .../Conformance/lib/DeviceContext.cpp         | 74 ++++++--------
 .../unittests/Conformance/lib/TestConfig.cpp  | 56 +++++++++++
 .../Conformance/tests/CMakeLists.txt          |  4 +-
 .../Conformance/tests/Hypotf16Test.cpp        | 22 ++---
 .../unittests/Conformance/tests/LogfTest.cpp  | 23 +++--
 18 files changed, 477 insertions(+), 203 deletions(-)
 create mode 100644 offload/unittests/Conformance/include/mathtest/CommandLine.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/CommandLineExtras.hpp
 create mode 100644 offload/unittests/Conformance/include/mathtest/TestConfig.hpp
 create mode 100644 offload/unittests/Conformance/lib/CommandLineExtras.cpp
 create mode 100644 offload/unittests/Conformance/lib/TestConfig.cpp

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index e1cc8f1e2f6b2..6d165ffd4c53a 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -96,56 +96,26 @@ function(add_offload_unittest test_dirname)
 endfunction()
 
 function(add_conformance_test test_name)
-  set(options "")
-  set(oneValueArgs "")
-  set(multiValueArgs "SOURCES;PROVIDERS")
-  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  set(target_name "${test_name}.conformance")
+
+  list(TRANSFORM ARGN PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/" OUTPUT_VARIABLE files)
 
-  if(NOT ARG_SOURCES)
-    message(WARNING "Conformance test '${test_name}' must specify at least one source file")
-    return()
-  endif()
-  if(NOT ARG_PROVIDERS)
-    message(WARNING "Conformance test '${test_name}' must specify at least one provider")
-    return()
-  endif()
   if(NOT TARGET libc)
     message(WARNING "Cannot run conformance tests without the LLVM C library")
     return()
   endif()
 
-  list(TRANSFORM ARG_SOURCES PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/" OUTPUT_VARIABLE files)
-  add_custom_target(offload.conformance.${test_name})
-
-  foreach(provider IN LISTS ARG_PROVIDERS)
-    set(candidate_platforms ${OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_${provider}})
-    add_custom_target(offload.conformance.${test_name}.${provider})
-
-    foreach(platform IN LISTS candidate_platforms)
-      if(platform IN_LIST OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS)
-        set(target_name "${test_name}.${provider}.${platform}.conformance")
-        set(target_call "offload.conformance.${test_name}.${provider}.${platform}")
-
-        add_executable(${target_name} ${files})
-        add_dependencies(${target_name} ${provider}.bin)
-        target_compile_definitions(${target_name}
-          PRIVATE PROVIDER="${provider}"
-          PRIVATE PLATFORM="${platform}"
-          PRIVATE DEVICE_BINARY_DIR="${OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR}")
-        target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
-        set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
-
-        add_custom_target(${target_call}
-          COMMAND $<TARGET_FILE:${target_name}>
-          DEPENDS ${target_name}
-          COMMENT "Running conformance test '${test_name}' with '${provider}' on '${platform}'")
-
-        add_dependencies(offload.conformance ${target_call})
-        add_dependencies(offload.conformance.${test_name} ${target_call})
-        add_dependencies(offload.conformance.${test_name}.${provider} ${target_call})
-      endif()
-    endforeach()
-  endforeach()
+  add_executable(${target_name} ${files})
+  add_dependencies(${target_name} conformance_device_binaries)
+  target_compile_definitions(${target_name}
+    PRIVATE DEVICE_BINARY_DIR="${OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR}")
+  target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
+  set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
+
+  add_custom_target(offload.conformance.${test_name}
+    COMMAND $<TARGET_FILE:${target_name}>
+    DEPENDS ${target_name})
+  add_dependencies(offload.conformance offload.conformance.${test_name})
 endfunction()
 
 set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH "" CACHE STRING
diff --git a/offload/unittests/Conformance/CMakeLists.txt b/offload/unittests/Conformance/CMakeLists.txt
index 762f25fea2d31..ce0421553de05 100644
--- a/offload/unittests/Conformance/CMakeLists.txt
+++ b/offload/unittests/Conformance/CMakeLists.txt
@@ -2,24 +2,6 @@ add_custom_target(offload.conformance)
 
 set(PLUGINS_TEST_COMMON MathTest)
 
-set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_llvm-libm "cuda;amdgpu")
-set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_cuda-math "cuda")
-set(OFFLOAD_CONFORMANCE_PROVIDER_PLATFORMS_hip-math "amdgpu")
-
-set(OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "")
-
-if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-  find_package(CUDAToolkit QUIET)
-  if(PLATFORM_HAS_NVPTX AND CUDAToolkit_FOUND)
-    list(APPEND OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "cuda")
-  endif()
-endif()
-if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-  if(PLATFORM_HAS_AMDGPU)
-    list(APPEND OFFLOAD_CONFORMANCE_AVAILABLE_PLATFORMS "amdgpu")
-  endif()
-endif()
-
 add_subdirectory(device_code)
 add_subdirectory(lib)
 add_subdirectory(tests)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 1ea6816aaa8dd..18f54b8dc5252 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin)
 
+add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
 set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/include/mathtest/CommandLine.hpp b/offload/unittests/Conformance/include/mathtest/CommandLine.hpp
new file mode 100644
index 0000000000000..129e50dca1970
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/CommandLine.hpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of custom command-line argument parsers
+/// using llvm::cl.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_COMMANDLINE_HPP
+#define MATHTEST_COMMANDLINE_HPP
+
+#include "mathtest/TestConfig.hpp"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <string>
+
+namespace llvm {
+namespace cl {
+
+struct TestConfigsArg {
+  enum class Mode { Default, All, Explicit } Mode = Mode::Default;
+  llvm::SmallVector<mathtest::TestConfig, 4> Explicit;
+};
+
+template <> class parser<TestConfigsArg> : public basic_parser<TestConfigsArg> {
+public:
+  parser(Option &O) : basic_parser<TestConfigsArg>(O) {}
+
+  static bool isAllowed(const mathtest::TestConfig &Config) {
+    static const llvm::SmallVector<mathtest::TestConfig, 4> &AllTestConfigs =
+        mathtest::getAllTestConfigs();
+
+    return llvm::is_contained(AllTestConfigs, Config);
+  }
+
+  bool parse(Option &O, StringRef ArgName, StringRef ArgValue,
+             TestConfigsArg &Val) {
+    ArgValue = ArgValue.trim();
+    if (ArgValue.empty())
+      return O.error(
+          "Expected '" + getValueName() +
+          "', but got an empty string. Omit the flag to use defaults");
+
+    if (ArgValue.equals_insensitive("all")) {
+      Val.Mode = TestConfigsArg::Mode::All;
+      return false;
+    }
+
+    llvm::SmallVector<StringRef, 8> Pairs;
+    ArgValue.split(Pairs, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+    Val.Mode = TestConfigsArg::Mode::Explicit;
+    Val.Explicit.clear();
+
+    for (StringRef Pair : Pairs) {
+      size_t Pos = Pair.find(':');
+      if (Pos == StringRef::npos)
+        return O.error("Expected '<provider>:<platform>', got '" + Pair + "'");
+
+      StringRef Provider = Pair.take_front(Pos);
+      StringRef Platform = Pair.drop_front(Pos + 1);
+      mathtest::TestConfig Config = {Provider.str(), Platform.str()};
+      if (!isAllowed(Config))
+        return O.error("Invalid pair '" + Pair + "'");
+
+      Val.Explicit.push_back(Config);
+    }
+
+    return false;
+  }
+
+  StringRef getValueName() const override {
+    return "all|provider:platform[,provider:platform...]";
+  }
+
+  void printOptionDiff(const Option &O, const TestConfigsArg &V, OptVal Default,
+                       size_t GlobalWidth) const {
+    printOptionNoValue(O, GlobalWidth);
+  }
+};
+} // namespace cl
+} // namespace llvm
+
+#endif // MATHTEST_COMMANDLINE_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/CommandLineExtras.hpp b/offload/unittests/Conformance/include/mathtest/CommandLineExtras.hpp
new file mode 100644
index 0000000000000..e80fdbf179d6c
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/CommandLineExtras.hpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the command-line options and the main
+/// interface for selecting test configurations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_COMMANDLINEEXTRAS_HPP
+#define MATHTEST_COMMANDLINEEXTRAS_HPP
+
+#include "mathtest/CommandLine.hpp"
+#include "mathtest/TestConfig.hpp"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace mathtest {
+namespace cl {
+
+extern llvm::cl::opt<bool> IsVerbose;
+
+namespace detail {
+
+extern llvm::cl::opt<llvm::cl::TestConfigsArg> TestConfigsOpt;
+} // namespace detail
+
+const llvm::SmallVector<TestConfig, 4> &getTestConfigs();
+} // namespace cl
+} // namespace mathtest
+
+#endif // MATHTEST_COMMANDLINEEXTRAS_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
index b104df780b0d6..5c31fc3da53cd 100644
--- a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -30,7 +30,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <memory>
-#include <optional>
 #include <tuple>
 #include <type_traits>
 #include <utility>
@@ -67,47 +66,25 @@ class DeviceContext {
     return ManagedBuffer<T>(TypedAddress, Size);
   }
 
-  [[nodiscard]] std::shared_ptr<DeviceImage>
+  [[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
   loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
 
-  [[nodiscard]] std::optional<std::shared_ptr<DeviceImage>>
-  tryLoadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
-
   template <typename KernelSignature>
-  DeviceKernel<KernelSignature>
+  [[nodiscard]] llvm::Expected<DeviceKernel<KernelSignature>>
   getKernel(const std::shared_ptr<DeviceImage> &Image,
-            llvm::StringRef KernelName) const noexcept {
+            llvm::StringRef KernelName) const {
     assert(Image && "Image provided to getKernel is null");
 
     if (Image->DeviceHandle != DeviceHandle)
-      FATAL_ERROR("Image provided to getKernel was created for a different "
-                  "device");
+      return llvm::createStringError(
+          "Image provided to getKernel was created for a different device");
 
-    auto KernelHandleOrErr = getKernelImpl(Image->Handle, KernelName);
+    auto ExpectedHandle = getKernelHandle(Image->Handle, KernelName);
 
-    if (auto Err = KernelHandleOrErr.takeError())
-      FATAL_ERROR(llvm::toString(std::move(Err)));
+    if (!ExpectedHandle)
+      return ExpectedHandle.takeError();
 
-    return DeviceKernel<KernelSignature>(Image, *KernelHandleOrErr);
-  }
-
-  template <typename KernelSignature>
-  [[nodiscard]] std::optional<DeviceKernel<KernelSignature>>
-  tryGetKernel(const std::shared_ptr<DeviceImage> &Image,
-               llvm::StringRef KernelName) const noexcept {
-    assert(Image && "Image provided to getKernel is null");
-
-    if (Image->DeviceHandle != DeviceHandle)
-      return std::nullopt;
-
-    auto KernelHandleOrErr = getKernelImpl(Image->Handle, KernelName);
-
-    if (auto Err = KernelHandleOrErr.takeError()) {
-      llvm::consumeError(std::move(Err));
-      return std::nullopt;
-    }
-
-    return DeviceKernel<KernelSignature>(Image, *KernelHandleOrErr);
+    return DeviceKernel<KernelSignature>(Image, *ExpectedHandle);
   }
 
   template <typename KernelSignature, typename... ArgTypes>
@@ -144,12 +121,9 @@ class DeviceContext {
   [[nodiscard]] llvm::StringRef getPlatform() const noexcept;
 
 private:
-  [[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
-  loadBinaryImpl(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
-
   [[nodiscard]] llvm::Expected<ol_symbol_handle_t>
-  getKernelImpl(ol_program_handle_t ProgramHandle,
-                llvm::StringRef KernelName) const noexcept;
+  getKernelHandle(ol_program_handle_t ProgramHandle,
+                  llvm::StringRef KernelName) const noexcept;
 
   void launchKernelImpl(ol_symbol_handle_t KernelHandle, uint32_t NumGroups,
                         uint32_t GroupSize, const void *KernelArgs,
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index e7de3dee6428d..6f7f7a9b665d0 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -56,6 +56,8 @@ class [[nodiscard]] ExhaustiveGenerator final
         Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
   }
 
+  void reset() noexcept override { NextFlatIndex = 0; }
+
   [[nodiscard]] std::size_t
   fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
     const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
index b15ac0449cbea..b88d6e9aebdc8 100644
--- a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -24,8 +24,8 @@
 #include "mathtest/TestResult.hpp"
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
 
 #include <cassert>
 #include <cstddef>
@@ -61,12 +61,16 @@ class [[nodiscard]] GpuMathTest final {
   using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
   using GeneratorType = ApplyTupleTypes_t<InTypesTuple, InputGenerator>;
 
-  explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
-                       llvm::StringRef Provider,
-                       llvm::StringRef DeviceBinaryDir)
-      : Context(std::move(Context)), Provider(Provider),
-        Kernel(getKernel(this->Context, Provider, DeviceBinaryDir)) {
-    assert(this->Context && "Context must not be null");
+  [[nodiscard]] static llvm::Expected<GpuMathTest>
+  create(std::shared_ptr<DeviceContext> Context, llvm::StringRef Provider,
+         llvm::StringRef DeviceBinaryDir) {
+    assert(Context && "Context must not be null");
+
+    auto ExpectedKernel = getKernel(*Context, Provider, DeviceBinaryDir);
+    if (!ExpectedKernel)
+      return ExpectedKernel.takeError();
+
+    return GpuMathTest(std::move(Context), Provider, *ExpectedKernel);
   }
 
   ResultType run(GeneratorType &Generator,
@@ -102,20 +106,26 @@ class [[nodiscard]] GpuMathTest final {
   [[nodiscard]] std::string getProvider() const noexcept { return Provider; }
 
 private:
-  static DeviceKernel<KernelSignature>
-  getKernel(const std::shared_ptr<DeviceContext> &Context,
-            llvm::StringRef Provider,
-            llvm::StringRef DeviceBinaryDir) noexcept {
-    constexpr llvm::StringRef ValidProviders[] = {"llvm-libm"};
-
-    if (llvm::find(ValidProviders, Provider) == std::end(ValidProviders))
-      FATAL_ERROR(llvm::Twine("Unsupported provider: '") + Provider + "'");
+  explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
+                       llvm::StringRef Provider,
+                       DeviceKernel<KernelSignature> Kernel)
+      : Context(std::move(Context)), Provider(Provider), Kernel(Kernel) {}
 
+  static llvm::Expected<DeviceKernel<KernelSignature>>
+  getKernel(const DeviceContext &Context, llvm::StringRef Provider,
+            llvm::StringRef DeviceBinaryDir) {
     llvm::StringRef BinaryName = Provider;
-    const auto Image = Context->loadBinary(DeviceBinaryDir, BinaryName);
 
-    return Context->getKernel<KernelSignature>(Image,
-                                               FunctionConfig::KernelName);
+    auto ExpectedImage = Context.loadBinary(DeviceBinaryDir, BinaryName);
+    if (!ExpectedImage)
+      return ExpectedImage.takeError();
+
+    auto ExpectedKernel = Context.getKernel<KernelSignature>(
+        *ExpectedImage, FunctionConfig::KernelName);
+    if (!ExpectedKernel)
+      return ExpectedKernel.takeError();
+
+    return *ExpectedKernel;
   }
 
   [[nodiscard]] auto createBuffers(std::size_t BufferSize) const {
diff --git a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
index ab1c32899d6b5..0154d0b024762 100644
--- a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
@@ -23,6 +23,8 @@ template <typename... InTypes> class InputGenerator {
 public:
   virtual ~InputGenerator() noexcept = default;
 
+  virtual void reset() noexcept = 0;
+
   [[nodiscard]] virtual size_t
   fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept = 0;
 };
diff --git a/offload/unittests/Conformance/include/mathtest/TestConfig.hpp b/offload/unittests/Conformance/include/mathtest/TestConfig.hpp
new file mode 100644
index 0000000000000..49fe8d845b8af
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TestConfig.hpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the TestConfig struct and declares the
+/// functions for retrieving the set of all and default test configurations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_TESTCONFIG_HPP
+#define MATHTEST_TESTCONFIG_HPP
+
+#include "llvm/ADT/SmallVector.h"
+
+#include <string>
+
+namespace mathtest {
+
+struct TestConfig {
+  std::string Provider;
+  std::string Platform;
+
+  [[nodiscard]] bool operator==(const TestConfig &RHS) const noexcept {
+    return Provider == RHS.Provider && Platform == RHS.Platform;
+  }
+};
+
+[[nodiscard]] const llvm::SmallVector<TestConfig, 4> &getAllTestConfigs();
+
+[[nodiscard]] const llvm::SmallVector<TestConfig, 4> &getDefaultTestConfigs();
+} // namespace mathtest
+
+#endif // MATHTEST_TESTCONFIG_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index 3e1f184ef4366..f89d151d0161e 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -7,30 +7,50 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains the definition of the runTest function, which executes a
-/// test instance and prints a formatted report of the results.
+/// This file contains the definition of the runTests function, which executes
+/// a suite of tests and prints a formatted report for each.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef MATHTEST_TESTRUNNER_HPP
 #define MATHTEST_TESTRUNNER_HPP
 
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/GpuMathTest.hpp"
 #include "mathtest/Numerics.hpp"
+#include "mathtest/TestConfig.hpp"
 
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include <chrono>
+#include <cstddef>
+#include <memory>
 #include <tuple>
 
 namespace mathtest {
 namespace detail {
 
+template <auto Func>
+void printPreamble(const TestConfig &Config, size_t Index,
+                   size_t Total) noexcept {
+  using FunctionConfig = FunctionConfig<Func>;
+
+  llvm::outs() << "[" << (Index + 1) << "/" << Total << "] "
+               << "Running conformance test '" << FunctionConfig::Name
+               << "' with '" << Config.Provider << "' on '" << Config.Platform
+               << "'\n";
+  llvm::outs().flush();
+}
+
 template <typename T>
 void printValue(llvm::raw_ostream &OS, const T &Value) noexcept {
   if constexpr (IsFloatingPoint_v<T>) {
-
     if constexpr (sizeof(T) < sizeof(float))
       OS << float(Value);
     else
@@ -49,13 +69,13 @@ void printValues(llvm::raw_ostream &OS,
   std::apply(
       [&OS](const auto &...Values) {
         bool IsFirst = true;
-        auto Print = [&](const auto &Value) {
+        auto PrintWithComma = [&](const auto &Value) {
           if (!IsFirst)
             OS << ", ";
           printValue(OS, Value);
           IsFirst = false;
         };
-        (Print(Values), ...);
+        (PrintWithComma(Values), ...);
       },
       ValuesTuple);
 }
@@ -107,28 +127,81 @@ void printReport(const TestType &Test, const ResultType &Result,
   llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Status",
                                 Passed ? "PASSED" : "FAILED");
 
-  if (auto Worst = Result.getWorstFailingCase())
+  if (const auto &Worst = Result.getWorstFailingCase())
     printWorstFailingCase(llvm::errs(), Worst.value());
 
   llvm::errs().flush();
 }
-} // namespace detail
 
-template <typename TestType>
-[[nodiscard]] bool
-runTest(const TestType &Test,
-        typename TestType::GeneratorType &Generator) noexcept {
+template <auto Func, typename TestType = GpuMathTest<Func>>
+[[nodiscard]] llvm::Expected<bool>
+runTest(typename TestType::GeneratorType &Generator, const TestConfig &Config,
+        llvm::StringRef DeviceBinaryDir) {
+  const auto &Platforms = getPlatforms();
+
+  if (!llvm::any_of(Platforms, [&](llvm::StringRef Platform) {
+        return Platform.equals_insensitive(Config.Platform);
+      }))
+    return llvm::createStringError("Platform '" + Config.Platform +
+                                   "' is not available on this system");
+
+  auto Context =
+      std::make_shared<DeviceContext>(Config.Platform, /*DeviceId=*/0);
+  auto ExpectedTest =
+      TestType::create(Context, Config.Provider, DeviceBinaryDir);
+
+  if (!ExpectedTest)
+    return ExpectedTest.takeError();
+
   const auto StartTime = std::chrono::steady_clock::now();
 
-  auto Result = Test.run(Generator);
+  auto Result = ExpectedTest->run(Generator);
 
   const auto EndTime = std::chrono::steady_clock::now();
   const auto Duration = EndTime - StartTime;
 
-  detail::printReport(Test, Result, Duration);
+  printReport(*ExpectedTest, Result, Duration);
 
   return Result.hasPassed();
 }
+} // namespace detail
+
+template <auto Func, typename TestType = GpuMathTest<Func>>
+[[nodiscard]] bool runTests(typename TestType::GeneratorType &Generator,
+                            const llvm::SmallVector<TestConfig, 4> &Configs,
+                            llvm::StringRef DeviceBinaryDir,
+                            bool IsVerbose = false) {
+  const size_t NumConfigs = Configs.size();
+
+  if (NumConfigs == 0)
+    llvm::errs() << "There is no test configuration to run a test\n";
+
+  bool Passed = true;
+
+  for (const auto &[Index, Config] : llvm::enumerate(Configs)) {
+    detail::printPreamble<Func>(Config, Index, NumConfigs);
+
+    Generator.reset();
+
+    auto ExpectedPassed =
+        detail::runTest<Func, TestType>(Generator, Config, DeviceBinaryDir);
+
+    if (!ExpectedPassed) {
+      const auto Details = llvm::toString(ExpectedPassed.takeError());
+      llvm::errs()
+          << "WARNING: Conformance test not supported on this system\n";
+
+      if (IsVerbose)
+        llvm::errs() << "Details: " << Details << "\n";
+    } else {
+      Passed &= *ExpectedPassed;
+    }
+
+    llvm::errs() << "\n";
+  }
+
+  return Passed;
+}
 } // namespace mathtest
 
 #endif // MATHTEST_TESTRUNNER_HPP
diff --git a/offload/unittests/Conformance/lib/CMakeLists.txt b/offload/unittests/Conformance/lib/CMakeLists.txt
index adf2fa3604cb4..8e86f101729ad 100644
--- a/offload/unittests/Conformance/lib/CMakeLists.txt
+++ b/offload/unittests/Conformance/lib/CMakeLists.txt
@@ -1,7 +1,11 @@
-add_library(MathTest STATIC DeviceContext.cpp DeviceResources.cpp ErrorHandling.cpp)
-
-include(FindLibcCommonUtils)
+add_library(MathTest STATIC
+  CommandLineExtras.cpp DeviceContext.cpp DeviceResources.cpp ErrorHandling.cpp TestConfig.cpp)
 
 target_include_directories(MathTest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
-target_compile_options(MathTest PUBLIC -fno-rtti)
+
+if(NOT LLVM_REQUIRES_RTTI)
+  target_compile_options(MathTest PUBLIC -fno-rtti)
+endif()
+
+include(FindLibcCommonUtils)
 target_link_libraries(MathTest PUBLIC LLVMOffload LLVMSupport LLVMDemangle llvm-libc-common-utilities)
diff --git a/offload/unittests/Conformance/lib/CommandLineExtras.cpp b/offload/unittests/Conformance/lib/CommandLineExtras.cpp
new file mode 100644
index 0000000000000..96f5058105467
--- /dev/null
+++ b/offload/unittests/Conformance/lib/CommandLineExtras.cpp
@@ -0,0 +1,45 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the command-line options and the
+/// implementation of the logic for selecting test configurations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+
+#include "mathtest/CommandLine.hpp"
+#include "mathtest/TestConfig.hpp"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace mathtest;
+
+llvm::cl::opt<bool> mathtest::cl::IsVerbose(
+    "verbose",
+    llvm::cl::desc("Enable verbose output for failed and unsupported tests"),
+    llvm::cl::init(false));
+
+llvm::cl::opt<llvm::cl::TestConfigsArg> mathtest::cl::detail::TestConfigsOpt(
+    "test-configs", llvm::cl::Optional,
+    llvm::cl::desc("Select test configurations"),
+    llvm::cl::value_desc("all|provider:platform[,provider:platform...]"));
+
+const llvm::SmallVector<TestConfig, 4> &mathtest::cl::getTestConfigs() {
+  switch (detail::TestConfigsOpt.Mode) {
+  case llvm::cl::TestConfigsArg::Mode::Default:
+    return getDefaultTestConfigs();
+  case llvm::cl::TestConfigsArg::Mode::All:
+    return getAllTestConfigs();
+  case llvm::cl::TestConfigsArg::Mode::Explicit:
+    return detail::TestConfigsOpt.Explicit;
+  }
+  llvm_unreachable("Unknown TestConfigsArg mode");
+}
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
index 023440f2bde2f..c190bd8d7d307 100644
--- a/offload/unittests/Conformance/lib/DeviceContext.cpp
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -16,6 +16,7 @@
 
 #include "mathtest/ErrorHandling.hpp"
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -88,7 +89,7 @@ getDevicePlatform(ol_device_handle_t DeviceHandle) noexcept {
                              PropValue.data()));
   PropValue.pop_back(); // Remove the null terminator
 
-  return llvm::StringRef(PropValue).lower();
+  return PropValue;
 }
 
 [[nodiscard]] ol_platform_backend_t
@@ -177,10 +178,11 @@ DeviceContext::DeviceContext(std::size_t GlobalDeviceId)
 
 DeviceContext::DeviceContext(llvm::StringRef Platform, std::size_t DeviceId)
     : DeviceHandle(nullptr) {
-  std::string NormalizedPlatform = Platform.lower();
   const auto &Platforms = getPlatforms();
 
-  if (!Platforms.contains(NormalizedPlatform))
+  if (!llvm::any_of(Platforms, [&](llvm::StringRef CurrentPlatform) {
+        return CurrentPlatform.equals_insensitive(Platform);
+      }))
     FATAL_ERROR("There is no platform that matches with '" +
                 llvm::Twine(Platform) +
                 "'. Available platforms are: " + llvm::join(Platforms, ", "));
@@ -191,7 +193,7 @@ DeviceContext::DeviceContext(llvm::StringRef Platform, std::size_t DeviceId)
   std::size_t MatchCount = 0;
 
   for (std::size_t Index = 0; Index < Devices.size(); ++Index) {
-    if (Devices[Index].Platform == NormalizedPlatform) {
+    if (Platform.equals_insensitive(Devices[Index].Platform)) {
       if (MatchCount == DeviceId) {
         FoundGlobalDeviceId = Index;
         break;
@@ -210,8 +212,8 @@ DeviceContext::DeviceContext(llvm::StringRef Platform, std::size_t DeviceId)
 }
 
 [[nodiscard]] llvm::Expected<std::shared_ptr<DeviceImage>>
-DeviceContext::loadBinaryImpl(llvm::StringRef Directory,
-                              llvm::StringRef BinaryName) const {
+DeviceContext::loadBinary(llvm::StringRef Directory,
+                          llvm::StringRef BinaryName) const {
   auto Backend = getDevices()[GlobalDeviceId].Backend;
   llvm::StringRef Extension;
 
@@ -223,7 +225,8 @@ DeviceContext::loadBinaryImpl(llvm::StringRef Directory,
     Extension = ".nvptx64.bin";
     break;
   default:
-    llvm_unreachable("Unsupported backend to infer binary extension");
+    return llvm::createStringError(
+        "Unsupported backend to infer binary extension");
   }
 
   llvm::SmallString<128> FullPath(Directory);
@@ -233,7 +236,9 @@ DeviceContext::loadBinaryImpl(llvm::StringRef Directory,
       llvm::MemoryBuffer::getFile(FullPath);
 
   if (std::error_code ErrorCode = FileOrErr.getError())
-    return llvm::errorCodeToError(ErrorCode);
+    return llvm::createStringError(
+        llvm::Twine("Failed to read device binary file '") + FullPath +
+        "': " + ErrorCode.message());
 
   std::unique_ptr<llvm::MemoryBuffer> &BinaryData = *FileOrErr;
 
@@ -246,57 +251,38 @@ DeviceContext::loadBinaryImpl(llvm::StringRef Directory,
     llvm::StringRef Details =
         OlResult->Details ? OlResult->Details : "No details provided";
 
-    return llvm::createStringError(llvm::Twine(Details) + " (Code " +
-                                   llvm::Twine(OlResult->Code) + ")");
+    // clang-format off
+    return llvm::createStringError(
+      llvm::Twine(Details) +
+      " (code " + llvm::Twine(OlResult->Code) + ")");
+    // clang-format on
   }
 
   return std::shared_ptr<DeviceImage>(
       new DeviceImage(DeviceHandle, ProgramHandle));
 }
 
-[[nodiscard]] std::shared_ptr<DeviceImage>
-DeviceContext::loadBinary(llvm::StringRef Directory,
-                          llvm::StringRef BinaryName) const {
-  auto ImageOrErr = loadBinaryImpl(Directory, BinaryName);
-
-  if (auto Err = ImageOrErr.takeError())
-    FATAL_ERROR(llvm::toString(std::move(Err)));
-
-  return std::move(*ImageOrErr);
-}
-
-[[nodiscard]] std::optional<std::shared_ptr<DeviceImage>>
-DeviceContext::tryLoadBinary(llvm::StringRef Directory,
-                             llvm::StringRef BinaryName) const {
-  auto ImageOrErr = loadBinaryImpl(Directory, BinaryName);
-
-  if (auto Err = ImageOrErr.takeError()) {
-    llvm::consumeError(std::move(Err));
-    return std::nullopt;
-  }
-
-  return std::move(*ImageOrErr);
-}
-
 [[nodiscard]] llvm::Expected<ol_symbol_handle_t>
-DeviceContext::getKernelImpl(ol_program_handle_t ProgramHandle,
-                             llvm::StringRef KernelName) const noexcept {
-  ol_symbol_handle_t KernelHandle = nullptr;
-  llvm::SmallString<32> KernelNameBuffer(KernelName);
+DeviceContext::getKernelHandle(ol_program_handle_t ProgramHandle,
+                               llvm::StringRef KernelName) const noexcept {
+  ol_symbol_handle_t Handle = nullptr;
+  llvm::SmallString<32> NameBuffer(KernelName);
 
-  const ol_result_t OlResult =
-      olGetSymbol(ProgramHandle, KernelNameBuffer.c_str(),
-                  OL_SYMBOL_KIND_KERNEL, &KernelHandle);
+  const ol_result_t OlResult = olGetSymbol(ProgramHandle, NameBuffer.c_str(),
+                                           OL_SYMBOL_KIND_KERNEL, &Handle);
 
   if (OlResult != OL_SUCCESS) {
     llvm::StringRef Details =
         OlResult->Details ? OlResult->Details : "No details provided";
 
-    return llvm::createStringError(llvm::Twine(Details) + " (Code " +
-                                   llvm::Twine(OlResult->Code) + ")");
+    // clang-format off
+    return llvm::createStringError(
+      llvm::Twine(Details) +
+      " (code " + llvm::Twine(OlResult->Code) + ")");
+    // clang-format on
   }
 
-  return KernelHandle;
+  return Handle;
 }
 
 void DeviceContext::launchKernelImpl(
diff --git a/offload/unittests/Conformance/lib/TestConfig.cpp b/offload/unittests/Conformance/lib/TestConfig.cpp
new file mode 100644
index 0000000000000..b4396fcc74444
--- /dev/null
+++ b/offload/unittests/Conformance/lib/TestConfig.cpp
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation for the functions that define the set
+/// of all and default test configurations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/TestConfig.hpp"
+
+#include "mathtest/DeviceContext.hpp"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallVectorExtras.h"
+
+using namespace mathtest;
+
+[[nodiscard]] const llvm::SmallVector<TestConfig, 4> &
+mathtest::getAllTestConfigs() {
+  // Thread-safe initialization of a static local variable
+  static auto AllTestConfigs = []() -> llvm::SmallVector<TestConfig, 4> {
+    return {
+        {"llvm-libm", "amdgpu"},
+        {"llvm-libm", "cuda"},
+        {"cuda-math", "cuda"},
+        {"hip-math", "amdgpu"},
+    };
+  }();
+
+  return AllTestConfigs;
+};
+
+[[nodiscard]] const llvm::SmallVector<TestConfig, 4> &
+mathtest::getDefaultTestConfigs() {
+  // Thread-safe initialization of a static local variable
+  static auto DefaultTestConfigs = []() -> llvm::SmallVector<TestConfig, 4> {
+    const auto Platforms = getPlatforms();
+    const auto AllTestConfigs = getAllTestConfigs();
+    llvm::StringRef Provider = "llvm-libm";
+
+    return llvm::filter_to_vector(AllTestConfigs, [&](const auto &Config) {
+      return Provider.equals_insensitive(Config.Provider) &&
+             llvm::any_of(Platforms, [&](llvm::StringRef Platform) {
+               return Platform.equals_insensitive(Config.Platform);
+             });
+    });
+  }();
+
+  return DefaultTestConfigs;
+};
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
index 8423ab2365ad3..b1aa22b44bb5c 100644
--- a/offload/unittests/Conformance/tests/CMakeLists.txt
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@@ -1,2 +1,2 @@
-add_conformance_test(hypotf16 SOURCES Hypotf16Test.cpp PROVIDERS llvm-libm)
-add_conformance_test(logf SOURCES LogfTest.cpp PROVIDERS llvm-libm)
+add_conformance_test(hypotf16 Hypotf16Test.cpp)
+add_conformance_test(logf LogfTest.cpp)
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
index 13926d7193ecc..fbc001a9fb683 100644
--- a/offload/unittests/Conformance/tests/Hypotf16Test.cpp
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -11,10 +11,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "mathtest/DeviceContext.hpp"
+#include "mathtest/CommandLineExtras.hpp"
 #include "mathtest/ExhaustiveGenerator.hpp"
-#include "mathtest/GpuMathTest.hpp"
 #include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestConfig.hpp"
 #include "mathtest/TestRunner.hpp"
 #include "mathtest/TypeExtras.hpp"
 
@@ -22,7 +22,6 @@
 
 #include <cstdlib>
 #include <math.h>
-#include <memory>
 
 using namespace mathtest;
 
@@ -43,19 +42,20 @@ template <> struct FunctionConfig<hypotf16> {
 };
 } // namespace mathtest
 
-int main() {
-  const llvm::StringRef Platform = PLATFORM;
-  auto Context = std::make_shared<DeviceContext>(Platform, /*DeviceId=*/0);
-
-  const llvm::StringRef Provider = PROVIDER;
-  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
-  GpuMathTest<hypotf16> Hypotf16Test(Context, Provider, DeviceBinaryDir);
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(
+      argc, argv, "Conformance test of the hypotf16 function");
 
   IndexedRange<float16> RangeX;
   IndexedRange<float16> RangeY;
   ExhaustiveGenerator<float16, float16> Generator(RangeX, RangeY);
 
-  const auto Passed = runTest(Hypotf16Test, Generator);
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  bool Passed =
+      runTests<hypotf16>(Generator, Configs, DeviceBinaryDir, IsVerbose);
 
   return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
index 2f348c621742e..97249edffaa66 100644
--- a/offload/unittests/Conformance/tests/LogfTest.cpp
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -11,10 +11,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "mathtest/DeviceContext.hpp"
+#include "mathtest/CommandLineExtras.hpp"
 #include "mathtest/ExhaustiveGenerator.hpp"
-#include "mathtest/GpuMathTest.hpp"
 #include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestConfig.hpp"
 #include "mathtest/TestRunner.hpp"
 
 #include "llvm/ADT/StringRef.h"
@@ -22,7 +22,6 @@
 #include <cstdlib>
 #include <limits>
 #include <math.h>
-#include <memory>
 
 namespace mathtest {
 
@@ -36,22 +35,22 @@ template <> struct FunctionConfig<logf> {
 };
 } // namespace mathtest
 
-int main() {
-  using namespace mathtest;
-
-  const llvm::StringRef Platform = PLATFORM;
-  auto Context = std::make_shared<DeviceContext>(Platform, /*DeviceId=*/0);
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the logf function");
 
-  const llvm::StringRef Provider = PROVIDER;
-  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
-  GpuMathTest<logf> LogfTest(Context, Provider, DeviceBinaryDir);
+  using namespace mathtest;
 
   IndexedRange<float> Range(/*Begin=*/0.0f,
                             /*End=*/std::numeric_limits<float>::infinity(),
                             /*Inclusive=*/true);
   ExhaustiveGenerator<float> Generator(Range);
 
-  const auto Passed = runTest(LogfTest, Generator);
+  const auto Configs = cl::getTestConfigs();
+  const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
+  const bool IsVerbose = cl::IsVerbose;
+
+  bool Passed = runTests<logf>(Generator, Configs, DeviceBinaryDir, IsVerbose);
 
   return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
 }