[llvm] [Offload] Add framework for math conformance tests (PR #149242)
Leandro Lacerda via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 20:51:45 PDT 2025
https://github.com/leandrolcampos created https://github.com/llvm/llvm-project/pull/149242
This PR introduces the initial version of a C++ framework for the conformance testing of GPU math library functions, building upon the skeleton provided in #146391.
The main goal of this framework is to systematically measure the accuracy of math functions in the GPU libc, verifying their correctness — or, at a minimum, their conformance to standards such as OpenCL — via exhaustive or random accuracy tests.
### How to Build and Run
This test suite depends on the Offload and GPU libc runtimes. After ensuring the runtimes are built (see the [official instructions](https://libc.llvm.org/gpu/building.html#id1)), an individual test can be built and run via:
```bash
ninja -C path/to/build/runtimes/runtimes-bins offload.conformance.LogfTest
```
### Next Steps
The planned next steps are:
- [ ] Add command-line argument support for `--platform` and `--device_id`.
- [ ] Add more tests for half-precision functions and single-precision univariate functions.
- [ ] Implement the `RandomGenerator` to enable testing of functions with large input spaces (e.g., double-precision).
- [ ] Add tests for single-precision bivariate functions and double-precision functions.
- [ ] Add support for other math library providers: `cuda-math` and `hip-math`.
- [ ] Add support for the `--provider` command-line argument.
- [ ] Implement a unit test suite for internal framework utilities (e.g., `computeUlpDistance`).
- [ ] Add a README.md file.
>From 21b1fe1d366b60b56a858d847a245ebabcab11bc Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Thu, 17 Jul 2025 00:15:00 -0300
Subject: [PATCH] [Offload] Add framework for math conformance tests
---
offload/unittests/CMakeLists.txt | 11 +-
offload/unittests/Conformance/CMakeLists.txt | 7 +-
.../Conformance/device_code/CMakeLists.txt | 6 +-
.../Conformance/device_code/LLVMLibm.c | 30 +++
.../unittests/Conformance/device_code/sin.c | 4 -
.../include/mathtest/DeviceContext.hpp | 121 +++++++++
.../include/mathtest/DeviceResources.hpp | 129 ++++++++++
.../Conformance/include/mathtest/Dim.hpp | 42 ++++
.../include/mathtest/ErrorHandling.hpp | 29 +++
.../include/mathtest/ExhaustiveGenerator.hpp | 139 +++++++++++
.../include/mathtest/GpuMathTest.hpp | 159 ++++++++++++
.../include/mathtest/HostRefChecker.hpp | 82 ++++++
.../include/mathtest/IndexedRange.hpp | 91 +++++++
.../include/mathtest/InputGenerator.hpp | 14 ++
.../Conformance/include/mathtest/Numerics.hpp | 235 ++++++++++++++++++
.../include/mathtest/OffloadForward.hpp | 22 ++
.../Conformance/include/mathtest/Support.hpp | 138 ++++++++++
.../include/mathtest/TestResult.hpp | 70 ++++++
.../include/mathtest/TestRunner.hpp | 118 +++++++++
.../include/mathtest/TypeExtras.hpp | 9 +
.../unittests/Conformance/lib/CMakeLists.txt | 5 +
.../Conformance/lib/DeviceContext.cpp | 201 +++++++++++++++
.../Conformance/lib/DeviceResources.cpp | 54 ++++
.../Conformance/lib/ErrorHandling.cpp | 37 +++
offload/unittests/Conformance/sin.cpp | 8 -
.../Conformance/tests/CMakeLists.txt | 2 +
.../Conformance/tests/Hypotf16Test.cpp | 51 ++++
.../unittests/Conformance/tests/LogfTest.cpp | 44 ++++
28 files changed, 1834 insertions(+), 24 deletions(-)
create mode 100644 offload/unittests/Conformance/device_code/LLVMLibm.c
delete mode 100644 offload/unittests/Conformance/device_code/sin.c
create mode 100644 offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/Dim.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/Numerics.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/Support.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/TestResult.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/TestRunner.hpp
create mode 100644 offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
create mode 100644 offload/unittests/Conformance/lib/CMakeLists.txt
create mode 100644 offload/unittests/Conformance/lib/DeviceContext.cpp
create mode 100644 offload/unittests/Conformance/lib/DeviceResources.cpp
create mode 100644 offload/unittests/Conformance/lib/ErrorHandling.cpp
delete mode 100644 offload/unittests/Conformance/sin.cpp
create mode 100644 offload/unittests/Conformance/tests/CMakeLists.txt
create mode 100644 offload/unittests/Conformance/tests/Hypotf16Test.cpp
create mode 100644 offload/unittests/Conformance/tests/LogfTest.cpp
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 388d15f834b1d..bea49387528b0 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -39,9 +39,9 @@ function(add_offload_test_device_code test_filename test_name)
add_custom_command(
OUTPUT ${output_file}
COMMAND ${CMAKE_C_COMPILER}
- --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
- -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
- -c ${SRC_PATH} -o ${output_file}
+ --target=nvptx64-nvidia-cuda -march=native
+ -stdlib -nogpulib --cuda-path=${CUDA_ROOT} -flto -fno-builtin ${ARGN}
+ ${SRC_PATH} -o ${output_file}
DEPENDS ${SRC_PATH}
)
add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
@@ -62,6 +62,8 @@ function(add_offload_test_device_code test_filename test_name)
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
add_custom_command(
OUTPUT ${output_file}
+ # TODO(jhuber6): Add and test the '-stdlib' flag here; also consider
+ # the '-fno-builtin' flag.
COMMAND ${CMAKE_C_COMPILER}
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
@@ -106,10 +108,9 @@ function(add_conformance_test test_name)
endif()
add_executable(${target_name} ${files})
- add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} ${test_name}.bin)
+ add_dependencies(${target_name} conformance_device_binaries)
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${CONFORMANCE_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON} libc)
- target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
set_target_properties(${target_name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
add_custom_target(offload.conformance.${test_name}
diff --git a/offload/unittests/Conformance/CMakeLists.txt b/offload/unittests/Conformance/CMakeLists.txt
index bc3141757372a..ce0421553de05 100644
--- a/offload/unittests/Conformance/CMakeLists.txt
+++ b/offload/unittests/Conformance/CMakeLists.txt
@@ -1,8 +1,7 @@
add_custom_target(offload.conformance)
-set(PLUGINS_TEST_COMMON LLVMOffload LLVMSupport)
-set(PLUGINS_TEST_INCLUDE ${LIBOMPTARGET_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/common)
+set(PLUGINS_TEST_COMMON MathTest)
add_subdirectory(device_code)
-
-add_conformance_test(sin sin.cpp)
+add_subdirectory(lib)
+add_subdirectory(tests)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 223f04ccfb698..82c6ec9767562 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,4 @@
-# FIXME: Currently missing dependencies to build GPU portion automatically.
-add_offload_test_device_code(sin.c sin)
+add_offload_test_device_code(LLVMLibm.c LLVMLibm)
-set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
+add_custom_target(conformance_device_binaries DEPENDS LLVMLibm.bin)
+set(CONFORMANCE_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.c
new file mode 100644
index 0000000000000..5f436a235eb08
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.c
@@ -0,0 +1,30 @@
+#include <gpuintrin.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __FLT16_MAX__
+#define HAS_FLOAT16
+typedef _Float16 float16;
+#endif
+
+#ifdef HAS_FLOAT16
+__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
+ size_t NumElements) {
+ uint32_t Index =
+ __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
+
+ if (Index < NumElements) {
+ Out[Index] = hypotf16(X[Index], Y[Index]);
+ }
+}
+#endif
+
+__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
+ uint32_t Index =
+ __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
+
+ if (Index < NumElements) {
+ Out[Index] = logf(X[Index]);
+ }
+}
diff --git a/offload/unittests/Conformance/device_code/sin.c b/offload/unittests/Conformance/device_code/sin.c
deleted file mode 100644
index e969e60f352a2..0000000000000
--- a/offload/unittests/Conformance/device_code/sin.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include <gpuintrin.h>
-#include <math.h>
-
-__gpu_kernel void kernel(double *out) { *out = sin(*out); }
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
new file mode 100644
index 0000000000000..74ef83ce0c195
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/DeviceContext.hpp
@@ -0,0 +1,121 @@
+#pragma once
+
+#include "mathtest/DeviceResources.hpp"
+#include "mathtest/Dim.hpp"
+#include "mathtest/ErrorHandling.hpp"
+#include "mathtest/Support.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace mathtest {
+
+std::size_t countDevices();
+
+namespace detail {
+
+void allocManagedMemory(ol_device_handle_t DeviceHandle, std::size_t Size,
+ void **AllocationOut) noexcept;
+} // namespace detail
+
+class DeviceContext {
+ // For simplicity, the current design of this class doesn't have support for
+ // asynchronous operations and all types of memory allocation.
+ //
+ // Other use cases could benefit from operations like enqueued kernel launch
+ // and enqueued memcpy, as well as device and host memory allocation.
+
+public:
+ // TODO: Add a constructor that also takes a 'Provider'.
+ explicit DeviceContext(std::size_t DeviceId = 0);
+
+ template <typename T>
+ ManagedBuffer<T> createManagedBuffer(std::size_t Size) const noexcept {
+ void *UntypedAddress = nullptr;
+
+ detail::allocManagedMemory(DeviceHandle, Size * sizeof(T), &UntypedAddress);
+ T *TypedAddress = static_cast<T *>(UntypedAddress);
+
+ return ManagedBuffer<T>(TypedAddress, Size);
+ }
+
+ [[nodiscard]] std::shared_ptr<DeviceImage>
+ loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
+ llvm::StringRef Extension) const;
+
+ [[nodiscard]] std::shared_ptr<DeviceImage>
+ loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName) const;
+
+ template <typename KernelSignature>
+ DeviceKernel<KernelSignature>
+ getKernel(const std::shared_ptr<DeviceImage> &Image,
+ llvm::StringRef KernelName) const noexcept {
+ assert(Image && "Image provided to getKernel is null");
+
+ if (Image->DeviceHandle != this->DeviceHandle) {
+ FATAL_ERROR("Image provided to getKernel was created for a different "
+ "device");
+ }
+
+ ol_symbol_handle_t KernelHandle = nullptr;
+ getKernelImpl(Image->Handle, KernelName, &KernelHandle);
+
+ return DeviceKernel<KernelSignature>(Image, KernelHandle);
+ }
+
+ template <typename KernelSignature, typename... ArgTypes>
+ void launchKernel(DeviceKernel<KernelSignature> Kernel, Dim NumGroups,
+ Dim GroupSize, ArgTypes &&...Args) const noexcept {
+ using ExpectedTypes =
+ typename FunctionTypeTraits<KernelSignature>::ArgTypesTuple;
+ using ProvidedTypes = std::tuple<std::decay_t<ArgTypes>...>;
+
+ static_assert(std::is_same_v<ExpectedTypes, ProvidedTypes>,
+ "Argument types provided to launchKernel do not match the "
+ "kernel's signature");
+
+ if (Kernel.Image->DeviceHandle != DeviceHandle) {
+ FATAL_ERROR("Kernel provided to launchKernel was created for a different "
+ "device");
+ }
+
+ if constexpr (sizeof...(Args) == 0) {
+ launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, nullptr, 0);
+ } else {
+ auto KernelArgs = makeKernelArgsPack(std::forward<ArgTypes>(Args)...);
+
+ static_assert(
+ (std::is_trivially_copyable_v<std::decay_t<ArgTypes>> && ...),
+ "Argument types provided to launchKernel must be trivially copyable");
+
+ launchKernelImpl(Kernel.Handle, NumGroups, GroupSize, &KernelArgs,
+ sizeof(KernelArgs));
+ }
+ }
+
+ [[nodiscard]] std::size_t getId() const noexcept { return DeviceId; }
+
+ [[nodiscard]] std::string getName() const;
+
+ [[nodiscard]] std::string getPlatform() const;
+
+private:
+ void getKernelImpl(ol_program_handle_t ProgramHandle,
+ llvm::StringRef KernelName,
+ ol_symbol_handle_t *KernelHandle) const noexcept;
+
+ void launchKernelImpl(ol_symbol_handle_t KernelHandle, const Dim &NumGroups,
+ const Dim &GroupSize, const void *KernelArgs,
+ std::size_t KernelArgsSize) const noexcept;
+
+ std::size_t DeviceId;
+ ol_device_handle_t DeviceHandle;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
new file mode 100644
index 0000000000000..51f7662ef548e
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/DeviceResources.hpp
@@ -0,0 +1,129 @@
+#pragma once
+
+#include "mathtest/OffloadForward.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace mathtest {
+
+class DeviceContext;
+
+namespace detail {
+
+void freeDeviceMemory(void *Address) noexcept;
+} // namespace detail
+
+//===----------------------------------------------------------------------===//
+// ManagedBuffer
+//===----------------------------------------------------------------------===//
+
+template <typename T> class [[nodiscard]] ManagedBuffer {
+public:
+ ~ManagedBuffer() noexcept {
+ if (Address) {
+ detail::freeDeviceMemory(Address);
+ }
+ }
+
+ ManagedBuffer(const ManagedBuffer &) = delete;
+ ManagedBuffer &operator=(const ManagedBuffer &) = delete;
+
+ ManagedBuffer(ManagedBuffer &&Other) noexcept
+ : Address(Other.Address), Size(Other.Size) {
+ Other.Address = nullptr;
+ Other.Size = 0;
+ }
+
+ ManagedBuffer &operator=(ManagedBuffer &&Other) noexcept {
+ if (this == &Other)
+ return *this;
+
+ if (Address) {
+ detail::freeDeviceMemory(Address);
+ }
+
+ Address = Other.Address;
+ Size = Other.Size;
+
+ Other.Address = nullptr;
+ Other.Size = 0;
+
+ return *this;
+ }
+
+ [[nodiscard]] T *data() noexcept { return Address; }
+
+ [[nodiscard]] const T *data() const noexcept { return Address; }
+
+ [[nodiscard]] std::size_t getSize() const noexcept { return Size; }
+
+ [[nodiscard]] operator llvm::MutableArrayRef<T>() noexcept {
+ return llvm::MutableArrayRef<T>(data(), getSize());
+ }
+
+ [[nodiscard]] operator llvm::ArrayRef<T>() const noexcept {
+ return llvm::ArrayRef<T>(data(), getSize());
+ }
+
+private:
+ friend class DeviceContext;
+
+ explicit ManagedBuffer(T *Address, std::size_t Size) noexcept
+ : Address(Address), Size(Size) {}
+
+ T *Address = nullptr;
+ std::size_t Size = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// DeviceImage
+//===----------------------------------------------------------------------===//
+
+class [[nodiscard]] DeviceImage {
+public:
+ ~DeviceImage() noexcept;
+ DeviceImage &operator=(DeviceImage &&Other) noexcept;
+
+ DeviceImage(const DeviceImage &) = delete;
+ DeviceImage &operator=(const DeviceImage &) = delete;
+
+ DeviceImage(DeviceImage &&Other) noexcept;
+
+private:
+ friend class DeviceContext;
+
+ explicit DeviceImage(ol_device_handle_t DeviceHandle,
+ ol_program_handle_t Handle) noexcept;
+
+ ol_device_handle_t DeviceHandle = nullptr;
+ ol_program_handle_t Handle = nullptr;
+};
+
+//===----------------------------------------------------------------------===//
+// DeviceKernel
+//===----------------------------------------------------------------------===//
+
+template <typename KernelSignature> class [[nodiscard]] DeviceKernel {
+public:
+ DeviceKernel() = delete;
+
+ DeviceKernel(const DeviceKernel &) = default;
+ DeviceKernel &operator=(const DeviceKernel &) = default;
+ DeviceKernel(DeviceKernel &&) noexcept = default;
+ DeviceKernel &operator=(DeviceKernel &&) noexcept = default;
+
+private:
+ friend class DeviceContext;
+
+ explicit DeviceKernel(std::shared_ptr<DeviceImage> Image,
+ ol_symbol_handle_t Kernel)
+ : Image(std::move(Image)), Handle(Kernel) {}
+
+ std::shared_ptr<DeviceImage> Image;
+ ol_symbol_handle_t Handle = nullptr;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/Dim.hpp b/offload/unittests/Conformance/include/mathtest/Dim.hpp
new file mode 100644
index 0000000000000..948c10e94cbb3
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Dim.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+
+namespace mathtest {
+
+class Dim {
+public:
+ Dim() = delete;
+
+ constexpr Dim(uint32_t X, uint32_t Y = 1, uint32_t Z = 1) noexcept
+ : Data{X, Y, Z} {
+ assert(X > 0 && Y > 0 && Z > 0 && "Dimensions must be positive");
+ }
+
+ constexpr Dim(std::initializer_list<uint32_t> Dimensions) noexcept
+ : Data{1, 1, 1} {
+ assert(Dimensions.size() <= 3 &&
+ "The number of dimensions must be less than or equal to 3");
+
+ std::size_t Index = 0;
+ for (uint32_t DimValue : Dimensions) {
+ Data[Index++] = DimValue;
+ }
+
+ assert(Data[0] > 0 && Data[1] > 0 && Data[2] > 0 &&
+ "Dimensions must be positive");
+ }
+
+ [[nodiscard]] constexpr uint32_t
+ operator[](std::size_t Index) const noexcept {
+ assert(Index < 3 && "Index is out of range");
+ return Data[Index];
+ }
+
+private:
+ uint32_t Data[3];
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
new file mode 100644
index 0000000000000..932aa79e4c902
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/ErrorHandling.hpp
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "mathtest/OffloadForward.hpp"
+
+#include "llvm/ADT/Twine.h"
+
+#define FATAL_ERROR(Message) \
+ mathtest::detail::reportFatalError(Message, __FILE__, __LINE__, __func__)
+
+#define OL_CHECK(ResultExpr) \
+ do { \
+ ol_result_t Result = (ResultExpr); \
+ if (Result != OL_SUCCESS) { \
+ mathtest::detail::reportOffloadError(#ResultExpr, Result, __FILE__, \
+ __LINE__, __func__); \
+ } \
+ } while (false)
+
+namespace mathtest {
+namespace detail {
+
+[[noreturn]] void reportFatalError(const llvm::Twine &Message, const char *File,
+ int Line, const char *FuncName);
+
+[[noreturn]] void reportOffloadError(const char *ResultExpr, ol_result_t Result,
+ const char *File, int Line,
+ const char *FuncName);
+} // namespace detail
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
new file mode 100644
index 0000000000000..1725a5b35f358
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -0,0 +1,139 @@
+#pragma once
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/InputGenerator.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Parallel.h"
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename... InTypes>
+class [[nodiscard]] ExhaustiveGenerator final
+ : public InputGenerator<InTypes...> {
+ static constexpr std::size_t NumInputs = sizeof...(InTypes);
+ static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+
+public:
+ explicit constexpr ExhaustiveGenerator(
+ const IndexedRange<InTypes> &...Ranges) noexcept
+ : RangesTuple(Ranges...) {
+ bool Overflowed = getSizeWithOverflow(Ranges..., Size);
+
+ assert(!Overflowed && "The input space size is too large");
+ assert((Size > 0) && "The input space size must be at least 1");
+
+ IndexArrayType DimSizes = {};
+ {
+ std::size_t Index = 0;
+ ((DimSizes[Index++] = Ranges.getSize()), ...);
+ }
+
+ Strides[NumInputs - 1] = 1;
+ if constexpr (NumInputs > 1) {
+ for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index) {
+ Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
+ }
+ }
+ }
+
+ [[nodiscard]] std::size_t
+ fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
+ const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
+ const std::size_t BufferSize = BufferSizes[0];
+ assert((BufferSize != 0) && "Buffer size cannot be zero");
+ assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
+ [&](std::size_t Size) { return Size == BufferSize; }) &&
+ "All input buffers must have the same size");
+
+ uint64_t StartFlatIndex, BatchSize;
+ while (true) {
+ uint64_t CurrentFlatIndex =
+ FlatIndexGenerator.load(std::memory_order_relaxed);
+ if (CurrentFlatIndex >= Size)
+ return 0;
+
+ BatchSize = std::min<uint64_t>(BufferSize, Size - CurrentFlatIndex);
+ uint64_t NextFlatIndex = CurrentFlatIndex + BatchSize;
+
+ if (FlatIndexGenerator.compare_exchange_weak(
+ CurrentFlatIndex, NextFlatIndex,
+ std::memory_order_acq_rel, // Success
+ std::memory_order_acquire // Failure
+ )) {
+ StartFlatIndex = CurrentFlatIndex;
+ break;
+ }
+ }
+
+ auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
+
+ llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
+ writeInputs(StartFlatIndex, Offset, BufferPtrsTuple);
+ });
+
+ return static_cast<std::size_t>(BatchSize);
+ }
+
+private:
+ using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
+ using IndexArrayType = std::array<uint64_t, NumInputs>;
+
+ static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
+ uint64_t &Size) noexcept {
+ Size = 1;
+ bool Overflowed = false;
+
+ auto Multiplier = [&](const uint64_t RangeSize) {
+ if (!Overflowed) {
+ Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
+ }
+ };
+
+ (Multiplier(Ranges.getSize()), ...);
+
+ return Overflowed;
+ }
+
+ template <typename BufferPtrsTupleType>
+ void writeInputs(uint64_t StartFlatIndex, uint64_t Offset,
+ BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+ auto NDIndex = getNDIndex(StartFlatIndex + Offset);
+ writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
+ }
+
+ constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
+ IndexArrayType NDIndex;
+
+ for (std::size_t Index = 0; Index < NumInputs; ++Index) {
+ NDIndex[Index] = FlatIndex / Strides[Index];
+ FlatIndex -= NDIndex[Index] * Strides[Index];
+ }
+
+ return NDIndex;
+ }
+
+ template <std::size_t Index, typename BufferPtrsTupleType>
+ void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
+ BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+ if constexpr (Index < NumInputs) {
+ const auto &Range = std::get<Index>(RangesTuple);
+ std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
+ writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
+ }
+ }
+
+ uint64_t Size = 1;
+ RangesTupleType RangesTuple;
+ IndexArrayType Strides = {};
+ std::atomic<uint64_t> FlatIndexGenerator = 0;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
new file mode 100644
index 0000000000000..e5d1c6c77f634
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/GpuMathTest.hpp
@@ -0,0 +1,159 @@
+#pragma once
+
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/DeviceResources.hpp"
+#include "mathtest/HostRefChecker.hpp"
+#include "mathtest/InputGenerator.hpp"
+#include "mathtest/Support.hpp"
+#include "mathtest/TestResult.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+
+template <auto Func, typename Checker = HostRefChecker<Func>>
+class [[nodiscard]] GpuMathTest final {
+ using FunctionTraits = FunctionTraits<Func>;
+ using OutType = typename FunctionTraits::ReturnType;
+ using InTypesTuple = typename FunctionTraits::ArgTypesTuple;
+
+ template <typename... Ts>
+ using PartialResultType = TestResult<OutType, Ts...>;
+ using KernelSignature = KernelSignatureOf_t<Func>;
+
+ template <typename... Ts>
+ using TypeIdentitiesTuple = std::tuple<TypeIdentityOf<Ts>...>;
+ using InTypeIdentitiesTuple =
+ ApplyTupleTypes_t<InTypesTuple, TypeIdentitiesTuple>;
+
+ static constexpr std::size_t DefaultBufferSize =
+ DefaultBufferSizeFor_v<OutType, InTypesTuple>;
+ static constexpr uint32_t DefaultGroupSize = 512;
+
+public:
+ using FunctionConfig = FunctionConfig<Func>;
+ using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
+ using GeneratorType = ApplyTupleTypes_t<InTypesTuple, InputGenerator>;
+
+ explicit GpuMathTest(std::shared_ptr<DeviceContext> Context,
+ llvm::StringRef Provider,
+ llvm::StringRef DeviceBinsDirectory)
+ : Context(std::move(Context)),
+ Kernel(getKernel(this->Context, Provider, DeviceBinsDirectory)) {
+ assert(this->Context && "Context must not be null");
+ }
+
+ ResultType run(GeneratorType &Generator,
+ std::size_t BufferSize = DefaultBufferSize,
+ uint32_t GroupSize = DefaultGroupSize) const noexcept {
+ assert(BufferSize > 0 && "Buffer size must be a positive value");
+ assert(GroupSize > 0 && "Group size must be a positive value");
+
+ auto [InBuffersTuple, OutBuffer] = createBuffers(BufferSize);
+ ResultType FinalResult;
+
+ while (true) {
+ const std::size_t BatchSize = std::apply(
+ [&](auto &...Buffers) { return Generator.fill(Buffers...); },
+ InBuffersTuple);
+
+ if (BatchSize == 0)
+ break;
+
+ const auto BatchResult =
+ processBatch(InBuffersTuple, OutBuffer, BatchSize, GroupSize);
+
+ FinalResult.accumulate(BatchResult);
+ }
+
+ return FinalResult;
+ }
+
+ [[nodiscard]] const DeviceContext &getContext() const noexcept {
+ assert(Context && "Context must not be null");
+ return *Context;
+ }
+
+private:
+ static DeviceKernel<KernelSignature>
+ getKernel(const std::shared_ptr<DeviceContext> &Context,
+ llvm::StringRef Provider,
+ llvm::StringRef DeviceBinsDirectory) noexcept {
+ llvm::StringRef BinaryName = llvm::StringSwitch<llvm::StringRef>(Provider)
+ .Case("llvm-libm", "LLVMLibm")
+ .Default("");
+
+ if (BinaryName.empty()) {
+ FATAL_ERROR(llvm::Twine("Unsupported provider: '") + Provider + "'");
+ }
+
+ const auto Image = Context->loadBinary(DeviceBinsDirectory, BinaryName);
+
+ return Context->getKernel<KernelSignature>(Image,
+ FunctionConfig::KernelName);
+ }
+
+ [[nodiscard]] auto createBuffers(std::size_t BufferSize) const {
+ auto InBuffersTuple = std::apply(
+ [&](auto... InTypeIdentities) {
+ return std::make_tuple(
+ Context->createManagedBuffer<
+ typename decltype(InTypeIdentities)::type>(BufferSize)...);
+ },
+ InTypeIdentitiesTuple{});
+ auto OutBuffer = Context->createManagedBuffer<OutType>(BufferSize);
+
+ return std::make_pair(std::move(InBuffersTuple), std::move(OutBuffer));
+ }
+
+ template <typename InBuffersTupleType>
+ [[nodiscard]] ResultType
+ processBatch(const InBuffersTupleType &InBuffersTuple,
+ ManagedBuffer<OutType> &OutBuffer, std::size_t BatchSize,
+ uint32_t GroupSize) const noexcept {
+ const uint32_t NumGroups = (BatchSize + GroupSize - 1) / GroupSize;
+ const auto KernelArgsTuple = std::apply(
+ [&](const auto &...InBuffers) {
+ return std::make_tuple(InBuffers.data()..., OutBuffer.data(),
+ BatchSize);
+ },
+ InBuffersTuple);
+
+ std::apply(
+ [&](const auto &...KernelArgs) {
+ Context->launchKernel(Kernel, NumGroups, GroupSize, KernelArgs...);
+ },
+ KernelArgsTuple);
+
+ return check(InBuffersTuple, OutBuffer, BatchSize);
+ }
+
+ template <typename InBuffersTupleType>
+ [[nodiscard]] static ResultType
+ check(const InBuffersTupleType &InBuffersTuple,
+ const ManagedBuffer<OutType> &OutBuffer,
+ std::size_t BatchSize) noexcept {
+ const auto InViewsTuple = std::apply(
+ [&](auto &...InBuffers) {
+ return std::make_tuple(
+ llvm::ArrayRef(InBuffers.data(), BatchSize)...);
+ },
+ InBuffersTuple);
+ const auto OutView = llvm::ArrayRef<OutType>(OutBuffer.data(), BatchSize);
+
+ return Checker::check(InViewsTuple, OutView);
+ }
+
+ std::shared_ptr<DeviceContext> Context;
+ DeviceKernel<KernelSignature> Kernel;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
new file mode 100644
index 0000000000000..c45137d652df2
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/HostRefChecker.hpp
@@ -0,0 +1,82 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+#include "mathtest/Support.hpp"
+#include "mathtest/TestResult.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/Parallel.h"
+
+#include <cstddef>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+
+template <auto Func> class HostRefChecker {
+ using FuncTraits = FunctionTraits<Func>;
+ using InTypesTuple = typename FuncTraits::ArgTypesTuple;
+
+ using FuncConfig = FunctionConfig<Func>;
+
+ template <typename... Ts>
+ using BuffersTupleType = std::tuple<llvm::ArrayRef<Ts>...>;
+
+public:
+ using OutType = typename FuncTraits::ReturnType;
+
+private:
+ template <typename... Ts>
+ using PartialResultType = TestResult<OutType, Ts...>;
+
+public:
+ using ResultType = ApplyTupleTypes_t<InTypesTuple, PartialResultType>;
+ using InBuffersTupleType = ApplyTupleTypes_t<InTypesTuple, BuffersTupleType>;
+
+ HostRefChecker() = delete;
+
+ static ResultType check(InBuffersTupleType InBuffersTuple,
+ llvm::ArrayRef<OutType> OutBuffer) noexcept {
+ const std::size_t BufferSize = OutBuffer.size();
+ std::apply(
+ [&](const auto &...InBuffers) {
+ assert(
+ ((InBuffers.size() == BufferSize) && ...) &&
+ "All input buffers must have the same size as the output buffer");
+ },
+ InBuffersTuple);
+
+ assert((BufferSize != 0) && "Buffer size cannot be zero");
+
+ ResultType Init;
+
+ auto Transform = [&](std::size_t Index) {
+ auto CurrentInputsTuple = std::apply(
+ [&](const auto &...InBuffers) {
+ return std::make_tuple(InBuffers[Index]...);
+ },
+ InBuffersTuple);
+
+ const OutType Actual = OutBuffer[Index];
+ const OutType Expected = std::apply(Func, CurrentInputsTuple);
+
+ const auto UlpDistance = computeUlpDistance(Actual, Expected);
+ const bool IsFailure = UlpDistance > FuncConfig::UlpTolerance;
+
+ return ResultType(UlpDistance, IsFailure,
+ typename ResultType::TestCase(
+ std::move(CurrentInputsTuple), Actual, Expected));
+ };
+
+ auto Reduce = [](ResultType A, const ResultType &B) {
+ A.accumulate(B);
+ return A;
+ };
+
+ const auto Indexes = llvm::seq(BufferSize);
+ return llvm::parallelTransformReduce(Indexes.begin(), Indexes.end(), Init,
+ Reduce, Transform);
+ }
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
new file mode 100644
index 0000000000000..0f33978c8d30d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/IndexedRange.hpp
@@ -0,0 +1,91 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+namespace mathtest {
+
+/// A view over a contiguous range of values of type T, addressable by a
+/// 64-bit index in ascending order.
+///
+/// Works for both integral and floating-point types by mapping values into
+/// an ordered unsigned space; for floating-point types, consecutive indexes
+/// step through consecutive representable values.
+template <typename T> class [[nodiscard]] IndexedRange {
+  static_assert(IsFloatingPoint_v<T> || std::is_integral_v<T>,
+                "Type T must be an integral or floating-point type");
+  static_assert(sizeof(T) <= sizeof(uint64_t),
+                "Type T must be no wider than uint64_t");
+
+public:
+  /// Constructs the widest possible range, [lowest/-inf, max/+inf].
+  constexpr IndexedRange() noexcept
+      : IndexedRange(getMinOrNegInf<T>(), getMaxOrInf<T>(), true) {}
+
+  /// Constructs [Begin, End] when Inclusive, or [Begin, End) otherwise.
+  explicit constexpr IndexedRange(T Begin, T End, bool Inclusive) noexcept
+      : MappedBegin(mapToOrderedUnsigned(Begin)),
+        MappedEnd(mapToOrderedUnsigned(End)) {
+    if (Inclusive) {
+      assert((Begin <= End) && "Begin must be less than or equal to End");
+    } else {
+      assert((Begin < End) && "Begin must be less than End");
+      // Exclude End by stepping back one position in the ordered space
+      --MappedEnd;
+    }
+
+    // getSize() computes MappedEnd - MappedBegin + 1; a difference of
+    // exactly UINT64_MAX would overflow that 64-bit size
+    assert(((MappedEnd - MappedBegin) < std::numeric_limits<uint64_t>::max()) &&
+           "The range is too large to index");
+  }
+
+  /// Returns the number of values in the range.
+  [[nodiscard]] constexpr uint64_t getSize() const noexcept {
+    return static_cast<uint64_t>(MappedEnd) - MappedBegin + 1;
+  }
+
+  /// Returns the Index-th value of the range, in ascending order.
+  [[nodiscard]] constexpr T operator[](uint64_t Index) const noexcept {
+    assert((Index < getSize()) && "Index is out of range");
+
+    StorageType MappedValue = MappedBegin + Index;
+    return mapFromOrderedUnsigned(MappedValue);
+  }
+
+private:
+  using StorageType = StorageTypeOf_t<T>;
+
+  // Linearise T values into an ordered unsigned space:
+  // * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  // * The difference |map(a) − map(b)| equals the number of representable
+  //   values between a and b within the same type
+  static constexpr StorageType mapToOrderedUnsigned(T Value) {
+    if constexpr (IsFloatingPoint_v<T>) {
+      StorageType SignMask = FPUtils<T>::SignMask;
+      StorageType Bits = FPUtils<T>::getAsBits(Value);
+      // Negative floats order in reverse of their bit patterns, so mirror
+      // them below the non-negative half of the unsigned space
+      return (Bits & SignMask) ? SignMask - (Bits - SignMask) - 1
+                               : SignMask + Bits;
+    }
+
+    if constexpr (std::is_signed_v<T>) {
+      // Flipping the sign bit shifts the signed range onto [0, 2^N)
+      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      return __builtin_bit_cast(StorageType, Value) ^ SignMask;
+    }
+
+    return Value;
+  }
+
+  // Inverse of mapToOrderedUnsigned
+  static constexpr T mapFromOrderedUnsigned(StorageType MappedValue) {
+    if constexpr (IsFloatingPoint_v<T>) {
+      StorageType SignMask = FPUtils<T>::SignMask;
+      StorageType Bits = (MappedValue < SignMask)
+                             ? (SignMask - MappedValue) + SignMask - 1
+                             : MappedValue - SignMask;
+
+      return FPUtils<T>::createFromBits(Bits);
+    }
+
+    if constexpr (std::is_signed_v<T>) {
+      StorageType SignMask = maskLeadingOnes<StorageType, 1>();
+      return __builtin_bit_cast(T, MappedValue ^ SignMask);
+    }
+
+    return MappedValue;
+  }
+
+  StorageType MappedBegin;
+  StorageType MappedEnd;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
new file mode 100644
index 0000000000000..d9365d4b14423
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/InputGenerator.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace mathtest {
+
+/// Abstract interface for strategies that produce batches of input values
+/// for the function under test.
+template <typename... InTypes> class InputGenerator {
+public:
+  virtual ~InputGenerator() noexcept = default;
+
+  /// Fills the given buffers with the next batch of inputs.
+  ///
+  /// NOTE(review): the return value is presumably the number of elements
+  /// written (zero once the generator is exhausted) — confirm against the
+  /// concrete generator implementations.
+  [[nodiscard]] virtual size_t
+  fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept = 0;
+};
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/Numerics.hpp b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
new file mode 100644
index 0000000000000..f43a26974dda7
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Numerics.hpp
@@ -0,0 +1,235 @@
+#pragma once
+
+#include "mathtest/Support.hpp"
+#include "mathtest/TypeExtras.hpp"
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <math.h>
+#include <type_traits>
+
+namespace mathtest {
+
+//===----------------------------------------------------------------------===//
+// Type Traits
+//===----------------------------------------------------------------------===//
+
+/// Maps a type T to the unsigned integer type that holds its bit pattern.
+template <typename T> struct StorageTypeOf {
+private:
+  static constexpr auto getStorageType() noexcept {
+    if constexpr (std::is_unsigned_v<T>) {
+      return TypeIdentityOf<T>{};
+    } else if constexpr (std::is_signed_v<T>) {
+      return TypeIdentityOf<std::make_unsigned_t<T>>{};
+    } else {
+      // Fires only when this branch is instantiated for an unsupported T
+      static_assert(!std::is_same_v<T, T>, "Unsupported type");
+    }
+  }
+
+public:
+  using type = typename decltype(getStorageType())::type;
+};
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct StorageTypeOf<float16> {
+  using type = uint16_t;
+};
+#endif
+
+template <> struct StorageTypeOf<float> {
+  using type = uint32_t;
+};
+
+template <> struct StorageTypeOf<double> {
+  using type = uint64_t;
+};
+
+template <typename T> using StorageTypeOf_t = typename StorageTypeOf<T>::type;
+
+/// Like std::is_floating_point, extended to cover float16 when available.
+template <typename T> struct IsFloatingPoint : std::is_floating_point<T> {};
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct IsFloatingPoint<float16> : std::true_type {};
+#endif
+
+template <typename T>
+inline constexpr bool IsFloatingPoint_v // NOLINT(readability-identifier-naming)
+    = IsFloatingPoint<T>::value;
+
+//===----------------------------------------------------------------------===//
+// Bitmask Utilities
+//===----------------------------------------------------------------------===//
+
+/// Returns a mask with the 'Count' most-significant bits of UIntType set.
+///
+/// For example, maskLeadingOnes<uint8_t, 3>() yields 0xE0.
+template <typename UIntType, std::size_t Count>
+[[nodiscard]] constexpr UIntType maskLeadingOnes() noexcept {
+  static_assert(std::is_unsigned_v<UIntType>,
+                "UIntType must be an unsigned integer type");
+
+  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
+  static_assert(
+      Count <= TotalBits,
+      "Count must be less than or equal to the bit width of UIntType");
+
+  // 'Count == 0' is handled separately because shifting by TotalBits would
+  // be undefined behavior
+  return Count == 0 ? UIntType(0) : (~UIntType(0) << (TotalBits - Count));
+}
+
+/// Returns a mask with the 'Count' least-significant bits of UIntType set.
+///
+/// For example, maskTrailingOnes<uint8_t, 3>() yields 0x07.
+template <typename UIntType, std::size_t Count>
+[[nodiscard]] constexpr UIntType maskTrailingOnes() noexcept {
+  static_assert(std::is_unsigned_v<UIntType>,
+                "UIntType must be an unsigned integer type");
+
+  constexpr unsigned TotalBits = CHAR_BIT * sizeof(UIntType);
+  static_assert(
+      Count <= TotalBits,
+      "Count must be less than or equal to the bit width of UIntType");
+
+  // Dispatch at compile time: shifting by TotalBits (the Count == 0 case)
+  // would be undefined behavior, so return the empty mask explicitly
+  if constexpr (Count == 0) {
+    return UIntType(0);
+  } else {
+    return ~UIntType(0) >> (TotalBits - Count);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Utilities
+//===----------------------------------------------------------------------===//
+
+/// IEEE 754 bit-field widths (sign, exponent, fraction) of FloatType.
+template <typename FloatType> struct FPLayout;
+
+#ifdef MATHTEST_HAS_FLOAT16
+template <> struct FPLayout<float16> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 5;
+  static constexpr std::size_t FractionLen = 10;
+};
+#endif
+
+template <> struct FPLayout<float> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 8;
+  static constexpr std::size_t FractionLen = 23;
+};
+
+template <> struct FPLayout<double> {
+  static constexpr std::size_t SignLen = 1;
+  static constexpr std::size_t ExponentLen = 11;
+  static constexpr std::size_t FractionLen = 52;
+};
+
+/// Bit-level helpers (bit casts, sign bit, NaN test) for FloatType.
+template <typename FloatType> struct FPUtils : public FPLayout<FloatType> {
+  using FPLayout = FPLayout<FloatType>;
+  using StorageType = StorageTypeOf_t<FloatType>;
+  using FPLayout::ExponentLen;
+  using FPLayout::FractionLen;
+  using FPLayout::SignLen;
+
+  // Mask selecting only the sign bit of the representation
+  static constexpr StorageType SignMask =
+      maskTrailingOnes<StorageType, SignLen>() << (ExponentLen + FractionLen);
+
+  // Static-only utility; never instantiated
+  FPUtils() = delete;
+
+  /// Reinterprets a raw bit pattern as a FloatType value.
+  [[nodiscard]] static constexpr FloatType
+  createFromBits(StorageType Bits) noexcept {
+    return __builtin_bit_cast(FloatType, Bits);
+  }
+
+  /// Returns the raw bit pattern of a FloatType value.
+  [[nodiscard]] static constexpr StorageType
+  getAsBits(FloatType Value) noexcept {
+    return __builtin_bit_cast(StorageType, Value);
+  }
+
+  /// Returns true if Value is a NaN.
+  [[nodiscard]] static constexpr bool isNaN(FloatType Value) noexcept {
+    return __builtin_isnan(Value);
+  }
+
+  /// Returns true if the sign bit is set; unlike a '< 0' comparison, this
+  /// also distinguishes -0.0 and negative NaNs.
+  [[nodiscard]] static constexpr bool getSignBit(FloatType Value) noexcept {
+    return getAsBits(Value) & SignMask;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Numeric Functions
+//===----------------------------------------------------------------------===//
+
+/// Returns -infinity if T has one, otherwise T's lowest finite value.
+template <typename T> [[nodiscard]] constexpr T getMinOrNegInf() noexcept {
+  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
+
+  if constexpr (std::is_floating_point_v<T> &&
+                std::numeric_limits<T>::has_infinity) {
+    return -std::numeric_limits<T>::infinity();
+  }
+
+  return std::numeric_limits<T>::lowest();
+}
+
+#ifdef MATHTEST_HAS_FLOAT16
+// Builds -infinity directly from its IEEE binary16 bit pattern (0xFC00) —
+// presumably because std::numeric_limits<_Float16> cannot be relied on here;
+// confirm if a numeric_limits-based path becomes viable
+template <> [[nodiscard]] constexpr float16 getMinOrNegInf<float16>() noexcept {
+  using StorageType = StorageTypeOf_t<float16>;
+
+  return __builtin_bit_cast(float16, static_cast<StorageType>(0xFC00U));
+}
+#endif
+
+/// Returns +infinity if T has one, otherwise T's largest finite value.
+template <typename T> [[nodiscard]] constexpr T getMaxOrInf() noexcept {
+  static_assert(std::is_arithmetic_v<T>, "Type T must be an arithmetic type");
+
+  if constexpr (std::is_floating_point_v<T> &&
+                std::numeric_limits<T>::has_infinity) {
+    return std::numeric_limits<T>::infinity();
+  }
+
+  return std::numeric_limits<T>::max();
+}
+
+#ifdef MATHTEST_HAS_FLOAT16
+// +infinity in IEEE binary16 is the bit pattern 0x7C00
+template <> [[nodiscard]] constexpr float16 getMaxOrInf<float16>() noexcept {
+  using StorageType = StorageTypeOf_t<float16>;
+
+  return __builtin_bit_cast(float16, static_cast<StorageType>(0x7C00U));
+}
+#endif
+
+/// Computes the distance between X and Y in ULPs, i.e. the number of
+/// representable FloatType values separating them.
+///
+/// Special cases:
+/// * Equal values yield 0, except +0.0 vs -0.0 which yields 1 (see below)
+/// * Two NaNs yield 0 (treated as matching)
+/// * A NaN vs a non-NaN yields UINT64_MAX (maximally distant)
+template <typename FloatType>
+[[nodiscard]] uint64_t computeUlpDistance(FloatType X, FloatType Y) noexcept {
+  static_assert(IsFloatingPoint_v<FloatType>,
+                "FloatType must be a floating-point type");
+  using FPUtils = FPUtils<FloatType>;
+  using StorageType = typename FPUtils::StorageType;
+
+  if (X == Y) {
+    if (FPUtils::getSignBit(X) != FPUtils::getSignBit(Y)) [[unlikely]] {
+      // When X == Y, different sign bits imply that X and Y are +0.0 and -0.0
+      // (in any order). Since we want to treat them as unequal in the context
+      // of accuracy testing of mathematical functions, we return the smallest
+      // non-zero value
+      return 1;
+    }
+    return 0;
+  }
+
+  const bool XIsNaN = FPUtils::isNaN(X);
+  const bool YIsNaN = FPUtils::isNaN(Y);
+
+  if (XIsNaN && YIsNaN) {
+    return 0;
+  }
+  if (XIsNaN || YIsNaN) {
+    return std::numeric_limits<uint64_t>::max();
+  }
+
+  constexpr StorageType SignMask = FPUtils::SignMask;
+
+  // Linearise FloatType values into an ordered unsigned space:
+  // * The mapping is monotonic: a >= b if, and only if, map(a) >= map(b)
+  // * The difference |map(a) − map(b)| equals the number of std::nextafter
+  //   steps between a and b within the same type
+  auto MapToOrderedUnsigned = [](FloatType Value) {
+    const StorageType Bits = FPUtils::getAsBits(Value);
+    return (Bits & SignMask) ? SignMask - (Bits - SignMask) : SignMask + Bits;
+  };
+
+  const StorageType MappedX = MapToOrderedUnsigned(X);
+  const StorageType MappedY = MapToOrderedUnsigned(Y);
+  // Absolute difference in the ordered space is the ULP distance
+  return static_cast<uint64_t>(MappedX > MappedY ? MappedX - MappedY
+                                                 : MappedY - MappedX);
+}
diff --git a/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
new file mode 100644
index 0000000000000..099b86af2929d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/OffloadForward.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+// Minimal forward declarations of the liboffload API types used by the
+// framework headers, so they do not have to include <OffloadAPI.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A null result denotes success; a non-null pointer carries error details
+struct ol_error_struct_t;
+typedef const ol_error_struct_t *ol_result_t;
+#define OL_SUCCESS (static_cast<ol_result_t>(nullptr))
+
+// Opaque handle to a device
+struct ol_device_impl_t;
+typedef struct ol_device_impl_t *ol_device_handle_t;
+
+// Opaque handle to a program (a loaded device binary)
+struct ol_program_impl_t;
+typedef struct ol_program_impl_t *ol_program_handle_t;
+
+// Opaque handle to a symbol (e.g., a kernel) within a program
+struct ol_symbol_impl_t;
+typedef struct ol_symbol_impl_t *ol_symbol_handle_t;
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/offload/unittests/Conformance/include/mathtest/Support.hpp b/offload/unittests/Conformance/include/mathtest/Support.hpp
new file mode 100644
index 0000000000000..e2b41f9dec9ab
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/Support.hpp
@@ -0,0 +1,138 @@
+#pragma once
+
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace mathtest {
+
+//===----------------------------------------------------------------------===//
+// Function & Type Traits
+//===----------------------------------------------------------------------===//
+
+namespace detail {
+
+/// Extracts the return and argument types of a function type.
+template <typename T> struct FunctionTraitsImpl;
+
+template <typename RetType, typename... ArgTypes>
+struct FunctionTraitsImpl<RetType(ArgTypes...)> {
+  using ReturnType = RetType;
+  using ArgTypesTuple = std::tuple<ArgTypes...>;
+};
+
+// noexcept function types are distinct types; strip the qualifier
+template <typename RetType, typename... ArgTypes>
+struct FunctionTraitsImpl<RetType(ArgTypes...) noexcept>
+    : FunctionTraitsImpl<RetType(ArgTypes...)> {};
+
+// Accept function pointers as well as plain function types
+template <typename FuncType>
+struct FunctionTraitsImpl<FuncType *> : FunctionTraitsImpl<FuncType> {};
+} // namespace detail
+
+/// Traits of the function (or function pointer) value 'Func'.
+template <auto Func>
+using FunctionTraits = detail::FunctionTraitsImpl<
+    std::remove_pointer_t<std::decay_t<decltype(Func)>>>;
+
+/// Traits of the function type 'FuncType'.
+template <typename FuncType>
+using FunctionTypeTraits = detail::FunctionTraitsImpl<FuncType>;
+
+/// Wraps a type so it can be returned by value from constexpr helpers.
+template <typename T> struct TypeIdentityOf {
+  using type = T;
+};
+
+/// Instantiates 'Template' with the element types of a std::tuple.
+template <typename TupleTypes, template <typename...> class Template>
+struct ApplyTupleTypes;
+
+template <template <typename...> class Template, typename... Ts>
+struct ApplyTupleTypes<std::tuple<Ts...>, Template> {
+  using type = Template<Ts...>;
+};
+
+template <typename TupleTypes, template <typename...> class Template>
+using ApplyTupleTypes_t = typename ApplyTupleTypes<TupleTypes, Template>::type;
+
+namespace detail {
+
+/// Derives the device-kernel signature matching a host function type
+/// RetType(ArgTypes...): one const input pointer per argument, an output
+/// pointer, and the element count.
+template <typename T> struct KernelSignatureOfImpl;
+
+template <typename RetType, typename... ArgTypes>
+struct KernelSignatureOfImpl<RetType(ArgTypes...)> {
+  using type = void(const std::decay_t<ArgTypes> *..., RetType *, std::size_t);
+};
+
+// noexcept function types are distinct types; strip the qualifier
+template <typename RetType, typename... ArgTypes>
+struct KernelSignatureOfImpl<RetType(ArgTypes...) noexcept>
+    : KernelSignatureOfImpl<RetType(ArgTypes...)> {};
+} // namespace detail
+
+/// Kernel signature corresponding to the host function value 'Func'.
+template <auto Func>
+using KernelSignatureOf = detail::KernelSignatureOfImpl<
+    std::remove_pointer_t<std::decay_t<decltype(Func)>>>;
+
+template <auto Func>
+using KernelSignatureOf_t = typename KernelSignatureOf<Func>::type;
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Packing
+//===----------------------------------------------------------------------===//
+
+/// Recursively defined struct that stores kernel arguments by value, one
+/// member per argument, so the whole pack can be handed to the launch API
+/// as a single address/size pair.
+template <typename... ArgTypes> struct KernelArgsPack;
+
+// Base case: a single argument
+template <typename ArgType> struct KernelArgsPack<ArgType> {
+  std::decay_t<ArgType> Arg;
+
+  constexpr KernelArgsPack(ArgType &&Arg) : Arg(std::forward<ArgType>(Arg)) {}
+};
+
+// Recursive case: the first argument followed by a pack of the rest
+template <typename ArgType0, typename ArgType1, typename... ArgTypes>
+struct KernelArgsPack<ArgType0, ArgType1, ArgTypes...> {
+  std::decay_t<ArgType0> Arg0;
+  KernelArgsPack<ArgType1, ArgTypes...> Args;
+
+  constexpr KernelArgsPack(ArgType0 &&Arg0, ArgType1 &&Arg1, ArgTypes &&...Args)
+      : Arg0(std::forward<ArgType0>(Arg0)),
+        Args(std::forward<ArgType1>(Arg1), std::forward<ArgTypes>(Args)...) {}
+};
+
+/// Packs the given arguments into a KernelArgsPack, deducing the types.
+template <typename... ArgTypes>
+KernelArgsPack<ArgTypes...> makeKernelArgsPack(ArgTypes &&...Args) {
+  return KernelArgsPack<ArgTypes...>(std::forward<ArgTypes>(Args)...);
+}
+
+//===----------------------------------------------------------------------===//
+// Configuration Helpers
+//===----------------------------------------------------------------------===//
+
+/// Per-function test configuration (e.g., Name and UlpTolerance), to be
+/// specialized for each math function under test.
+template <auto Func> struct FunctionConfig;
+
+namespace detail {
+
+/// Computes how many elements fit in a fixed memory budget when each test
+/// case stores one element of every buffer type.
+template <typename... BufferTypes>
+static constexpr std::size_t getDefaultBufferSize() {
+  static_assert(sizeof...(BufferTypes) > 0,
+                "At least one buffer type must be provided");
+
+  constexpr std::size_t TotalMemoryInBytes = 512ULL << 20; // 512 MiB
+  constexpr std::size_t ElementTupleSize = (sizeof(BufferTypes) + ...);
+
+  static_assert(ElementTupleSize > 0,
+                "Cannot calculate buffer size for empty types");
+
+  return TotalMemoryInBytes / ElementTupleSize;
+}
+} // namespace detail
+
+/// Default buffer size for one output buffer type plus a std::tuple of
+/// input buffer types.
+template <typename BufferType, typename BufferTupleTypes>
+struct DefaultBufferSizeFor;
+
+template <typename BufferType, typename... BufferTypes>
+struct DefaultBufferSizeFor<BufferType, std::tuple<BufferTypes...>> {
+  static constexpr std::size_t value // NOLINT(readability-identifier-naming)
+      = detail::getDefaultBufferSize<BufferType, BufferTypes...>();
+};
+
+template <typename OutType, typename InTypesTuple>
+inline constexpr std::size_t
+    DefaultBufferSizeFor_v // NOLINT(readability-identifier-naming)
+    = DefaultBufferSizeFor<OutType, InTypesTuple>::value;
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/TestResult.hpp b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
new file mode 100644
index 0000000000000..cdb4f2fa09fa1
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TestResult.hpp
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <tuple>
+#include <utility>
+
+namespace mathtest {
+
+/// Aggregated outcome of checking many test cases of one function.
+///
+/// Tracks the maximum ULP distance seen, the number of failures, the total
+/// number of test cases, and the failing case with the largest distance.
+template <typename OutType, typename... InTypes>
+class [[nodiscard]] TestResult {
+public:
+  /// One evaluated case: the inputs, the actual (device) output, and the
+  /// expected (host reference) output.
+  struct [[nodiscard]] TestCase {
+    std::tuple<InTypes...> Inputs;
+    OutType Actual;
+    OutType Expected;
+
+    explicit constexpr TestCase(std::tuple<InTypes...> &&Inputs, OutType Actual,
+                                OutType Expected) noexcept
+        : Inputs(std::move(Inputs)), Actual(std::move(Actual)),
+          Expected(std::move(Expected)) {}
+  };
+
+  // Empty result: zero cases, zero failures (identity for accumulate)
+  TestResult() = default;
+
+  /// Constructs the result of a single test case.
+  explicit TestResult(uint64_t UlpDistance, bool IsFailure,
+                      TestCase &&Case) noexcept
+      : MaxUlpDistance(UlpDistance), FailureCount(IsFailure ? 1 : 0),
+        TestCaseCount(1) {
+    // Only failing cases are worth keeping as the "worst" case
+    if (IsFailure) {
+      WorstFailingCase.emplace(std::move(Case));
+    }
+  }
+
+  /// Merges another partial result into this one.
+  ///
+  /// NOTE(review): the worst failing case follows the maximum ULP distance;
+  /// this is only correct when both results were produced with the same ULP
+  /// tolerance (so the larger distance also carries the worse — or equally
+  /// absent — failing case). Confirm if results from different tolerances
+  /// are ever merged.
+  void accumulate(const TestResult &Other) noexcept {
+    if (Other.MaxUlpDistance > MaxUlpDistance) {
+      MaxUlpDistance = Other.MaxUlpDistance;
+      WorstFailingCase = Other.WorstFailingCase;
+    }
+
+    FailureCount += Other.FailureCount;
+    TestCaseCount += Other.TestCaseCount;
+  }
+
+  /// Returns true if no test case failed.
+  [[nodiscard]] bool hasPassed() const noexcept { return FailureCount == 0; }
+
+  [[nodiscard]] uint64_t getMaxUlpDistance() const noexcept {
+    return MaxUlpDistance;
+  }
+
+  [[nodiscard]] uint64_t getFailureCount() const noexcept {
+    return FailureCount;
+  }
+
+  [[nodiscard]] uint64_t getTestCaseCount() const noexcept {
+    return TestCaseCount;
+  }
+
+  /// Returns the failing case with the largest ULP distance, if any.
+  [[nodiscard]] const std::optional<TestCase> &
+  getWorstFailingCase() const noexcept {
+    return WorstFailingCase;
+  }
+
+private:
+  uint64_t MaxUlpDistance = 0;
+  uint64_t FailureCount = 0;
+  uint64_t TestCaseCount = 0;
+  std::optional<TestCase> WorstFailingCase;
+};
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
new file mode 100644
index 0000000000000..764642647e84b
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -0,0 +1,118 @@
+#pragma once
+
+#include "mathtest/Numerics.hpp"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <chrono>
+#include <tuple>
+
+namespace mathtest {
+namespace detail {
+
+/// Prints one value; floating-point values are followed by their raw bit
+/// pattern in hex so failing cases can be reproduced exactly.
+template <typename T>
+void printValue(llvm::raw_ostream &OS, const T &Value) noexcept {
+  if constexpr (IsFloatingPoint_v<T>) {
+    using FPUtils = FPUtils<T>;
+
+    // Types narrower than float (e.g. float16) are widened for printing,
+    // presumably because raw_ostream has no operator for them — confirm
+    if constexpr (sizeof(T) < sizeof(float)) {
+      OS << float(Value);
+    } else {
+      OS << Value;
+    }
+
+    OS << llvm::formatv(" (0x{0})",
+                        llvm::Twine::utohexstr(FPUtils::getAsBits(Value)));
+  } else {
+    OS << Value;
+  }
+}
+
+/// Prints the elements of a tuple as a comma-separated list.
+template <typename... Ts>
+void printValues(llvm::raw_ostream &OS,
+                 const std::tuple<Ts...> &ValuesTuple) noexcept {
+  std::apply(
+      [&OS](const auto &...Values) {
+        bool IsFirst = true;
+        auto Print = [&](const auto &Value) {
+          if (!IsFirst) {
+            OS << ", ";
+          }
+          printValue(OS, Value);
+          IsFirst = false;
+        };
+        // Fold over the pack to print each element in order
+        (Print(Values), ...);
+      },
+      ValuesTuple);
+}
+
+/// Prints the inputs, actual, and expected values of the worst failing case.
+template <typename TestCaseType>
+void printWorstFailingCase(llvm::raw_ostream &OS,
+                           const TestCaseType &TestCase) noexcept {
+  OS << "--- Worst Failing Case ---\n";
+  OS << llvm::formatv("  {0,-14} : ", "Input(s)");
+  printValues(OS, TestCase.Inputs);
+  OS << "\n";
+
+  OS << llvm::formatv("  {0,-14} : ", "Actual");
+  printValue(OS, TestCase.Actual);
+  OS << "\n";
+
+  OS << llvm::formatv("  {0,-14} : ", "Expected");
+  printValue(OS, TestCase.Expected);
+  OS << "\n";
+}
+
+/// Prints a human-readable report for one completed test to stderr.
+template <typename TestType, typename ResultType>
+void printReport(const TestType &Test, const ResultType &Result,
+                 const std::chrono::steady_clock::duration &Duration) noexcept {
+  using FunctionConfig = typename TestType::FunctionConfig;
+
+  const bool Passed = Result.hasPassed();
+  const auto ElapsedMilliseconds =
+      std::chrono::duration_cast<std::chrono::milliseconds>(Duration).count();
+
+  llvm::errs() << llvm::formatv("=== Test Report for '{0}' === \n",
+                                FunctionConfig::Name);
+  llvm::errs() << llvm::formatv("{0,-17}: {1} ({2})\n", "Device",
+                                Test.getContext().getName(),
+                                Test.getContext().getPlatform());
+  llvm::errs() << llvm::formatv("{0,-17}: {1} ms\n", "Elapsed time",
+                                ElapsedMilliseconds);
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "ULP tolerance",
+                                FunctionConfig::UlpTolerance);
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Max ULP distance",
+                                Result.getMaxUlpDistance());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Test cases",
+                                Result.getTestCaseCount());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Failures",
+                                Result.getFailureCount());
+  llvm::errs() << llvm::formatv("{0,-17}: {1}\n", "Status",
+                                Passed ? "PASSED" : "FAILED");
+
+  // Bind by reference: getWorstFailingCase() returns a const reference, and
+  // binding by value would copy the optional (and the inputs tuple inside)
+  if (const auto &Worst = Result.getWorstFailingCase()) {
+    printWorstFailingCase(llvm::errs(), Worst.value());
+  }
+
+  llvm::errs().flush();
+}
+} // namespace detail
+
+/// Runs one conformance test, prints its report, and returns whether it
+/// passed.
+template <typename TestType>
+[[nodiscard]] bool
+runTest(const TestType &Test,
+        typename TestType::GeneratorType &Generator) noexcept {
+  // Time the whole run, including input generation and checking
+  const auto StartTime = std::chrono::steady_clock::now();
+
+  auto Result = Test.run(Generator);
+
+  const auto EndTime = std::chrono::steady_clock::now();
+  const auto Duration = EndTime - StartTime;
+
+  detail::printReport(Test, Result, Duration);
+
+  return Result.hasPassed();
+}
diff --git a/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
new file mode 100644
index 0000000000000..3242349ce6b4d
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/TypeExtras.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+namespace mathtest {
+
+// If the compiler provides the _Float16 extension type (advertised via the
+// predefined __FLT16_MAX__ macro), expose it as 'float16' and define
+// MATHTEST_HAS_FLOAT16 so other headers can support it conditionally
+#ifdef __FLT16_MAX__
+#define MATHTEST_HAS_FLOAT16
+typedef _Float16 float16;
+#endif
+} // namespace mathtest
diff --git a/offload/unittests/Conformance/lib/CMakeLists.txt b/offload/unittests/Conformance/lib/CMakeLists.txt
new file mode 100644
index 0000000000000..a0402a54fbadf
--- /dev/null
+++ b/offload/unittests/Conformance/lib/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Static support library shared by all conformance test binaries.
+add_library(MathTest STATIC DeviceContext.cpp DeviceResources.cpp ErrorHandling.cpp)
+
+target_include_directories(MathTest PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+# Match LLVM's RTTI setting to avoid ABI mismatches when linking LLVM libs.
+target_compile_options(MathTest PUBLIC -fno-rtti)
+target_link_libraries(MathTest PUBLIC LLVMOffload LLVMSupport LLVMDemangle)
diff --git a/offload/unittests/Conformance/lib/DeviceContext.cpp b/offload/unittests/Conformance/lib/DeviceContext.cpp
new file mode 100644
index 0000000000000..a0ca8bab3ddf9
--- /dev/null
+++ b/offload/unittests/Conformance/lib/DeviceContext.cpp
@@ -0,0 +1,201 @@
+#include "mathtest/DeviceContext.hpp"
+
+#include "mathtest/ErrorHandling.hpp"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+#include <OffloadAPI.h>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+using namespace mathtest;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// The static 'Wrapper' instance ensures olInit() is called once at program
+// startup and olShutDown() is called once at program termination
+// The static 'Wrapper' instance ensures olInit() is called once at program
+// startup and olShutDown() is called once at program termination
+struct OffloadInitWrapper {
+  OffloadInitWrapper() { OL_CHECK(olInit()); }
+  ~OffloadInitWrapper() { OL_CHECK(olShutDown()); }
+};
+static OffloadInitWrapper Wrapper{};
+
+// Returns the backend (e.g. AMDGPU, CUDA, host) of the given device
+[[nodiscard]] ol_platform_backend_t
+getBackend(ol_device_handle_t DeviceHandle) noexcept {
+  ol_platform_handle_t Platform;
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
+                           sizeof(Platform), &Platform));
+  ol_platform_backend_t Backend = OL_PLATFORM_BACKEND_UNKNOWN;
+  OL_CHECK(olGetPlatformInfo(Platform, OL_PLATFORM_INFO_BACKEND,
+                             sizeof(Backend), &Backend));
+  return Backend;
+}
+
+// Returns the list of all non-host devices, discovered once and cached
+const std::vector<ol_device_handle_t> &getDevices() {
+  // Thread-safe initialization of a static local variable
+  static std::vector<ol_device_handle_t> Devices =
+      []() -> std::vector<ol_device_handle_t> {
+    std::vector<ol_device_handle_t> TmpDevices;
+
+    // Discovers all devices that are not the host
+    const auto *const ResultFromIterate = olIterateDevices(
+        [](ol_device_handle_t DeviceHandle, void *Data) {
+          if (getBackend(DeviceHandle) != OL_PLATFORM_BACKEND_HOST) {
+            static_cast<std::vector<ol_device_handle_t> *>(Data)->push_back(
+                DeviceHandle);
+          }
+          // Returning true continues the iteration over devices
+          return true;
+        },
+        &TmpDevices);
+
+    OL_CHECK(ResultFromIterate);
+
+    return TmpDevices;
+  }();
+
+  return Devices;
+}
+} // namespace
+
+// Returns the number of available (non-host) devices
+std::size_t mathtest::countDevices() { return getDevices().size(); }
+
+// Allocates managed (host/device accessible) memory on the given device and
+// stores the pointer in *AllocationOut; aborts the process on failure
+void detail::allocManagedMemory(ol_device_handle_t DeviceHandle,
+                                std::size_t Size,
+                                void **AllocationOut) noexcept {
+  OL_CHECK(
+      olMemAlloc(DeviceHandle, OL_ALLOC_TYPE_MANAGED, Size, AllocationOut));
+}
+
+//===----------------------------------------------------------------------===//
+// DeviceContext
+//===----------------------------------------------------------------------===//
+
+// Binds the context to the DeviceId-th discovered device; aborts with a
+// fatal error if DeviceId is out of range
+DeviceContext::DeviceContext(std::size_t DeviceId)
+    : DeviceId(DeviceId), DeviceHandle(nullptr) {
+  const auto &Devices = getDevices();
+
+  if (DeviceId >= Devices.size()) {
+    FATAL_ERROR("Invalid DeviceId: " + llvm::Twine(DeviceId) + ", but only " +
+                llvm::Twine(Devices.size()) + " devices are available");
+  }
+
+  DeviceHandle = Devices[DeviceId];
+}
+
+// Reads the device binary '<BinaryName><Extension>' from Directory, creates
+// an Offload program from it, and wraps it in a DeviceImage
+[[nodiscard]] std::shared_ptr<DeviceImage>
+DeviceContext::loadBinary(llvm::StringRef Directory, llvm::StringRef BinaryName,
+                          llvm::StringRef Extension) const {
+  llvm::SmallString<128> FullPath(Directory);
+  llvm::sys::path::append(FullPath, llvm::Twine(BinaryName) + Extension);
+
+  // For simplicity, this implementation intentionally reads the binary from
+  // disk on every call.
+  //
+  // Other use cases could benefit from a global, thread-safe cache to avoid
+  // redundant file I/O and GPU program creation.
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
+      llvm::MemoryBuffer::getFile(FullPath);
+  if (std::error_code ErrorCode = FileOrErr.getError()) {
+    FATAL_ERROR(llvm::Twine("Failed to read device binary file '") + FullPath +
+                "': " + ErrorCode.message());
+  }
+  std::unique_ptr<llvm::MemoryBuffer> &BinaryData = *FileOrErr;
+
+  ol_program_handle_t ProgramHandle = nullptr;
+  OL_CHECK(olCreateProgram(DeviceHandle, BinaryData->getBufferStart(),
+                           BinaryData->getBufferSize(), &ProgramHandle));
+
+  // DeviceImage's constructor is private, so construct it directly rather
+  // than through std::make_shared
+  return std::shared_ptr<DeviceImage>(
+      new DeviceImage(DeviceHandle, ProgramHandle));
+}
+
+// Convenience overload that infers the binary file extension from the
+// device's backend
+[[nodiscard]] std::shared_ptr<DeviceImage>
+DeviceContext::loadBinary(llvm::StringRef Directory,
+                          llvm::StringRef BinaryName) const {
+  llvm::StringRef Extension;
+
+  switch (getBackend(DeviceHandle)) {
+  case OL_PLATFORM_BACKEND_AMDGPU:
+    Extension = ".amdgpu.bin";
+    break;
+  case OL_PLATFORM_BACKEND_CUDA:
+    Extension = ".nvptx64.bin";
+    break;
+  default:
+    llvm_unreachable("Unsupported backend to infer binary extension");
+  }
+
+  return loadBinary(Directory, BinaryName, Extension);
+}
+
+// Looks up the kernel symbol named KernelName in the given program
+void DeviceContext::getKernelImpl(
+    ol_program_handle_t ProgramHandle, llvm::StringRef KernelName,
+    ol_symbol_handle_t *KernelHandle) const noexcept {
+  // Copy into a SmallString to guarantee a null-terminated C string
+  llvm::SmallString<32> KernelNameBuffer(KernelName);
+  OL_CHECK(olGetSymbol(ProgramHandle, KernelNameBuffer.c_str(),
+                       OL_SYMBOL_KIND_KERNEL, KernelHandle));
+}
+
+// Launches a kernel with the given 3D grid configuration and the packed
+// argument blob. NOTE(review): the null first argument to olLaunchKernel is
+// presumably the queue, making this launch blocking — confirm against the
+// Offload API
+void DeviceContext::launchKernelImpl(
+    ol_symbol_handle_t KernelHandle, const Dim &NumGroups, const Dim &GroupSize,
+    const void *KernelArgs, std::size_t KernelArgsSize) const noexcept {
+  ol_kernel_launch_size_args_t LaunchArgs;
+  LaunchArgs.Dimensions = 3; // It seems this field is not used anywhere.
+                             // Defaulting to the safest value
+  LaunchArgs.NumGroups = {NumGroups[0], NumGroups[1], NumGroups[2]};
+  LaunchArgs.GroupSize = {GroupSize[0], GroupSize[1], GroupSize[2]};
+  LaunchArgs.DynSharedMemory = 0;
+
+  OL_CHECK(olLaunchKernel(nullptr, DeviceHandle, KernelHandle, KernelArgs,
+                          KernelArgsSize, &LaunchArgs, nullptr));
+}
+
+// Returns the device name reported by the Offload API (empty if none)
+[[nodiscard]] std::string DeviceContext::getName() const {
+  std::size_t PropSize = 0;
+  OL_CHECK(olGetDeviceInfoSize(DeviceHandle, OL_DEVICE_INFO_NAME, &PropSize));
+
+  if (PropSize == 0) {
+    return "";
+  }
+
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_NAME, PropSize,
+                           PropValue.data()));
+  PropValue.pop_back(); // Remove the null terminator
+
+  return PropValue;
+}
+
+// Returns the name of the platform this device belongs to (empty if none)
+[[nodiscard]] std::string DeviceContext::getPlatform() const {
+  ol_platform_handle_t PlatformHandle = nullptr;
+  OL_CHECK(olGetDeviceInfo(DeviceHandle, OL_DEVICE_INFO_PLATFORM,
+                           sizeof(ol_platform_handle_t), &PlatformHandle));
+
+  std::size_t PropSize = 0;
+  OL_CHECK(
+      olGetPlatformInfoSize(PlatformHandle, OL_PLATFORM_INFO_NAME, &PropSize));
+
+  if (PropSize == 0) {
+    return "";
+  }
+
+  std::string PropValue(PropSize, '\0');
+  OL_CHECK(olGetPlatformInfo(PlatformHandle, OL_PLATFORM_INFO_NAME, PropSize,
+                             PropValue.data()));
+  PropValue.pop_back(); // Remove the null terminator
+
+  return PropValue;
+}
diff --git a/offload/unittests/Conformance/lib/DeviceResources.cpp b/offload/unittests/Conformance/lib/DeviceResources.cpp
new file mode 100644
index 0000000000000..5d1c94dca7677
--- /dev/null
+++ b/offload/unittests/Conformance/lib/DeviceResources.cpp
@@ -0,0 +1,54 @@
+#include "mathtest/DeviceResources.hpp"
+
+#include "mathtest/ErrorHandling.hpp"
+
+#include <OffloadAPI.h>
+
+using namespace mathtest;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+// Frees device memory previously allocated through the Offload API; null
+// addresses are ignored so moved-from owners can be destroyed safely
+void detail::freeDeviceMemory(void *Address) noexcept {
+  if (Address) {
+    OL_CHECK(olMemFree(Address));
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// DeviceImage
+//===----------------------------------------------------------------------===//
+
+// Destroys the underlying program, if this image still owns one
+DeviceImage::~DeviceImage() noexcept {
+  if (Handle) {
+    OL_CHECK(olDestroyProgram(Handle));
+  }
+}
+
+// Move-assignment: releases any currently held program, then takes
+// ownership of Other's handles, leaving Other empty
+DeviceImage &DeviceImage::operator=(DeviceImage &&Other) noexcept {
+  if (this == &Other)
+    return *this;
+
+  if (Handle) {
+    OL_CHECK(olDestroyProgram(Handle));
+  }
+
+  DeviceHandle = Other.DeviceHandle;
+  Handle = Other.Handle;
+
+  // Null out the source so its destructor does not destroy the program
+  Other.DeviceHandle = nullptr;
+  Other.Handle = nullptr;
+
+  return *this;
+}
+
+// Move-construction: takes ownership of Other's handles, leaving Other empty
+DeviceImage::DeviceImage(DeviceImage &&Other) noexcept
+    : DeviceHandle(Other.DeviceHandle), Handle(Other.Handle) {
+  Other.DeviceHandle = nullptr;
+  Other.Handle = nullptr;
+}
+
+// Constructs an image that owns the given program on the given device
+DeviceImage::DeviceImage(ol_device_handle_t DeviceHandle,
+                         ol_program_handle_t Handle) noexcept
+    : DeviceHandle(DeviceHandle), Handle(Handle) {}
diff --git a/offload/unittests/Conformance/lib/ErrorHandling.cpp b/offload/unittests/Conformance/lib/ErrorHandling.cpp
new file mode 100644
index 0000000000000..0f85260bc7926
--- /dev/null
+++ b/offload/unittests/Conformance/lib/ErrorHandling.cpp
@@ -0,0 +1,37 @@
+#include "mathtest/ErrorHandling.hpp"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <OffloadAPI.h>
+
+using namespace mathtest;
+
+// NOTE(review): this new file lacks the standard LLVM license header
+// (//===--- ... Apache-2.0 WITH LLVM-exception ---===//); add before landing.
+
+// Aborts the process with a formatted diagnostic that includes the failing
+// source location. Intended to back a FATAL_ERROR-style macro that supplies
+// File/Line/FuncName automatically.
+[[noreturn]] void detail::reportFatalError(const llvm::Twine &Message,
+                                           const char *File, int Line,
+                                           const char *FuncName) {
+  // clang-format off
+  llvm::report_fatal_error(
+      llvm::Twine("Fatal error in '") + FuncName +
+          "' at " + File + ":" + llvm::Twine(Line) +
+          "\n  Message: " + Message,
+      /*gen_crash_diag=*/false); // No crash dump: this is a test failure, not a compiler bug.
+  // clang-format on
+}
+
+// Aborts the process with a detailed diagnostic for a failed Offload API
+// call, printing the checked expression, source location, error code, and
+// any driver-provided details. Backs the OL_CHECK macro.
+// NOTE(review): assumes Result is non-null — OL_CHECK should only route
+// failing (non-null) results here; confirm the macro guarantees that.
+[[noreturn]] void detail::reportOffloadError(const char *ResultExpr,
+                                             ol_result_t Result,
+                                             const char *File, int Line,
+                                             const char *FuncName) {
+  // clang-format off
+  llvm::report_fatal_error(
+      llvm::Twine("OL_CHECK failed") +
+          "\n  Location: " + File + ":" + llvm::Twine(Line) +
+          "\n  Function: " + FuncName +
+          "\n  Expression: " + ResultExpr +
+          "\n  Error code: " + llvm::Twine(Result->Code) +
+          "\n  Details: " +
+          (Result->Details ? Result->Details : "No details provided"),
+      /*gen_crash_diag=*/false); // No crash dump: runtime failure, not a compiler bug.
+  // clang-format on
+}
diff --git a/offload/unittests/Conformance/sin.cpp b/offload/unittests/Conformance/sin.cpp
deleted file mode 100644
index 9e15690a9e9d7..0000000000000
--- a/offload/unittests/Conformance/sin.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <OffloadAPI.h>
-#include <math.h>
-
-llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
-
-int main() { llvm::errs() << sin(0.0) << "\n"; }
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
new file mode 100644
index 0000000000000..b5da56f46fc05
--- /dev/null
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@@ -0,0 +1,2 @@
+# One target per conformance test; each is built and run via
+#   ninja offload.conformance.<TestName>
+add_conformance_test(Hypotf16Test Hypotf16Test.cpp)
+add_conformance_test(LogfTest LogfTest.cpp)
diff --git a/offload/unittests/Conformance/tests/Hypotf16Test.cpp b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
new file mode 100644
index 0000000000000..8c6b5054e2d4c
--- /dev/null
+++ b/offload/unittests/Conformance/tests/Hypotf16Test.cpp
@@ -0,0 +1,51 @@
+#include "mathtest/TypeExtras.hpp"
+
+#ifdef MATHTEST_HAS_FLOAT16
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/GpuMathTest.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+#include <memory>
+
+using namespace mathtest;
+
+extern "C" {
+
+float16 hypotf16(float16, float16);
+}
+
+namespace mathtest {
+
+// Test configuration for hypotf16: binds the host reference function to its
+// device kernel by name and fixes the accuracy bound used by the checker.
+template <> struct FunctionConfig<hypotf16> {
+  static constexpr llvm::StringRef Name = "hypotf16";
+  // Must match the kernel symbol in the device binary loaded by GpuMathTest.
+  static constexpr llvm::StringRef KernelName = "hypotf16Kernel";
+
+  // Maximum allowed ULP distance from the correctly rounded result.
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  // Table 69 (Full Profile), Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 2;
+};
+} // namespace mathtest
+
+// Exhaustively tests llvm-libm's hypotf16 on device against the host
+// reference across the full float16 x float16 input space, returning
+// EXIT_SUCCESS iff every result is within the configured ULP tolerance.
+int main() {
+  // TODO: Add command-line arguments parsing for test configuration.
+  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
+  const llvm::StringRef Provider = "llvm-libm";
+  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+
+  GpuMathTest<hypotf16> Hypotf16Test(Context, Provider, DeviceBinsDirectory);
+
+  // Default-constructed ranges presumably span the entire float16 value
+  // space — TODO confirm against IndexedRange's default behavior.
+  IndexedRange<float16> RangeX;
+  IndexedRange<float16> RangeY;
+  ExhaustiveGenerator<float16, float16> Generator(RangeX, RangeY);
+
+  const auto Passed = runTest(Hypotf16Test, Generator);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+#endif // MATHTEST_HAS_FLOAT16
diff --git a/offload/unittests/Conformance/tests/LogfTest.cpp b/offload/unittests/Conformance/tests/LogfTest.cpp
new file mode 100644
index 0000000000000..1af5e844ccdb1
--- /dev/null
+++ b/offload/unittests/Conformance/tests/LogfTest.cpp
@@ -0,0 +1,44 @@
+#include "mathtest/DeviceContext.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/GpuMathTest.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <limits>
+#include <math.h>
+#include <memory>
+
+namespace mathtest {
+
+// Test configuration for logf: binds the host reference function to its
+// device kernel by name and fixes the accuracy bound used by the checker.
+template <> struct FunctionConfig<logf> {
+  static constexpr llvm::StringRef Name = "logf";
+  // Must match the kernel symbol in the device binary loaded by GpuMathTest.
+  static constexpr llvm::StringRef KernelName = "logfKernel";
+
+  // Maximum allowed ULP distance from the correctly rounded result.
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  // Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance = 3;
+};
+} // namespace mathtest
+
+// Exhaustively tests llvm-libm's logf on device against the host reference
+// over [0, +inf], returning EXIT_SUCCESS iff every result is within the
+// configured ULP tolerance.
+// NOTE(review): this file is missing a trailing newline at EOF (see the
+// "\ No newline at end of file" marker); add one before landing.
+int main() {
+  using namespace mathtest;
+
+  // TODO: Add command-line arguments parsing for test configuration.
+  auto Context = std::make_shared<DeviceContext>(/*DeviceId=*/0);
+  const llvm::StringRef Provider = "llvm-libm";
+  const llvm::StringRef DeviceBinsDirectory = DEVICE_CODE_PATH;
+
+  GpuMathTest<logf> LogfTest(Context, Provider, DeviceBinsDirectory);
+
+  // Inclusive range [0, +inf] exercises the edge cases log(0) = -inf and
+  // log(inf) = inf in addition to the full finite non-negative domain.
+  IndexedRange<float> Range(/*Begin=*/0.0f,
+                            /*End=*/std::numeric_limits<float>::infinity(),
+                            /*Inclusive=*/true);
+  ExhaustiveGenerator<float> Generator(Range);
+
+  const auto Passed = runTest(LogfTest, Generator);
+
+  return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
\ No newline at end of file
More information about the llvm-commits
mailing list