[llvm-branch-commits] [compiler-rt] [Instrumentor] Add runtime examples: [2/N] A FP precision analysis (PR #205229)

Mon Jun 22 17:57:02 PDT 2026

https://github.com/jdoerfert created https://github.com/llvm/llvm-project/pull/205229

Second example:
Check all floating point operations and track if they could be done at lower precision.

Partially developed by Claude (AI), tested and verified by me.

>From 0dddce7d4464f4ffa664f0ea1ab8eb91baf8eea5 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert.llvm at gmail.com>
Date: Mon, 22 Jun 2026 17:52:58 -0700
Subject: [PATCH] [Instrumentor] Add runtime examples: [2/N] A FP precision
 analysis

Second example:
Check all floating point operations and track if they could be done at
lower precision.

Partially developed by Claude (AI), tested and verified by me.
---
 .../lib/instrumentor-examples/CMakeLists.txt  |   1 +
 .../lib/instrumentor-examples/README.md       |  26 +
 .../precision-analysis/CMakeLists.txt         |  68 +++
 .../precision_analysis_config.json            |  43 ++
 .../precision_analysis_runtime.cpp            | 569 ++++++++++++++++++
 .../test/instrumentor-examples/CMakeLists.txt |   1 +
 .../test/instrumentor-examples/lit.cfg.py     |   6 +
 .../precision_detailed.c                      |  76 +++
 .../precision_fp16_overflow.c                 |  91 +++
 .../instrumentor-examples/precision_mixed.c   |  66 ++
 .../instrumentor-examples/simple_precision.c  |  56 ++
 11 files changed, 1003 insertions(+)
 create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
 create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
 create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
 create mode 100644 compiler-rt/test/instrumentor-examples/precision_detailed.c
 create mode 100644 compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
 create mode 100644 compiler-rt/test/instrumentor-examples/precision_mixed.c
 create mode 100644 compiler-rt/test/instrumentor-examples/simple_precision.c

diff --git a/compiler-rt/lib/instrumentor-examples/CMakeLists.txt b/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
index b7f9f5cb627ff..693f90b29b1ae 100644
--- a/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
+++ b/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
@@ -9,3 +9,4 @@ add_compiler_rt_component(instrumentor-examples)
 
 # Add subdirectories for specific examples
 add_subdirectory(flop-counter)
+add_subdirectory(precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-examples/README.md b/compiler-rt/lib/instrumentor-examples/README.md
index 4dda5b7033efb..3c8c1234d63cd 100644
--- a/compiler-rt/lib/instrumentor-examples/README.md
+++ b/compiler-rt/lib/instrumentor-examples/README.md
@@ -37,6 +37,32 @@ clang -O2 -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=con
 # At program exit, FLOP statistics will be printed
 ```
 
+### Precision Analysis (`precision-analysis/`)
+
+Analyzes the minimum floating-point precision needed for each operation while
+maintaining acceptable accuracy.
+
+**Features:**
+- Per-operation precision requirement analysis
+- Multi-level precision checking:
+  - **Double operations**: Checks Float first, then FP16 if Float works
+  - **Float operations**: Checks FP16
+- Tracks relative error with configurable threshold (default: 0.1%)
+- Distinguishes input special values from lowering-induced overflow/underflow
+- Reports which operations can use FP16, which need Float, and which need Double
+- IEEE 754 half-precision (fp16) software emulation
+- Provides detailed recommendations for precision optimization
+
+**Usage:**
+```bash
+# Compile your program with instrumentor
+clang -O2 -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=precision_analysis_config.json your_program.c -lclang_rt.precision_analysis -o your_program
+
+# Run it
+./your_program
+# At program exit, precision analysis results will be printed
+```
+
 ## Building
 
 The instrumentor examples are built as part of the compiler-rt build:
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt b/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
new file mode 100644
index 0000000000000..76560699d2ed2
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
@@ -0,0 +1,68 @@
+# CMakeLists.txt for Precision Analysis Example
+#
+# This example demonstrates analyzing whether floating-point operations
+# could use lower precision while maintaining acceptable accuracy using
+# the Instrumentor pass.
+
+add_compiler_rt_component(precision-analysis)
+
+set(PRECISION_ANALYSIS_SOURCES
+  precision_analysis_runtime.cpp
+  )
+
+set(PRECISION_ANALYSIS_HEADERS
+  ../instrumentor_runtime.h # only project header the runtime source includes
+  )
+
+# Include paths for instrumentor runtime headers
+# The instrumentor runtime headers are in llvm/utils
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Common flags
+set(PRECISION_ANALYSIS_CFLAGS
+  ${COMPILER_RT_COMMON_CFLAGS}
+  -std=c++17
+  )
+
+# Determine supported architectures
+if(APPLE)
+  # On Darwin, use the darwin OSX architectures
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH arm64)
+  if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "")
+    set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+  endif()
+  if(DARWIN_osx_ARCHS)
+    set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+  endif()
+else()
+  # For non-Apple platforms, use the default target architecture
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+endif()
+
+message(STATUS "Precision Analysis supported architectures: ${PRECISION_ANALYSIS_SUPPORTED_ARCH}")
+
+# Build the static runtime library for Apple platforms
+if(APPLE)
+  add_compiler_rt_runtime(clang_rt.precision_analysis
+    STATIC
+    OS osx
+    ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+    CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+    SOURCES ${PRECISION_ANALYSIS_SOURCES}
+    ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+    PARENT_TARGET precision-analysis)
+else()
+  add_compiler_rt_runtime(clang_rt.precision_analysis
+    STATIC
+    ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+    CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+    SOURCES ${PRECISION_ANALYSIS_SOURCES}
+    ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+    PARENT_TARGET precision-analysis)
+endif()
+
+# Install the configuration file as a resource
+install(FILES precision_analysis_config.json
+        DESTINATION share/llvm/instrumentor-configs
+        COMPONENT precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
new file mode 100644
index 0000000000000..16a9298c98bee
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
@@ -0,0 +1,43 @@
+{
+  "configuration": {
+    "runtime_prefix": "__precision_analysis_",
+    "runtime_prefix.description": "The runtime API prefix.",
+    "runtime_stubs_file": "rt",
+    "target_regex": "",
+    "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.",
+    "function_regex": "",
+    "function_regex.description": "Regular expression to be matched against a function name. Only functions that match this regex will be instrumented.",
+    "demangle_function_names": true,
+    "demangle_function_names.description": "Demangle functions names passed to the runtime.",
+    "host_enabled": true,
+    "host_enabled.description": "Instrument non-GPU targets",
+    "gpu_enabled": false,
+    "gpu_enabled.description": "Instrument GPU targets"
+  },
+  "instruction_post": {
+    "numeric": {
+      "enabled": true,
+      "filter": "(type_id == 2 || type_id == 3) || ((type_id == 17 || type_id == 18) && (sub_type_id == 2 || sub_type_id == 3))",
+      "filter.description": "Only instrument float (type_id 2) and double (type_id 3) operations, or vectors of those types.",
+      "type_id": true,
+      "type_id.description": "The operation's type id.",
+      "sub_type_id": true,
+      "sub_type_id.description": "The operation's sub-type id (for vectors).",
+      "size": true,
+      "size.description": "The operation's type size.",
+      "opcode": true,
+      "opcode.description": "The instruction opcode.",
+      "left": true,
+      "left.description": "The operation's left operand.",
+      "right": true,
+      "right.description": "The operation's right operand. This value is poison for unary operations.",
+      "result": true,
+      "result.replace": false,
+      "result.description": "Result of the operation.",
+      "flags": true,
+      "flags.description": "A bitmask value signaling which instruction flags are present.",
+      "id": true,
+      "id.description": "A unique ID associated with the given instrumentor call"
+    }
+  }
+}
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
new file mode 100644
index 0000000000000..7057eb9229ec0
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
@@ -0,0 +1,569 @@
+//===-- precision_analysis_runtime.cpp - Precision Analysis Runtime ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements runtime for analyzing whether floating-point operations
+// could be performed with lower precision while maintaining acceptable
+// accuracy. It instruments FP operations, simulates them with lower precision,
+// and compares results to determine if precision reduction is viable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <mutex>
+
+// Configuration: relative error threshold for "acceptable" lower precision
+// A result is considered acceptable if |result_lower - result_higher| /
+// |result_higher| < threshold
+static constexpr double DEFAULT_RELATIVE_ERROR_THRESHOLD = 1e-3; // 0.1%
+
+// Per-operation statistics - tracks separately by original precision
+struct OperationStats {
+  uint64_t total_count; // Total number of times this operation executed
+
+  // Double-precision operations (started as double)
+  uint64_t double_to_fp16;      // Double ops that can use fp16
+  uint64_t double_to_float;     // Double ops that can use float (but not fp16)
+  uint64_t double_needs_double; // Double ops that need double precision
+
+  // Float-precision operations (started as float)
+  uint64_t float_to_fp16;     // Float ops that can use fp16
+  uint64_t float_needs_float; // Float ops that need float precision
+
+  // Special values
+  uint64_t
+      input_special_values; // Times when inputs had special values (NaN, Inf)
+  uint64_t lowering_special_values; // Times when lowering created special
+                                    // values (overflow/underflow)
+};
+
+// Helper functions to get statistics map and mutex
+// Using function-local statics ensures proper initialization order
+// and avoids static destruction order fiasco.
+//
+// IMPORTANT: We use heap allocation (new) without delete to intentionally
+// "leak" these objects. This ensures they remain valid when the destructor
+// function runs at program exit, even if it runs after static destructors.
+// For a profiling tool that runs once and exits, this is acceptable.
+static std::map<int32_t, OperationStats> &get_operation_stats() {
+  static std::map<int32_t, OperationStats> *stats =
+      new std::map<int32_t, OperationStats>();
+  return *stats;
+}
+
+static std::mutex &get_stats_mutex() {
+  static std::mutex *mutex = new std::mutex();
+  return *mutex;
+}
+
+enum {
+  LLVM_OPCODE_FAdd = 15,
+  LLVM_OPCODE_FSub = 17,
+  LLVM_OPCODE_FMul = 19,
+  LLVM_OPCODE_FDiv = 22,
+  LLVM_OPCODE_FRem = 25,
+  LLVM_OPCODE_FNeg = 13,
+};
+
+// Helper: Convert float to fp16 (IEEE 754 half precision) and back
+// fp16 format: 1 sign bit, 5 exponent bits, 10 mantissa bits
+static inline float simulate_fp16_precision(float value) {
+  // Handle special cases
+  if (std::isnan(value) || std::isinf(value)) {
+    return value;
+  }
+
+  uint32_t bits;
+  std::memcpy(&bits, &value, sizeof(float));
+
+  uint32_t sign = bits & 0x80000000u;
+  int32_t exponent = ((bits >> 23) & 0xFF) - 127;
+  uint32_t mantissa = bits & 0x7FFFFFu;
+
+  // fp16 range: exponent -14 to +15 (biased 1 to 30)
+  // Underflow to zero
+  if (exponent < -14) {
+    return sign ? -0.0f : 0.0f;
+  }
+
+  // Overflow to infinity
+  if (exponent > 15) {
+    return sign ? -INFINITY : INFINITY;
+  }
+
+  // Round mantissa from 23 bits to 10 bits
+  uint32_t fp16_mantissa = (mantissa + 0x1000u) >> 13;
+  if (fp16_mantissa > 0x3FF) {
+    // Rounding caused overflow
+    fp16_mantissa = 0;
+    exponent++;
+    if (exponent > 15) {
+      return sign ? -INFINITY : INFINITY;
+    }
+  }
+
+  // Reconstruct float with reduced precision
+  uint32_t fp16_exponent = (exponent + 127) & 0xFF;
+  uint32_t result_bits = sign | (fp16_exponent << 23) | (fp16_mantissa << 13);
+
+  float result;
+  std::memcpy(&result, &result_bits, sizeof(float));
+  return result;
+}
+
+// Helper: Check if value is special (NaN or Inf)
+static inline bool is_special_value(double value) {
+  return std::isnan(value) || std::isinf(value);
+}
+
+static inline bool is_special_value(float value) {
+  return std::isnan(value) || std::isinf(value);
+}
+
+// Helper: Compute relative error
+static inline double compute_relative_error(double reference, double test) {
+  if (reference == 0.0) {
+    return (test == 0.0) ? 0.0 : INFINITY;
+  }
+  return std::fabs((test - reference) / reference);
+}
+
+// Helper: Perform operation with lower precision (double → float)
+static double simulate_lower_precision_op(int32_t opcode, double left,
+                                          double right) {
+  float left_f = static_cast<float>(left);
+  float right_f = static_cast<float>(right);
+  float result_f = 0.0f;
+
+  switch (opcode) {
+  case LLVM_OPCODE_FAdd:
+    result_f = left_f + right_f;
+    break;
+  case LLVM_OPCODE_FSub:
+    result_f = left_f - right_f;
+    break;
+  case LLVM_OPCODE_FMul:
+    result_f = left_f * right_f;
+    break;
+  case LLVM_OPCODE_FDiv:
+    result_f = left_f / right_f;
+    break;
+  case LLVM_OPCODE_FRem:
+    result_f = std::fmod(left_f, right_f);
+    break;
+  case LLVM_OPCODE_FNeg:
+    result_f = -left_f;
+    break;
+  default:
+    // For unknown operations, assume lower precision is not ok
+    return NAN;
+  }
+
+  return static_cast<double>(result_f);
+}
+
+// Helper: Perform operation with fp16 precision (float → fp16)
+static float simulate_fp16_op(int32_t opcode, float left, float right) {
+  float left_fp16 = simulate_fp16_precision(left);
+  float right_fp16 = simulate_fp16_precision(right);
+  float result_fp16 = 0.0f;
+
+  switch (opcode) {
+  case LLVM_OPCODE_FAdd:
+    result_fp16 = left_fp16 + right_fp16;
+    break;
+  case LLVM_OPCODE_FSub:
+    result_fp16 = left_fp16 - right_fp16;
+    break;
+  case LLVM_OPCODE_FMul:
+    result_fp16 = left_fp16 * right_fp16;
+    break;
+  case LLVM_OPCODE_FDiv:
+    result_fp16 = left_fp16 / right_fp16;
+    break;
+  case LLVM_OPCODE_FRem:
+    result_fp16 = std::fmod(left_fp16, right_fp16);
+    break;
+  case LLVM_OPCODE_FNeg:
+    result_fp16 = -left_fp16;
+    break;
+  default:
+    return NAN;
+  }
+
+  // Apply fp16 precision to result as well
+  return simulate_fp16_precision(result_fp16);
+}
+
+// Classify one scalar double-precision operation and update the statistics
+// entry for `id`. `result` is the double result the program computed;
+// `left`/`right` are replayed in float and, if that stays below
+// DEFAULT_RELATIVE_ERROR_THRESHOLD, once more in emulated fp16. Each call
+// bumps total_count plus exactly one outcome counter while holding the mutex.
+static void analyze_double_operation(int32_t opcode, double left, double right,
+                                     double result, int32_t id) {
+  std::lock_guard<std::mutex> lock(get_stats_mutex());
+
+  OperationStats &stats = get_operation_stats()[id];
+  stats.total_count++;
+
+  // Check for special values in inputs or result
+  if (is_special_value(result) || is_special_value(left) ||
+      is_special_value(right)) {
+    stats.input_special_values++;
+    return;
+  }
+
+  // First, try double → float. Deliberately silent: tracing every operation
+  // here would flood stdout and interleave with the program's own output.
+  double float_result = simulate_lower_precision_op(opcode, left, right);
+
+  // Check if lowering to float created special values (overflow/underflow)
+  if (is_special_value(float_result)) {
+    stats.lowering_special_values++;
+    return;
+  }
+
+  // Compare double vs float results
+  double float_error = compute_relative_error(result, float_result);
+
+  if (float_error >= DEFAULT_RELATIVE_ERROR_THRESHOLD) {
+    // Float precision is not sufficient, need double
+    stats.double_needs_double++;
+    return;
+  }
+
+  // Float precision is acceptable. Now check if fp16 would also work.
+  // Convert operands to float, then simulate fp16 operation
+  float left_f = static_cast<float>(left);
+  float right_f = static_cast<float>(right);
+  float result_f = static_cast<float>(result);
+
+  float fp16_result = simulate_fp16_op(opcode, left_f, right_f);
+
+  // Check if lowering to fp16 created special values
+  if (is_special_value(fp16_result)) {
+    // fp16 causes overflow/underflow, but float works (double → float)
+    stats.double_to_float++;
+    return;
+  }
+
+  // Compare float vs fp16 results
+  double fp16_error = compute_relative_error(static_cast<double>(result_f),
+                                             static_cast<double>(fp16_result));
+
+  if (fp16_error < DEFAULT_RELATIVE_ERROR_THRESHOLD) {
+    // fp16 precision is sufficient (double → fp16)
+    stats.double_to_fp16++;
+  } else {
+    // Need float precision but not double (double → float)
+    stats.double_to_float++;
+  }
+}
+
+// Analyze a float-precision operation (check if half precision would work)
+static void analyze_float_operation(int32_t opcode, float left, float right,
+                                    float result, int32_t id) {
+  std::lock_guard<std::mutex> lock(get_stats_mutex());
+
+  OperationStats &stats = get_operation_stats()[id];
+  stats.total_count++;
+
+  // Check for special values in inputs or result
+  if (is_special_value(result) || is_special_value(left) ||
+      is_special_value(right)) {
+    stats.input_special_values++;
+    return;
+  }
+
+  // Simulate operation with fp16 precision
+  float lower_precision_result = simulate_fp16_op(opcode, left, right);
+
+  // Check if lowering precision created special values (overflow/underflow to
+  // inf)
+  if (is_special_value(lower_precision_result)) {
+    stats.lowering_special_values++;
+    return;
+  }
+
+  // Compare results
+  double relative_error = compute_relative_error(
+      static_cast<double>(result), static_cast<double>(lower_precision_result));
+
+  if (relative_error < DEFAULT_RELATIVE_ERROR_THRESHOLD) {
+    // fp16 precision is sufficient (float → fp16)
+    stats.float_to_fp16++;
+  } else {
+    // Need to keep float precision (float → float)
+    stats.float_needs_float++;
+  }
+}
+
+extern "C" {
+
+__attribute__((destructor(1000))) void
+__precision_analysis_post_module(char *module_name, char *target_triple,
+                                 int32_t id) {
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf("            Floating-Point Precision Analysis Results\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf(
+      "This analysis checks minimum precision needed (error < %.2f%%):\n",
+      DEFAULT_RELATIVE_ERROR_THRESHOLD * 100);
+  std::printf("  - Double operations: Try Float, then FP16 if Float works\n");
+  std::printf("  - Float operations: Try FP16\n");
+  std::printf("================================================================"
+              "==========\n\n");
+
+  std::map<int32_t, OperationStats> &operation_stats = get_operation_stats();
+
+  if (operation_stats.empty()) {
+    std::printf("No operations analyzed.\n");
+    std::printf("=============================================================="
+                "============\n");
+    return;
+  }
+
+  uint64_t total_ops = 0;
+  uint64_t total_double_to_fp16 = 0;
+  uint64_t total_double_to_float = 0;
+  uint64_t total_double_needs_double = 0;
+  uint64_t total_float_to_fp16 = 0;
+  uint64_t total_float_needs_float = 0;
+  uint64_t total_input_special = 0;
+  uint64_t total_lowering_special = 0;
+
+  std::printf("Per-Operation Results:\n");
+  std::printf("%-5s %8s %9s %8s %6s %9s %6s %8s %8s\n", "Op ID", "Total",
+              "D->FP16", "D->F32", "D->D", "F->FP16", "F->F", "InpNaN",
+              "LowNaN");
+  std::printf("----------------------------------------------------------------"
+              "-------------\n");
+
+  for (const auto &entry : operation_stats) {
+    int32_t op_id = entry.first;
+    const OperationStats &stats = entry.second;
+
+    total_ops += stats.total_count;
+    total_double_to_fp16 += stats.double_to_fp16;
+    total_double_to_float += stats.double_to_float;
+    total_double_needs_double += stats.double_needs_double;
+    total_float_to_fp16 += stats.float_to_fp16;
+    total_float_needs_float += stats.float_needs_float;
+    total_input_special += stats.input_special_values;
+    total_lowering_special += stats.lowering_special_values;
+
+    std::printf("%-5d %8llu %9llu %8llu %6llu %9llu %6llu %8llu %8llu\n", op_id,
+                stats.total_count, stats.double_to_fp16, stats.double_to_float,
+                stats.double_needs_double, stats.float_to_fp16,
+                stats.float_needs_float, stats.input_special_values,
+                stats.lowering_special_values);
+  }
+
+  std::printf("----------------------------------------------------------------"
+              "-------------\n");
+  std::printf("%-5s %8llu %9llu %8llu %6llu %9llu %6llu %8llu %8llu\n", "TOTAL",
+              total_ops, total_double_to_fp16, total_double_to_float,
+              total_double_needs_double, total_float_to_fp16,
+              total_float_needs_float, total_input_special,
+              total_lowering_special);
+
+  std::printf("\n");
+  std::printf("Column Legend:\n");
+  std::printf("  D->FP16:  Double ops that can use FP16 (16-bit)\n");
+  std::printf(
+      "  D->F32:   Double ops that can use Float (32-bit) but not FP16\n");
+  std::printf("  D->D:     Double ops that require Double (64-bit)\n");
+  std::printf("  F->FP16:  Float ops that can use FP16 (16-bit)\n");
+  std::printf("  F->F:     Float ops that must stay Float (32-bit)\n");
+  std::printf("  InpNaN:   Operations with NaN/Inf in inputs or result\n");
+  std::printf(
+      "  LowNaN:   Operations where lowering caused overflow/underflow\n");
+
+  uint64_t total_double_ops =
+      total_double_to_fp16 + total_double_to_float + total_double_needs_double;
+  uint64_t total_float_ops = total_float_to_fp16 + total_float_needs_float;
+  uint64_t analyzed_total = total_double_ops + total_float_ops;
+
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf("Summary by Original Precision:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (total_double_ops > 0) {
+    std::printf("\nDOUBLE Operations (started as 64-bit double):\n");
+    std::printf("  Total:                              %llu\n",
+                total_double_ops);
+    std::printf("  Can reduce to FP16 (16-bit):        %llu (%.1f%%)\n",
+                total_double_to_fp16,
+                100.0 * total_double_to_fp16 / total_double_ops);
+    std::printf("  Can reduce to Float (32-bit):       %llu (%.1f%%)\n",
+                total_double_to_float,
+                100.0 * total_double_to_float / total_double_ops);
+    std::printf("  Must keep Double (64-bit):          %llu (%.1f%%)\n",
+                total_double_needs_double,
+                100.0 * total_double_needs_double / total_double_ops);
+
+    uint64_t double_convertible = total_double_to_fp16 + total_double_to_float;
+    std::printf("  → Total convertible to lower:       %llu (%.1f%%)\n",
+                double_convertible,
+                100.0 * double_convertible / total_double_ops);
+  }
+
+  if (total_float_ops > 0) {
+    std::printf("\nFLOAT Operations (started as 32-bit float):\n");
+    std::printf("  Total:                              %llu\n",
+                total_float_ops);
+    std::printf("  Can reduce to FP16 (16-bit):        %llu (%.1f%%)\n",
+                total_float_to_fp16,
+                100.0 * total_float_to_fp16 / total_float_ops);
+    std::printf("  Must keep Float (32-bit):           %llu (%.1f%%)\n",
+                total_float_needs_float,
+                100.0 * total_float_needs_float / total_float_ops);
+  }
+
+  std::printf("\nOVERALL Statistics:\n");
+  std::printf("  Total analyzed operations:          %llu\n", analyzed_total);
+  std::printf("  Operations with input NaN/Inf:      %llu\n",
+              total_input_special);
+  std::printf("  Operations causing overflow:        %llu\n",
+              total_lowering_special);
+
+  if (analyzed_total > 0) {
+    uint64_t total_to_fp16 = total_double_to_fp16 + total_float_to_fp16;
+    std::printf("\n  ALL operations reducible to FP16:   %llu (%.1f%%)\n",
+                total_to_fp16, 100.0 * total_to_fp16 / analyzed_total);
+  }
+
+  // Provide recommendations based on results
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf("Recommendations:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (total_double_ops > 0) {
+    double double_to_lower = 100.0 *
+                             (total_double_to_fp16 + total_double_to_float) /
+                             total_double_ops;
+    std::printf("\nFor DOUBLE operations:\n");
+    if (double_to_lower > 80.0) {
+      std::printf(
+          "  ✓ %.1f%% can use lower precision - strong conversion candidate\n",
+          double_to_lower);
+      if (total_double_to_fp16 > total_double_to_float) {
+        std::printf("  ✓ Many can go directly to FP16 - consider aggressive "
+                    "downcasting\n");
+      } else {
+        std::printf(
+            "  ✓ Most need Float - consider using f32 instead of f64\n");
+      }
+    } else if (double_to_lower > 50.0) {
+      std::printf(
+          "  ~ %.1f%% can use lower precision - mixed precision recommended\n",
+          double_to_lower);
+    } else {
+      std::printf("  ✗ Only %.1f%% can use lower precision - keep double\n",
+                  double_to_lower);
+    }
+  }
+
+  if (total_float_ops > 0) {
+    double float_to_fp16_pct = 100.0 * total_float_to_fp16 / total_float_ops;
+    std::printf("\nFor FLOAT operations:\n");
+    if (float_to_fp16_pct > 80.0) {
+      std::printf(
+          "  ✓ %.1f%% can use FP16 - strong FP16 conversion candidate\n",
+          float_to_fp16_pct);
+    } else if (float_to_fp16_pct > 50.0) {
+      std::printf("  ~ %.1f%% can use FP16 - selective FP16 use recommended\n",
+                  float_to_fp16_pct);
+    } else {
+      std::printf("  ✗ Only %.1f%% can use FP16 - keep float\n",
+                  float_to_fp16_pct);
+    }
+  }
+
+  if (total_lowering_special > 0) {
+    std::printf("\n⚠ Warning: %llu operations caused overflow/underflow with "
+                "lower precision.\n",
+                total_lowering_special);
+    std::printf(
+        "  These operations have values outside the lower precision range.\n");
+  }
+
+  std::printf("================================================================"
+              "==========\n");
+}
+
+void __precision_analysis_post_numeric(int32_t type_id, int32_t sub_type_id,
+                                       int32_t size, int32_t opcode,
+                                       int64_t left, int64_t right,
+                                       int64_t result, int64_t flags,
+                                       int32_t id) {
+  // Runtime hook called after an instrumented numeric instruction. `left`,
+  // `right` and `result` hold the raw bits of the operands and the result;
+  // `type_id` tells how to decode them and `id` keys the statistics entry.
+  // `size` and `flags` are not needed by this analysis.
+
+  // For vector operations, we'd need to extract each element (its type is
+  // given by sub_type_id). For now, skip vector operations.
+  if (type_id == FixedVectorTyID || type_id == ScalableVectorTyID)
+    return;
+
+  // Decode the payload with memcpy rather than by dereferencing a casted
+  // pointer: reading an int64_t object through a double*/float* lvalue breaks
+  // the strict-aliasing rule. The copy reads the same leading sizeof(T) bytes
+  // the pointer cast did, so the decoded values are unchanged.
+  auto as_double = [](int64_t bits) {
+    double value;
+    std::memcpy(&value, &bits, sizeof(value));
+    return value;
+  };
+
+  // NOTE(review): a float is taken from the first four bytes of its slot, as
+  // before; this presumably assumes a little-endian host — confirm.
+  auto as_float = [](int64_t bits) {
+    float value;
+    std::memcpy(&value, &bits, sizeof(value));
+    return value;
+  };
+
+  if (type_id == DoubleTyID) {
+    // Double precision operation - check if float (or even fp16) would suffice
+    analyze_double_operation(opcode, as_double(left), as_double(right),
+                             as_double(result), id);
+  } else if (type_id == FloatTyID) {
+    // Float precision operation - check if half would suffice
+    analyze_float_operation(opcode, as_float(left), as_float(right),
+                            as_float(result), id);
+  }
+  // Skip other types (half, bfloat, extended precision)
+}
+
+void __precision_analysis_post_numeric_ind(int32_t type_id, int32_t sub_type_id,
+                                           int32_t size, int32_t opcode,
+                                           int64_t *left_ptr,
+                                           int64_t *right_ptr,
+                                           int64_t *result_ptr, int64_t flags,
+                                           int32_t id) {}
+
+} // extern "C"
diff --git a/compiler-rt/test/instrumentor-examples/CMakeLists.txt b/compiler-rt/test/instrumentor-examples/CMakeLists.txt
index 6818c984f7a2c..7be899377cb35 100644
--- a/compiler-rt/test/instrumentor-examples/CMakeLists.txt
+++ b/compiler-rt/test/instrumentor-examples/CMakeLists.txt
@@ -5,6 +5,7 @@ set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(INSTRUMENTOR_TESTSUITES)
 set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+list(APPEND INSTRUMENTOR_TEST_DEPS precision-analysis)
 
 # Check if INSTRUMENTOR_SUPPORTED_ARCH is defined
 if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
diff --git a/compiler-rt/test/instrumentor-examples/lit.cfg.py b/compiler-rt/test/instrumentor-examples/lit.cfg.py
index 67cf4db671af3..92f63300180c4 100644
--- a/compiler-rt/test/instrumentor-examples/lit.cfg.py
+++ b/compiler-rt/test/instrumentor-examples/lit.cfg.py
@@ -59,6 +59,12 @@ def make_lib_name(name):
 flop_counter_lib = make_lib_name("flop_counter")
 config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
 
+# Precision Analysis runtime library: search directory and link name. Keep the
+# "_dir" key first: lit substitutes in list order and the other key prefixes it.
+config.substitutions.append(("%precision_analysis_lib_dir", config.compiler_rt_libdir))
+precision_analysis_lib = make_lib_name("precision_analysis")
+config.substitutions.append(("%precision_analysis_lib", precision_analysis_lib))
+
 # Add path to instrumentor config files
 instrumentor_config_dir = os.path.join(
     config.test_source_root, "..", "..", "lib", "instrumentor-examples", "flop-counter"
diff --git a/compiler-rt/test/instrumentor-examples/precision_detailed.c b/compiler-rt/test/instrumentor-examples/precision_detailed.c
new file mode 100644
index 0000000000000..ddc30ac537a9b
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_detailed.c
@@ -0,0 +1,76 @@
+// Test precision analysis with detailed per-operation tracking
+//
+// This test demonstrates how the precision analysis tracks each operation
+// separately by ID and shows detailed statistics.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: TOTAL
+// CHECK: Column Legend:
+// CHECK: D->FP16:{{.*}}Double ops that can use FP16
+// CHECK: Summary by Original Precision:
+
+#include <stdio.h>
+
+// Each of these operations will get a unique ID
+// We can track their precision requirements separately
+
+double operation_a(double x, double y) {
+  // One double addition of x and y - expected to work well with float
+  return x + y;
+}
+
+double operation_b(double x, double y) {
+  // One double multiplication - expected to fit float for normal ranges
+  return x * y;
+}
+
+double operation_c(double x, double y) {
+  // One double division - might need more precision depending on values
+  return x / y;
+}
+
+double operation_d(double x) {
+  // Adds 1e-6 and subtracts x again: cancellation that may need double
+  double y = x + 1e-6;
+  return y - x;
+}
+
+double complex_operation(double a, double b) {
+  // Three dependent operations in sequence, one per statement
+  double temp1 = a * b;     // Op 1: multiply
+  double temp2 = temp1 + a; // Op 2: add
+  double temp3 = temp2 / b; // Op 3: divide
+  return temp3;
+}
+
+int main(void) {
+  double result = 0.0;
+
+  // Run all helpers for i = 1..19 with operands that scale with i
+  // NOTE(review): IDs are presumably per FP instruction, not per call - confirm
+  for (int i = 1; i < 20; i++) {
+    result += operation_a(i * 1.0, i * 2.0);
+    result += operation_b(i * 1.5, i * 0.5);
+    result += operation_c(i * 10.0, i * 2.0);
+    result += operation_d(i * 100.0);
+    result += complex_operation(i * 1.5, i * 2.5);
+  }
+
+  // Second pass (i = 1..9) with different operand magnitudes
+  for (int i = 1; i < 10; i++) {
+    // Both operands around 1e-5 - might need double precision
+    result += operation_a(i * 1e-5, i * 1e-5);
+    // Operands of magnitude 1e5 and 1e-5 - might work with float
+    result += operation_b(i * 1e5, i * 1e-5);
+  }
+
+  if (result != 0.0) {
+    printf("Result: %.10f\n", result);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c b/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
new file mode 100644
index 0000000000000..8bb0029d664d2
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
@@ -0,0 +1,91 @@
+// Test precision analysis with fp16 overflow/underflow detection
+//
+// This test specifically exercises float operations that would overflow or
+// underflow when converted to fp16, verifying that the runtime correctly
+// distinguishes between input special values and lowering-induced special values.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F{{.*}}InpNaN{{.*}}LowNaN
+// CHECK: LowNaN:{{.*}}Operations where lowering caused overflow/underflow
+
+#include <math.h>
+#include <stdio.h>
+
+// Float operations with values that work in fp16 range
+// fp16 max is about 65504
+float small_float_ops(float a, float b) {
+  // One float addition; main() passes values that should be fine in fp16
+  return a + b;
+}
+
+// Float operations that will overflow in fp16
+float large_float_ops(float a, float b) {
+  // Products passed by main() exceed fp16 max (~65504): overflow to inf
+  return a * b;
+}
+
+// Float operations that will underflow in fp16
+// fp16 min normal is about 6.1e-5
+float tiny_float_ops(float a, float b) {
+  // Products passed by main() are at most 1e-8: underflow to zero in fp16
+  return a * b;
+}
+
+// Division that produces special values (Inf/NaN) from finite inputs
+float special_input_ops(float a, float b) {
+  // main() passes b == 0.0f, so the float result is already Inf or NaN
+  return a / b;
+}
+
+// Double operations with large values
+double large_double_ops(double a, double b) {
+  // Products passed by main() exceed float max (~3.4e38): overflow in float
+  return a * b;
+}
+
+int main(void) {
+  float result_f = 0.0f;
+  double result_d = 0.0;
+
+  // Small float operations (small_float_ops should work in fp16)
+  for (int i = 1; i < 20; i++) {
+    result_f += small_float_ops(i * 1.5f, i * 2.5f);
+  }
+
+  // Large float operations (products overflow to inf in fp16)
+  for (int i = 1; i < 15; i++) {
+    float big = 10000.0f * i;
+    result_f += large_float_ops(big, big); // Result >= 1e8 > 65504
+  }
+
+  // Tiny float operations (products underflow to 0 in fp16)
+  for (int i = 1; i < 15; i++) {
+    float tiny = 1e-4f / i;
+    result_f += tiny_float_ops(tiny, tiny); // Result <= 1e-8 < 6e-5
+  }
+
+  // Division by zero: finite inputs, Inf/NaN results that enter result_f
+  result_f += special_input_ops(1.0f, 0.0f); // 1/0 = +Inf
+  result_f += special_input_ops(0.0f, 0.0f); // 0/0 = NaN
+
+  // Double operations that overflow in float
+  for (int i = 1; i < 10; i++) {
+    double huge = 1e38 * i;
+    result_d += large_double_ops(huge, huge); // Result >= 1e76 > float_max
+  }
+
+  // Some normal double operations on small values
+  for (int i = 1; i < 30; i++) {
+    result_d += i * 1.5 + i * 2.5;
+  }
+
+  if (!isnan(result_f) && !isnan(result_d)) { // false: result_f is NaN here
+    printf("Computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/precision_mixed.c b/compiler-rt/test/instrumentor-examples/precision_mixed.c
new file mode 100644
index 0000000000000..b0a9f37ae61c3
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_mixed.c
@@ -0,0 +1,66 @@
+// Test precision analysis with mixed float and double operations
+//
+// This test uses both float and double operations to verify that the
+// precision analysis handles both types correctly.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Distance from (x1, y1) to (x2, y2) in float (already a lower precision)
+float compute_float_distance(float x1, float y1, float x2, float y2) {
+  float dx = x2 - x1;
+  float dy = y2 - y1;
+  return sqrtf(dx * dx + dy * dy);
+}
+
+// Distance from (x1, y1) to (x2, y2) in double (analyze if float suffices)
+double compute_double_distance(double x1, double y1, double x2, double y2) {
+  double dx = x2 - x1;
+  double dy = y2 - y1;
+  return sqrt(dx * dx + dy * dy);
+}
+
+// Mixed precision: float a is widened, then a * b + a / b is done in double
+double mixed_computation(float a, double b) {
+  // Implicit conversion from float to double (value-preserving)
+  double a_double = a;
+  return a_double * b + a_double / b;
+}
+
+int main(void) {
+  float float_result = 0.0f;
+  double double_result = 0.0;
+
+  // Float operations: 50 distances between consecutive points (0.1i, 0.2i)
+  for (int i = 0; i < 50; i++) {
+    float_result += compute_float_distance(i * 0.1f, i * 0.2f, (i + 1) * 0.1f,
+                                           (i + 1) * 0.2f);
+  }
+
+  // Same 50 distances in double, with values that should work well in float
+  for (int i = 0; i < 50; i++) {
+    double_result +=
+        compute_double_distance(i * 0.1, i * 0.2, (i + 1) * 0.1, (i + 1) * 0.2);
+  }
+
+  // Mixed precision: float first argument, double second (i = 1..29)
+  for (int i = 1; i < 30; i++) {
+    double_result += mixed_computation(i * 1.5f, i * 2.5);
+  }
+
+  // Use both results so the computations cannot be removed as dead code
+  if (float_result > 0.0f && double_result > 0.0) {
+    printf("Float result: %f, Double result: %f\n", float_result,
+           double_result);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/simple_precision.c b/compiler-rt/test/instrumentor-examples/simple_precision.c
new file mode 100644
index 0000000000000..1c4323ddaae15
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/simple_precision.c
@@ -0,0 +1,56 @@
+// Test basic precision analysis functionality
+//
+// This test verifies that the precision analysis runtime correctly identifies
+// operations that could use lower precision with acceptable accuracy.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Single-operation helpers; main() feeds them values float should handle
+double simple_add(double a, double b) { return a + b; }
+
+double simple_mul(double a, double b) { return a * b; }
+
+double simple_div(double a, double b) { return a / b; }
+
+// Function that uses values where precision matters more
+double precise_computation(double x) {
+  // Adds 1e-8, subtracts x again, rescales by 1e8: cancellation-sensitive
+  double y = x + 1e-8;
+  double z = y - x;
+  return z * 1e8;
+}
+
+int main(void) {
+  double result = 0.0;
+
+  // Simple operations with "normal" range values (i = 0..99)
+  // These should generally work fine with float precision
+  for (int i = 0; i < 100; i++) {
+    result += simple_add(i * 1.5, i * 2.5);
+    result += simple_mul(i * 0.5, i * 0.5);
+    if (i > 0) { // skip i == 0, which would divide 0.0 by 0.0
+      result += simple_div(i * 10.0, i * 2.0);
+    }
+  }
+
+  // Operations that might require more precision (x = 1.0 .. 49.0)
+  for (int i = 1; i < 50; i++) {
+    result += precise_computation(i * 1.0);
+  }
+
+  // Use the result so the computations cannot be removed as dead code
+  if (result > 0.0) {
+    printf("Computation complete: %f\n", result);
+  }
+
+  return 0;
+}