[llvm-branch-commits] [compiler-rt] [Instrumentor] Add runtime examples: [2/N] A FP precision analysis (PR #205229)

Tue Jun 23 19:56:14 PDT 2026

https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/205229

>From 793557cad1832580a490c5f59ea976954cc3e18e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert.llvm at gmail.com>
Date: Mon, 22 Jun 2026 17:52:58 -0700
Subject: [PATCH] [Instrumentor] Add runtime examples: [2/N] A FP precision
 analysis

Second example:
Check all floating point operations and track if they could be done at
lower precision.

Partially developed by Claude (AI), tested and verified by me.
---
 .../lib/instrumentor-tools/CMakeLists.txt     |   1 +
 .../precision-analysis/CMakeLists.txt         |  67 ++
 .../precision_analysis_config.json            |  43 ++
 .../precision_analysis_runtime.cpp            | 603 ++++++++++++++++++
 .../test/instrumentor-tools/CMakeLists.txt    |   1 +
 .../test/instrumentor-tools/lit.cfg.py        |   7 +-
 .../instrumentor-tools/precision_detailed.c   |  76 +++
 .../precision_fp16_overflow.c                 |  92 +++
 .../test/instrumentor-tools/precision_mixed.c |  66 ++
 .../instrumentor-tools/simple_precision.c     |  56 ++
 10 files changed, 1010 insertions(+), 2 deletions(-)
 create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
 create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
 create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
 create mode 100644 compiler-rt/test/instrumentor-tools/precision_detailed.c
 create mode 100644 compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
 create mode 100644 compiler-rt/test/instrumentor-tools/precision_mixed.c
 create mode 100644 compiler-rt/test/instrumentor-tools/simple_precision.c

diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
index 6f8e2fe352f5a..70b4b899124ad 100644
--- a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
+++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
@@ -9,3 +9,4 @@ add_compiler_rt_component(instrumentor-tools)
 
 # Add subdirectories for specific examples
 add_subdirectory(flop-counter)
+add_subdirectory(precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
new file mode 100644
index 0000000000000..42c4a8573df1c
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
@@ -0,0 +1,67 @@
+# CMakeLists.txt for Precision Analysis Example
+#
+# This example demonstrates analyzing whether floating-point operations
+# could use lower precision while maintaining acceptable accuracy using
+# the Instrumentor pass.
+
+add_compiler_rt_component(precision-analysis)
+
+set(PRECISION_ANALYSIS_SOURCES
+  precision_analysis_runtime.cpp
+  )
+
+set(PRECISION_ANALYSIS_HEADERS
+  )
+
+# Include paths for instrumentor runtime headers
+# The instrumentor runtime headers are in llvm/utils
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Common flags
+set(PRECISION_ANALYSIS_CFLAGS
+  ${COMPILER_RT_COMMON_CFLAGS}
+  -std=c++17
+  )
+
+# Determine supported architectures
+if(APPLE)
+  # On Darwin, prefer DARWIN_osx_ARCHS, then CMAKE_OSX_ARCHITECTURES, then arm64
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH arm64)
+  if(NOT "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "")
+    set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+  endif()
+  if(DARWIN_osx_ARCHS)
+    set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+  endif()
+else()
+  # For non-Apple platforms, use the default target architecture
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+endif()
+
+message(STATUS "Precision Analysis supported architectures: ${PRECISION_ANALYSIS_SUPPORTED_ARCH}")
+
+# Build the static runtime library for Apple platforms
+if(APPLE)
+  add_compiler_rt_runtime(clang_rt.precision_analysis
+    STATIC
+    OS osx
+    ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+    CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+    SOURCES ${PRECISION_ANALYSIS_SOURCES}
+    ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+    PARENT_TARGET precision-analysis)
+else()
+  add_compiler_rt_runtime(clang_rt.precision_analysis
+    STATIC
+    ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+    CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+    SOURCES ${PRECISION_ANALYSIS_SOURCES}
+    ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+    PARENT_TARGET precision-analysis)
+endif()
+
+# Install the configuration file as a resource
+install(FILES precision_analysis_config.json
+        DESTINATION share/llvm/instrumentor-configs
+        COMPONENT precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
new file mode 100644
index 0000000000000..16a9298c98bee
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
@@ -0,0 +1,43 @@
+{
+  "configuration": {
+    "runtime_prefix": "__precision_analysis_",
+    "runtime_prefix.description": "The runtime API prefix.",
+    "runtime_stubs_file": "rt",
+    "target_regex": "",
+    "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.",
+    "function_regex": "",
+    "function_regex.description": "Regular expression to be matched against a function name. Only functions that match this regex will be instrumented.",
+    "demangle_function_names": true,
+    "demangle_function_names.description": "Demangle functions names passed to the runtime.",
+    "host_enabled": true,
+    "host_enabled.description": "Instrument non-GPU targets",
+    "gpu_enabled": false,
+    "gpu_enabled.description": "Instrument GPU targets"
+  },
+  "instruction_post": {
+    "numeric": {
+      "enabled": true,
+      "filter": "(type_id == 2 || type_id == 3) || ((type_id == 17 || type_id == 18) && (sub_type_id == 2 || sub_type_id == 3))",
+      "filter.description": "Only instrument float (type_id 2) and double (type_id 3) operations, or vectors of those types.",
+      "type_id": true,
+      "type_id.description": "The operation's type id.",
+      "sub_type_id": true,
+      "sub_type_id.description": "The operation's sub-type id (for vectors).",
+      "size": true,
+      "size.description": "The operation's type size.",
+      "opcode": true,
+      "opcode.description": "The instruction opcode.",
+      "left": true,
+      "left.description": "The operation's left operand.",
+      "right": true,
+      "right.description": "The operation's right operand. This value is poison for unary operations.",
+      "result": true,
+      "result.replace": false,
+      "result.description": "Result of the operation.",
+      "flags": true,
+      "flags.description": "A bitmask value signaling which instruction flags are present.",
+      "id": true,
+      "id.description": "A unique ID associated with the given instrumentor call"
+    }
+  }
+}
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
new file mode 100644
index 0000000000000..78f7ef9c0e491
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
@@ -0,0 +1,603 @@
+//===-- precision_analysis_runtime.cpp - Precision Analysis Runtime ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements runtime for analyzing whether floating-point operations
+// could be performed with lower precision while maintaining acceptable
+// accuracy. It instruments FP operations, simulates them with lower precision,
+// and compares results to determine if precision reduction is viable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <mutex>
+
+// Configuration: relative error threshold for "acceptable" lower precision
+// A result is considered acceptable if |result_lower - result_higher| /
+// |result_higher| < threshold
+static constexpr double DefaultRelativeErrorThreshold = 1e-3; // 0.1%
+
+// Per-operation statistics - tracks separately by original precision
+struct OperationStats {
+  uint64_t TotalCount; // Total number of times this operation executed
+
+  // Double-precision operations (started as double)
+  uint64_t DoubleToFp16;      // Double ops that can use fp16
+  uint64_t DoubleToFloat;     // Double ops that can use float (but not fp16)
+  uint64_t DoubleNeedsDouble; // Double ops that need double precision
+
+  // Float-precision operations (started as float)
+  uint64_t FloatToFp16;     // Float ops that can use fp16
+  uint64_t FloatNeedsFloat; // Float ops that need float precision
+
+  // Special values
+  uint64_t
+      InputSpecialValues; // Times when inputs had special values (NaN, Inf)
+  uint64_t DoubleLoweringSpecial; // Double ops where lowering caused overflow
+  uint64_t FloatLoweringSpecial;  // Float ops where lowering caused overflow
+};
+
+// Helper functions to get statistics map and mutex
+// Using function-local statics ensures proper initialization order
+// and avoids static destruction order fiasco.
+//
+// IMPORTANT: We use heap allocation (new) without delete to intentionally
+// "leak" these objects. This ensures they remain valid when the destructor
+// function runs at program exit, even if it runs after static destructors.
+// For a profiling tool that runs once and exits, this is acceptable.
+static std::map<int32_t, OperationStats> &getOperationStats() {
+  static std::map<int32_t, OperationStats> *Stats =
+      new std::map<int32_t, OperationStats>();
+  return *Stats;
+}
+
+static std::mutex &getStatsMutex() {
+  static std::mutex *Mutex = new std::mutex();
+  return *Mutex;
+}
+
+enum {
+  LLVM_OPCODE_FAdd = 15,
+  LLVM_OPCODE_FSub = 17,
+  LLVM_OPCODE_FMul = 19,
+  LLVM_OPCODE_FDiv = 22,
+  LLVM_OPCODE_FRem = 25,
+  LLVM_OPCODE_FNeg = 13,
+};
+
+// Helper: Convert float to fp16 (IEEE 754 half precision) and back
+// fp16 format: 1 sign bit, 5 exponent bits, 10 mantissa bits
+static inline float simulateFp16Precision(float Value) {
+  // Handle special cases
+  if (std::isnan(Value) || std::isinf(Value)) {
+    return Value;
+  }
+
+  uint32_t Bits;
+  std::memcpy(&Bits, &Value, sizeof(float));
+
+  uint32_t Sign = Bits & 0x80000000u;
+  int32_t Exponent = ((Bits >> 23) & 0xFF) - 127;
+  uint32_t Mantissa = Bits & 0x7FFFFFu;
+
+  // fp16 range: exponent -14 to +15 (biased 1 to 30)
+  // Underflow to zero
+  if (Exponent < -14) {
+    return Sign ? -0.0f : 0.0f;
+  }
+
+  // Overflow to infinity
+  if (Exponent > 15) {
+    return Sign ? -INFINITY : INFINITY;
+  }
+
+  // Round mantissa from 23 bits to 10 bits
+  uint32_t Fp16Mantissa = (Mantissa + 0x1000u) >> 13;
+  if (Fp16Mantissa > 0x3FF) {
+    // Rounding caused overflow
+    Fp16Mantissa = 0;
+    Exponent++;
+    if (Exponent > 15) {
+      return Sign ? -INFINITY : INFINITY;
+    }
+  }
+
+  // Reconstruct float with reduced precision
+  uint32_t Fp16Exponent = (Exponent + 127) & 0xFF;
+  uint32_t ResultBits = Sign | (Fp16Exponent << 23) | (Fp16Mantissa << 13);
+
+  float Result;
+  std::memcpy(&Result, &ResultBits, sizeof(float));
+  return Result;
+}
+
+// Helper: Check if value is special (NaN or Inf)
+static inline bool isSpecialValue(double Value) {
+  return std::isnan(Value) || std::isinf(Value);
+}
+
+static inline bool isSpecialValue(float Value) {
+  return std::isnan(Value) || std::isinf(Value);
+}
+
+// Helper: Compute relative error
+static inline double computeRelativeError(double Reference, double Test) {
+  if (Reference == 0.0) {
+    return (Test == 0.0) ? 0.0 : INFINITY;
+  }
+  return std::fabs((Test - Reference) / Reference);
+}
+
+// Helper: Perform operation with lower precision (double → float)
+static double simulateLowerPrecisionOp(int32_t Opcode, double Left,
+                                       double Right) {
+  float LeftF = static_cast<float>(Left);
+  float RightF = static_cast<float>(Right);
+  float ResultF = 0.0f;
+
+  switch (Opcode) {
+  case LLVM_OPCODE_FAdd:
+    ResultF = LeftF + RightF;
+    break;
+  case LLVM_OPCODE_FSub:
+    ResultF = LeftF - RightF;
+    break;
+  case LLVM_OPCODE_FMul:
+    ResultF = LeftF * RightF;
+    break;
+  case LLVM_OPCODE_FDiv:
+    ResultF = LeftF / RightF;
+    break;
+  case LLVM_OPCODE_FRem:
+    ResultF = std::fmod(LeftF, RightF);
+    break;
+  case LLVM_OPCODE_FNeg:
+    ResultF = -LeftF;
+    break;
+  default:
+    // For unknown operations, assume lower precision is not ok
+    return NAN;
+  }
+
+  return static_cast<double>(ResultF);
+}
+
+// Helper: Perform operation with fp16 precision (float → fp16)
+static float simulateFp16Op(int32_t Opcode, float Left, float Right) {
+  float LeftFp16 = simulateFp16Precision(Left);
+  float RightFp16 = simulateFp16Precision(Right);
+  float ResultFp16 = 0.0f;
+
+  switch (Opcode) {
+  case LLVM_OPCODE_FAdd:
+    ResultFp16 = LeftFp16 + RightFp16;
+    break;
+  case LLVM_OPCODE_FSub:
+    ResultFp16 = LeftFp16 - RightFp16;
+    break;
+  case LLVM_OPCODE_FMul:
+    ResultFp16 = LeftFp16 * RightFp16;
+    break;
+  case LLVM_OPCODE_FDiv:
+    ResultFp16 = LeftFp16 / RightFp16;
+    break;
+  case LLVM_OPCODE_FRem:
+    ResultFp16 = std::fmod(LeftFp16, RightFp16);
+    break;
+  case LLVM_OPCODE_FNeg:
+    ResultFp16 = -LeftFp16;
+    break;
+  default:
+    return NAN;
+  }
+
+  // Apply fp16 precision to result as well
+  return simulateFp16Precision(ResultFp16);
+}
+
+// Classify one executed double operation by the lowest precision that keeps
+// the relative error below DefaultRelativeErrorThreshold: fp16, float, or
+// double. Opcode is an LLVM_OPCODE_* value; Id selects the statistics entry.
+static void analyzeDoubleOperation(int32_t Opcode, double Left, double Right,
+                                   double Result, int32_t Id) {
+  // The right operand of a unary operation is documented as poison, so it
+  // must not be inspected; substitute a benign value that FNeg ignores.
+  if (Opcode == LLVM_OPCODE_FNeg)
+    Right = 0.0;
+
+  std::lock_guard<std::mutex> Lock(getStatsMutex());
+  OperationStats &Stats = getOperationStats()[Id];
+  Stats.TotalCount++;
+
+  // Check for special values in inputs or result
+  if (isSpecialValue(Result) || isSpecialValue(Left) || isSpecialValue(Right)) {
+    Stats.InputSpecialValues++;
+    return;
+  }
+
+  // First, try double → float
+  double FloatResult = simulateLowerPrecisionOp(Opcode, Left, Right);
+
+  // Check if lowering to float created special values (overflow/underflow)
+  if (isSpecialValue(FloatResult)) {
+    Stats.DoubleNeedsDouble++;     // Float doesn't work, need to keep double
+    Stats.DoubleLoweringSpecial++; // Record that overflow occurred
+    return;
+  }
+
+  // Compare double vs float results
+  double FloatError = computeRelativeError(Result, FloatResult);
+  if (FloatError >= DefaultRelativeErrorThreshold) {
+    // Float precision is not sufficient, need double
+    Stats.DoubleNeedsDouble++;
+    return;
+  }
+
+  // Float precision is acceptable; check whether fp16 would also work.
+  float LeftF = static_cast<float>(Left);
+  float RightF = static_cast<float>(Right);
+  float ResultF = static_cast<float>(Result);
+  float Fp16Result = simulateFp16Op(Opcode, LeftF, RightF);
+
+  // Check if lowering to fp16 created special values
+  if (isSpecialValue(Fp16Result)) {
+    // fp16 causes overflow/underflow, but float works (double → float)
+    Stats.DoubleToFloat++;         // Float is the lowest we can go
+    Stats.DoubleLoweringSpecial++; // Record that fp16 overflow occurred
+    return;
+  }
+
+  // Compare float vs fp16 results
+  double Fp16Error = computeRelativeError(static_cast<double>(ResultF),
+                                          static_cast<double>(Fp16Result));
+  if (Fp16Error < DefaultRelativeErrorThreshold) {
+    // fp16 precision is sufficient (double → fp16)
+    Stats.DoubleToFp16++;
+  } else {
+    // Need float precision but not double (double → float)
+    Stats.DoubleToFloat++;
+  }
+}
+
+// Classify one executed float operation as reducible to fp16 or needing float.
+static void analyzeFloatOperation(int32_t Opcode, float Left, float Right,
+                                  float Result, int32_t Id) {
+  if (Opcode == LLVM_OPCODE_FNeg)
+    Right = 0.0f; // The right operand of a unary op is poison; never inspect it
+
+  std::lock_guard<std::mutex> Lock(getStatsMutex());
+  OperationStats &Stats = getOperationStats()[Id];
+  Stats.TotalCount++;
+
+  // Check for special values in inputs or result
+  if (isSpecialValue(Result) || isSpecialValue(Left) || isSpecialValue(Right)) {
+    Stats.InputSpecialValues++;
+    return;
+  }
+
+  // Simulate operation with fp16 precision
+  float LowerPrecisionResult = simulateFp16Op(Opcode, Left, Right);
+
+  // Check if lowering precision produced NaN/Inf (e.g. overflow to inf)
+  if (isSpecialValue(LowerPrecisionResult)) {
+    Stats.FloatNeedsFloat++;      // FP16 doesn't work, need to keep float
+    Stats.FloatLoweringSpecial++; // Record that overflow occurred
+    return;
+  }
+
+  // Compare results
+  double RelativeError = computeRelativeError(
+      static_cast<double>(Result), static_cast<double>(LowerPrecisionResult));
+  if (RelativeError < DefaultRelativeErrorThreshold) {
+    // fp16 precision is sufficient (float → fp16)
+    Stats.FloatToFp16++;
+  } else {
+    // Need to keep float precision (float → float)
+    Stats.FloatNeedsFloat++;
+  }
+}
+
+extern "C" {
+
+// Exit-time destructor: prints all collected statistics and recommendations.
+__attribute__((destructor(1000))) void __precision_analysis_finalize() {
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf("            Floating-Point Precision Analysis Results\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf(
+      "This analysis checks minimum precision needed (error < %.2f%%):\n",
+      DefaultRelativeErrorThreshold * 100);
+  std::printf("  - Double operations: Try Float, then FP16 if Float works\n");
+  std::printf("  - Float operations: Try FP16\n");
+  std::printf("================================================================"
+              "==========\n\n");
+
+  std::map<int32_t, OperationStats> &OperationStatsMap = getOperationStats();
+
+  if (OperationStatsMap.empty()) {
+    std::printf("No operations analyzed.\n");
+    std::printf("=============================================================="
+                "============\n");
+    return;
+  }
+
+  uint64_t TotalOps = 0;
+  uint64_t TotalDoubleToFp16 = 0;
+  uint64_t TotalDoubleToFloat = 0;
+  uint64_t TotalDoubleNeedsDouble = 0;
+  uint64_t TotalFloatToFp16 = 0;
+  uint64_t TotalFloatNeedsFloat = 0;
+  uint64_t TotalInputSpecial = 0;
+  uint64_t TotalDoubleLoweringSpecial = 0;
+  uint64_t TotalFloatLoweringSpecial = 0;
+
+  std::printf("Per-Operation Results:\n");
+  std::printf("%-5s %8s %9s %8s %6s %9s %6s %8s %7s %7s\n", "Op ID", "Total",
+              "D->FP16", "D->F32", "D->D", "F->FP16", "F->F", "InpNaN",
+              "D-OvFl", "F-OvFl");
+  std::printf(
+      "-------------------------------------------------------------------"
+      "-------------\n");
+
+  for (const auto &Entry : OperationStatsMap) {
+    int32_t OpId = Entry.first;
+    const OperationStats &Stats = Entry.second;
+
+    TotalOps += Stats.TotalCount;
+    TotalDoubleToFp16 += Stats.DoubleToFp16;
+    TotalDoubleToFloat += Stats.DoubleToFloat;
+    TotalDoubleNeedsDouble += Stats.DoubleNeedsDouble;
+    TotalFloatToFp16 += Stats.FloatToFp16;
+    TotalFloatNeedsFloat += Stats.FloatNeedsFloat;
+    TotalInputSpecial += Stats.InputSpecialValues;
+    TotalDoubleLoweringSpecial += Stats.DoubleLoweringSpecial;
+    TotalFloatLoweringSpecial += Stats.FloatLoweringSpecial;
+
+    std::printf("%-5d %8llu %9llu %8llu %6llu %9llu %6llu %8llu %7llu %7llu\n",
+                OpId, Stats.TotalCount, Stats.DoubleToFp16, Stats.DoubleToFloat,
+                Stats.DoubleNeedsDouble, Stats.FloatToFp16,
+                Stats.FloatNeedsFloat, Stats.InputSpecialValues,
+                Stats.DoubleLoweringSpecial, Stats.FloatLoweringSpecial);
+  }
+
+  std::printf(
+      "-------------------------------------------------------------------"
+      "-------------\n");
+  std::printf("%-5s %8llu %9llu %8llu %6llu %9llu %6llu %8llu %7llu %7llu\n",
+              "TOTAL", TotalOps, TotalDoubleToFp16, TotalDoubleToFloat,
+              TotalDoubleNeedsDouble, TotalFloatToFp16, TotalFloatNeedsFloat,
+              TotalInputSpecial, TotalDoubleLoweringSpecial,
+              TotalFloatLoweringSpecial);
+
+  std::printf("\n");
+  std::printf("Column Legend:\n");
+  std::printf("  D->FP16:  Double ops that can use FP16 (16-bit)\n");
+  std::printf(
+      "  D->F32:   Double ops that can use Float (32-bit) but not FP16\n");
+  std::printf("  D->D:     Double ops that require Double (64-bit)\n");
+  std::printf("  F->FP16:  Float ops that can use FP16 (16-bit)\n");
+  std::printf("  F->F:     Float ops that must stay Float (32-bit)\n");
+  std::printf("  InpNaN:   Operations with NaN/Inf in inputs or result\n");
+  std::printf("  D-OvFl:   Double ops where lowering caused overflow\n");
+  std::printf("  F-OvFl:   Float ops where lowering to FP16 caused overflow\n");
+
+  uint64_t TotalDoubleOps =
+      TotalDoubleToFp16 + TotalDoubleToFloat + TotalDoubleNeedsDouble;
+  uint64_t TotalFloatOps = TotalFloatToFp16 + TotalFloatNeedsFloat;
+  uint64_t AnalyzedTotal = TotalDoubleOps + TotalFloatOps;
+
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf("Summary by Original Precision:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (TotalDoubleOps > 0) {
+    std::printf("\nDOUBLE Operations (started as 64-bit double):\n");
+    std::printf("  Total:                              %llu\n", TotalDoubleOps);
+    std::printf("  Can reduce to FP16 (16-bit):        %llu (%.1f%%)\n",
+                TotalDoubleToFp16, 100.0 * TotalDoubleToFp16 / TotalDoubleOps);
+    std::printf("  Can reduce to Float (32-bit):       %llu (%.1f%%)\n",
+                TotalDoubleToFloat,
+                100.0 * TotalDoubleToFloat / TotalDoubleOps);
+    std::printf("  Must keep Double (64-bit):          %llu (%.1f%%)\n",
+                TotalDoubleNeedsDouble,
+                100.0 * TotalDoubleNeedsDouble / TotalDoubleOps);
+
+    uint64_t DoubleConvertible = TotalDoubleToFp16 + TotalDoubleToFloat;
+    std::printf("  → Total convertible to lower:       %llu (%.1f%%)\n",
+                DoubleConvertible, 100.0 * DoubleConvertible / TotalDoubleOps);
+  }
+
+  if (TotalFloatOps > 0) {
+    std::printf("\nFLOAT Operations (started as 32-bit float):\n");
+    std::printf("  Total:                              %llu\n", TotalFloatOps);
+    std::printf("  Can reduce to FP16 (16-bit):        %llu (%.1f%%)\n",
+                TotalFloatToFp16, 100.0 * TotalFloatToFp16 / TotalFloatOps);
+    std::printf("  Must keep Float (32-bit):           %llu (%.1f%%)\n",
+                TotalFloatNeedsFloat,
+                100.0 * TotalFloatNeedsFloat / TotalFloatOps);
+  }
+
+  std::printf("\nOVERALL Statistics:\n");
+  std::printf("  Total analyzed operations:          %llu\n", AnalyzedTotal);
+  std::printf("  Operations with input NaN/Inf:      %llu\n",
+              TotalInputSpecial);
+  std::printf("  Double ops causing overflow:        %llu\n",
+              TotalDoubleLoweringSpecial);
+  std::printf("  Float ops causing overflow:         %llu\n",
+              TotalFloatLoweringSpecial);
+
+  if (AnalyzedTotal > 0) {
+    uint64_t TotalToFp16 = TotalDoubleToFp16 + TotalFloatToFp16;
+    std::printf("\n  ALL operations reducible to FP16:   %llu (%.1f%%)\n",
+                TotalToFp16, 100.0 * TotalToFp16 / AnalyzedTotal);
+  }
+
+  // Provide recommendations based on results
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf("Recommendations:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (TotalDoubleOps > 0) {
+    // Ops whose lowering overflowed are already counted in DoubleNeedsDouble
+    // (float overflow) or DoubleToFloat (fp16 overflow), so TotalDoubleOps is
+    // the full denominator; adding them again would deflate the percentages.
+    double DoubleToLower =
+        100.0 * (TotalDoubleToFp16 + TotalDoubleToFloat) / TotalDoubleOps;
+    double OverflowPct = 100.0 * TotalDoubleLoweringSpecial / TotalDoubleOps;
+
+    std::printf("\nFor DOUBLE operations:\n");
+    std::printf("  Analyzed: %llu (%.1f%% overflow, not convertible)\n",
+                TotalDoubleOps, OverflowPct);
+
+    if (DoubleToLower > 80.0) {
+      std::printf(
+          "  ✓ %.1f%% can use lower precision - strong conversion candidate\n",
+          DoubleToLower);
+      if (TotalDoubleToFp16 > TotalDoubleToFloat) {
+        std::printf("  ✓ Many can go directly to FP16 - consider aggressive "
+                    "downcasting\n");
+      } else {
+        std::printf(
+            "  ✓ Most need Float - consider using f32 instead of f64\n");
+      }
+      if (TotalDoubleLoweringSpecial > 0 && OverflowPct > 5.0) {
+        std::printf(
+            "  ⚠ %.1f%% overflow - may need value scaling/normalization\n",
+            OverflowPct);
+      }
+    } else if (DoubleToLower > 50.0) {
+      std::printf(
+          "  ~ %.1f%% can use lower precision - mixed precision recommended\n",
+          DoubleToLower);
+      if (TotalDoubleLoweringSpecial > 0) {
+        std::printf("  ⚠ %.1f%% overflow - limits conversion opportunities\n",
+                    OverflowPct);
+      }
+    } else {
+      std::printf("  ✗ Only %.1f%% can use lower precision - keep double\n",
+                  DoubleToLower);
+      if (TotalDoubleLoweringSpecial > TotalDoubleNeedsDouble) {
+        std::printf("  ! Most failures due to overflow (%.1f%%) rather than "
+                    "accuracy (%llu ops)\n",
+                    OverflowPct, TotalDoubleNeedsDouble);
+        std::printf("  → Problem is value range, not precision\n");
+      }
+    }
+  }
+
+  if (TotalFloatOps > 0) {
+    // Ops whose FP16 lowering overflowed are already part of FloatNeedsFloat;
+    // the remainder of FloatNeedsFloat failed the accuracy threshold instead.
+    uint64_t AccuracyFails = TotalFloatNeedsFloat - TotalFloatLoweringSpecial;
+    double FloatToFp16Pct = 100.0 * TotalFloatToFp16 / TotalFloatOps;
+    double FloatOverflowPct = 100.0 * TotalFloatLoweringSpecial / TotalFloatOps;
+
+    std::printf("\nFor FLOAT operations:\n");
+    std::printf("  Analyzed: %llu (%.1f%% overflow to FP16)\n",
+                TotalFloatOps, FloatOverflowPct);
+
+    if (FloatToFp16Pct > 80.0) {
+      std::printf(
+          "  ✓ %.1f%% can use FP16 - strong FP16 conversion candidate\n",
+          FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > 0 && FloatOverflowPct > 5.0) {
+        std::printf("  ⚠ %.1f%% overflow (values exceed FP16 range ±65504)\n",
+                    FloatOverflowPct);
+      }
+    } else if (FloatToFp16Pct > 50.0) {
+      std::printf("  ~ %.1f%% can use FP16 - selective FP16 use recommended\n",
+                  FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > 0) {
+        std::printf("  ⚠ %.1f%% overflow - limits FP16 opportunities\n",
+                    FloatOverflowPct);
+      }
+    } else {
+      std::printf("  ✗ Only %.1f%% can use FP16 - keep float\n",
+                  FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > AccuracyFails) {
+        std::printf("  ! Most failures due to FP16 overflow (%.1f%%) rather "
+                    "than accuracy (%llu ops)\n",
+                    FloatOverflowPct, AccuracyFails);
+        std::printf("  → Problem: Values exceed FP16 range (±65504)\n");
+        std::printf("  → Solution: Scale values or use Float\n");
+      }
+    }
+  }
+
+  std::printf("================================================================"
+              "==========\n");
+}
+
+// Instrumentor callback for numeric instructions. Operand and result values
+// arrive as raw bits in int64_t; only scalar float/double ops are analyzed.
+void __precision_analysis_post_numeric(int32_t type_id, int32_t sub_type_id,
+                                       int32_t size, int32_t opcode,
+                                       int64_t left, int64_t right,
+                                       int64_t result, int64_t flags,
+                                       int32_t id) {
+  // Handle vector types by looking at sub_type_id
+  bool IsVector = false;
+  int32_t ElementTypeId = type_id;
+
+  switch (type_id) {
+  case FixedVectorTyID:
+  case ScalableVectorTyID:
+    IsVector = true;
+    ElementTypeId = sub_type_id;
+    break;
+  default:
+    break;
+  }
+
+  // Vector operations would need per-element extraction; skip them for now.
+  if (IsVector)
+    return;
+
+  // memcpy avoids the strict-aliasing violation of a reinterpret_cast read.
+  if (ElementTypeId == DoubleTyID) {
+    // Double precision operation - check if float would suffice
+    double LeftVal, RightVal, ResultVal;
+    std::memcpy(&LeftVal, &left, sizeof(double));
+    std::memcpy(&RightVal, &right, sizeof(double));
+    std::memcpy(&ResultVal, &result, sizeof(double));
+    analyzeDoubleOperation(opcode, LeftVal, RightVal, ResultVal, id);
+  } else if (ElementTypeId == FloatTyID) {
+    // Float precision operation - check if half would suffice
+    float LeftVal, RightVal, ResultVal;
+    std::memcpy(&LeftVal, &left, sizeof(float));
+    std::memcpy(&RightVal, &right, sizeof(float));
+    std::memcpy(&ResultVal, &result, sizeof(float));
+    analyzeFloatOperation(opcode, LeftVal, RightVal, ResultVal, id);
+  }
+  // Skip other types (half, bfloat, extended precision)
+}
+
+void __precision_analysis_post_numeric_ind(int32_t type_id, int32_t sub_type_id,
+                                           int32_t size, int32_t opcode,
+                                           int64_t *left_ptr,
+                                           int64_t *right_ptr,
+                                           int64_t *result_ptr, int64_t flags,
+                                           int32_t id) {}
+
+} // extern "C"
diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
index cda35a017fed5..402fb644f3d0a 100644
--- a/compiler-rt/test/instrumentor-tools/CMakeLists.txt
+++ b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
@@ -5,6 +5,7 @@ set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(INSTRUMENTOR_TESTSUITES)
 set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+list(APPEND INSTRUMENTOR_TEST_DEPS precision-analysis)
 
 # Check if INSTRUMENTOR_SUPPORTED_ARCH is defined
 if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py
index 24a4897efbbb5..48ea19f01de38 100644
--- a/compiler-rt/test/instrumentor-tools/lit.cfg.py
+++ b/compiler-rt/test/instrumentor-tools/lit.cfg.py
@@ -50,13 +50,19 @@ def make_lib_name(name):
         return "clang_rt." + name
     return "clang_rt." + name + "_osx"
 
+
+def make_lib_substitutions(name):
+    """Map the lit substitution %<name>_lib to make_lib_name(name)."""
+    config.substitutions.append(("%" + name + "_lib", make_lib_name(name)))
+
 
 # Add clang substitutions.
 config.substitutions.append(("%clang ", build_invocation(clang_cflags)))
 config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags)))
 
-flop_counter_lib = make_lib_name("flop_counter")
-config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
+tools = ["flop_counter", "precision_analysis"]
+for tool in tools:
+    make_lib_substitutions(tool)
 
 config.substitutions.append(("%lib_dir", config.compiler_rt_libdir))
 
diff --git a/compiler-rt/test/instrumentor-tools/precision_detailed.c b/compiler-rt/test/instrumentor-tools/precision_detailed.c
new file mode 100644
index 0000000000000..cde58ab6182c0
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_detailed.c
@@ -0,0 +1,76 @@
+// Test precision analysis with detailed per-operation tracking
+//
+// This test demonstrates how the precision analysis tracks each operation
+// separately by ID and shows detailed statistics.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: TOTAL
+// CHECK: Column Legend:
+// CHECK: D->FP16:{{.*}}Double ops that can use FP16
+// CHECK: Summary by Original Precision:
+
+#include <stdio.h>
+
+// Each of these operations will get a unique ID
+// We can track their precision requirements separately
+
+double operation_a(double x, double y) {
+  // Simple addition - should work well with float
+  return x + y;
+}
+
+double operation_b(double x, double y) {
+  // Multiplication - should work well with float for normal ranges
+  return x * y;
+}
+
+double operation_c(double x, double y) {
+  // Division - might need more precision depending on values
+  return x / y;
+}
+
+double operation_d(double x) {
+  // Subtraction of close values - might need double precision
+  double y = x + 1e-6;
+  return y - x;
+}
+
+double complex_operation(double a, double b) {
+  // Multiple operations in sequence
+  double temp1 = a * b;     // Op 1
+  double temp2 = temp1 + a; // Op 2
+  double temp3 = temp2 / b; // Op 3
+  return temp3;
+}
+
+int main(void) {
+  double result = 0.0;
+
+  // Execute operations multiple times (i = 1..19).
+  // NOTE(review): IDs are presumably per instruction, not per call site.
+  for (int i = 1; i < 20; i++) {
+    result += operation_a(i * 1.0, i * 2.0);
+    result += operation_b(i * 1.5, i * 0.5);
+    result += operation_c(i * 10.0, i * 2.0);
+    result += operation_d(i * 100.0);
+    result += complex_operation(i * 1.5, i * 2.5);
+  }
+
+  // Some operations with different value ranges
+  for (int i = 1; i < 10; i++) {
+    // Very small values - might need double precision
+    result += operation_a(i * 1e-5, i * 1e-5);
+    // Large times small value - might work with float
+    result += operation_b(i * 1e5, i * 1e-5);
+  }
+
+  if (result != 0.0) {
+    printf("Result: %.10f\n", result);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c b/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
new file mode 100644
index 0000000000000..e49853b13fa00
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
@@ -0,0 +1,92 @@
+// Test precision analysis with fp16 overflow/underflow detection
+//
+// This test specifically exercises float operations that would overflow or
+// underflow when converted to fp16, verifying that the runtime correctly
+// distinguishes between input special values and lowering-induced special values.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F{{.*}}InpNaN{{.*}}D-OvFl{{.*}}F-OvFl
+// CHECK: D-OvFl:{{.*}}Double ops where lowering caused overflow
+// CHECK: F-OvFl:{{.*}}Float ops where lowering to FP16 caused overflow
+
+#include <math.h>
+#include <stdio.h>
+
+// Float operations with values that work in fp16 range
+// fp16 max is about 65504
+float small_float_ops(float a, float b) {
+  // These should be fine in fp16
+  return a + b;
+}
+
+// Float operations that will overflow in fp16
+float large_float_ops(float a, float b) {
+  // fp16 max is ~65504, these will overflow to inf
+  return a * b;
+}
+
+// Float operations that will underflow in fp16
+// fp16 min normal is about 6.1e-5
+float tiny_float_ops(float a, float b) {
+  // These will underflow to zero in fp16
+  return a * b;
+}
+
+// Operations that produce NaN/Inf in the original precision
+float special_input_ops(float a, float b) {
+  // main passes finite operands; the quotient itself is Inf or NaN
+  return a / b;
+}
+
+// Double operations with large values
+double large_double_ops(double a, double b) {
+  // float max is about 3.4e38, these will overflow
+  return a * b;
+}
+
+int main(void) {
+  float result_f = 0.0f;
+  double result_d = 0.0;
+
+  // Small float operations (should work in fp16)
+  for (int i = 1; i < 20; i++) {
+    result_f += small_float_ops(i * 1.5f, i * 2.5f);
+  }
+
+  // Large float operations (will overflow to inf in fp16)
+  for (int i = 1; i < 15; i++) {
+    float big = 10000.0f * i;
+    result_f += large_float_ops(big, big); // Result > 65504
+  }
+
+  // Tiny float operations (will underflow to 0 in fp16)
+  for (int i = 1; i < 15; i++) {
+    float tiny = 1e-4f / i;
+    result_f += tiny_float_ops(tiny, tiny); // Result <= ~1e-8
+  }
+
+  // Finite operands whose quotient is Inf/NaN; the += then sees Inf/NaN
+  result_f += special_input_ops(1.0f, 0.0f); // Inf
+  result_f += special_input_ops(0.0f, 0.0f); // NaN
+
+  // Double operations that overflow in float
+  for (int i = 1; i < 10; i++) {
+    double huge = 1e38 * i;
+    result_d += large_double_ops(huge, huge); // Result > float_max
+  }
+
+  // Some normal double operations
+  for (int i = 1; i < 30; i++) {
+    result_d += i * 1.5 + i * 2.5;
+  }
+
+  if (!isnan(result_f) && !isnan(result_d)) {
+    printf("Computation complete\n");
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/precision_mixed.c b/compiler-rt/test/instrumentor-tools/precision_mixed.c
new file mode 100644
index 0000000000000..6475ed9cfed3c
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_mixed.c
@@ -0,0 +1,66 @@
+// Test precision analysis with mixed float and double operations
+//
+// This test uses both float and double operations to verify that the
+// precision analysis handles both types correctly.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Float operations (already using lower precision)
+float compute_float_distance(float x1, float y1, float x2, float y2) {
+  float dx = x2 - x1;
+  float dy = y2 - y1;
+  return sqrtf(dx * dx + dy * dy);
+}
+
+// Double operations (analyze if float would suffice)
+double compute_double_distance(double x1, double y1, double x2, double y2) {
+  double dx = x2 - x1;
+  double dy = y2 - y1;
+  return sqrt(dx * dx + dy * dy);
+}
+
+// Mixed precision computation
+double mixed_computation(float a, double b) {
+  // Implicit conversion from float to double
+  double a_double = a;
+  return a_double * b + a_double / b;
+}
+
+int main(void) {
+  float float_result = 0.0f;
+  double double_result = 0.0;
+
+  // Float operations
+  for (int i = 0; i < 50; i++) {
+    float_result += compute_float_distance(i * 0.1f, i * 0.2f, (i + 1) * 0.1f,
+                                           (i + 1) * 0.2f);
+  }
+
+  // Double operations with values that should work well in float
+  for (int i = 0; i < 50; i++) {
+    double_result +=
+        compute_double_distance(i * 0.1, i * 0.2, (i + 1) * 0.1, (i + 1) * 0.2);
+  }
+
+  // Mixed precision
+  for (int i = 1; i < 30; i++) {
+    double_result += mixed_computation(i * 1.5f, i * 2.5);
+  }
+
+  // Prevent optimization
+  if (float_result > 0.0f && double_result > 0.0) {
+    printf("Float result: %f, Double result: %f\n", float_result,
+           double_result);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/simple_precision.c b/compiler-rt/test/instrumentor-tools/simple_precision.c
new file mode 100644
index 0000000000000..5c03ccaedffee
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/simple_precision.c
@@ -0,0 +1,56 @@
+// Test basic precision analysis functionality
+//
+// This test verifies that the precision analysis runtime correctly identifies
+// operations that could use lower precision with acceptable accuracy.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Simple operations with large enough values that float precision is sufficient
+double simple_add(double a, double b) { return a + b; }
+
+double simple_mul(double a, double b) { return a * b; }
+
+double simple_div(double a, double b) { return a / b; }
+
+// Function that uses values where precision matters more
+double precise_computation(double x) {
+  // These operations on small differences might need double precision
+  double y = x + 1e-8;
+  double z = y - x;
+  return z * 1e8;
+}
+
+int main(void) {
+  double result = 0.0;
+
+  // Simple operations with "normal" range values
+  // These should generally work fine with float precision
+  for (int i = 0; i < 100; i++) {
+    result += simple_add(i * 1.5, i * 2.5);
+    result += simple_mul(i * 0.5, i * 0.5);
+    if (i > 0) {
+      result += simple_div(i * 10.0, i * 2.0);
+    }
+  }
+
+  // Operations that might require more precision
+  for (int i = 1; i < 50; i++) {
+    result += precise_computation(i * 1.0);
+  }
+
+  // Prevent optimization from removing the computations
+  if (result > 0.0) {
+    printf("Computation complete: %f\n", result);
+  }
+
+  return 0;
+}