[llvm-branch-commits] [compiler-rt] [Instrumentor] Add runtime examples: [2/N] A FP precision analysis (PR #205229)
Johannes Doerfert via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jun 23 19:56:14 PDT 2026
https://github.com/jdoerfert updated https://github.com/llvm/llvm-project/pull/205229
>From 793557cad1832580a490c5f59ea976954cc3e18e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert.llvm at gmail.com>
Date: Mon, 22 Jun 2026 17:52:58 -0700
Subject: [PATCH] [Instrumentor] Add runtime examples: [2/N] A FP precision
analysis
Second example:
Check all floating point operations and track if they could be done at
lower precision.
Partially developed by Claude (AI), tested and verified by me.
---
.../lib/instrumentor-tools/CMakeLists.txt | 1 +
.../precision-analysis/CMakeLists.txt | 67 ++
.../precision_analysis_config.json | 43 ++
.../precision_analysis_runtime.cpp | 603 ++++++++++++++++++
.../test/instrumentor-tools/CMakeLists.txt | 1 +
.../test/instrumentor-tools/lit.cfg.py | 7 +-
.../instrumentor-tools/precision_detailed.c | 76 +++
.../precision_fp16_overflow.c | 92 +++
.../test/instrumentor-tools/precision_mixed.c | 66 ++
.../instrumentor-tools/simple_precision.c | 56 ++
10 files changed, 1010 insertions(+), 2 deletions(-)
create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
create mode 100644 compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
create mode 100644 compiler-rt/test/instrumentor-tools/precision_detailed.c
create mode 100644 compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
create mode 100644 compiler-rt/test/instrumentor-tools/precision_mixed.c
create mode 100644 compiler-rt/test/instrumentor-tools/simple_precision.c
diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
index 6f8e2fe352f5a..70b4b899124ad 100644
--- a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
+++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt
@@ -9,3 +9,4 @@ add_compiler_rt_component(instrumentor-tools)
# Add subdirectories for specific examples
add_subdirectory(flop-counter)
+add_subdirectory(precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
new file mode 100644
index 0000000000000..42c4a8573df1c
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/CMakeLists.txt
@@ -0,0 +1,67 @@
+# CMakeLists.txt for Precision Analysis Example
+#
+# This example demonstrates analyzing whether floating-point operations
+# could use lower precision while maintaining acceptable accuracy using
+# the Instrumentor pass.
+
+add_compiler_rt_component(precision-analysis)
+
+set(PRECISION_ANALYSIS_SOURCES
+ precision_analysis_runtime.cpp
+ )
+
+set(PRECISION_ANALYSIS_HEADERS
+ )
+
+# Include paths for instrumentor runtime headers
+# The instrumentor runtime headers are in llvm/utils
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Common flags
+set(PRECISION_ANALYSIS_CFLAGS
+ ${COMPILER_RT_COMMON_CFLAGS}
+ -std=c++17
+ )
+
+# Determine supported architectures
+if(APPLE)
+ # On Darwin, use the darwin OSX architectures
+ set(PRECISION_ANALYSIS_SUPPORTED_ARCH arm64)
+ if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "")
+ set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+ endif()
+ if(DARWIN_osx_ARCHS)
+ set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+ endif()
+else()
+ # For non-Apple platforms, use the default target architecture
+ set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+endif()
+
+message(STATUS "Precision Analysis supported architectures: ${PRECISION_ANALYSIS_SUPPORTED_ARCH}")
+
+# Build the static runtime library for Apple platforms
+if(APPLE)
+ add_compiler_rt_runtime(clang_rt.precision_analysis
+ STATIC
+ OS osx
+ ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+ CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+ SOURCES ${PRECISION_ANALYSIS_SOURCES}
+ ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+ PARENT_TARGET precision-analysis)
+else()
+ add_compiler_rt_runtime(clang_rt.precision_analysis
+ STATIC
+ ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+ CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+ SOURCES ${PRECISION_ANALYSIS_SOURCES}
+ ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+ PARENT_TARGET precision-analysis)
+endif()
+
+# Install the configuration file as a resource
+install(FILES precision_analysis_config.json
+ DESTINATION share/llvm/instrumentor-configs
+ COMPONENT precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
new file mode 100644
index 0000000000000..16a9298c98bee
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_config.json
@@ -0,0 +1,43 @@
+{
+ "configuration": {
+ "runtime_prefix": "__precision_analysis_",
+ "runtime_prefix.description": "The runtime API prefix.",
+ "runtime_stubs_file": "rt",
+ "target_regex": "",
+ "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.",
+ "function_regex": "",
+ "function_regex.description": "Regular expression to be matched against a function name. Only functions that match this regex will be instrumented.",
+ "demangle_function_names": true,
+ "demangle_function_names.description": "Demangle functions names passed to the runtime.",
+ "host_enabled": true,
+ "host_enabled.description": "Instrument non-GPU targets",
+ "gpu_enabled": false,
+ "gpu_enabled.description": "Instrument GPU targets"
+ },
+ "instruction_post": {
+ "numeric": {
+ "enabled": true,
+ "filter": "(type_id == 2 || type_id == 3) || ((type_id == 17 || type_id == 18) && (sub_type_id == 2 || sub_type_id == 3))",
+ "filter.description": "Only instrument float (type_id 2) and double (type_id 3) operations, or vectors of those types.",
+ "type_id": true,
+ "type_id.description": "The operation's type id.",
+ "sub_type_id": true,
+ "sub_type_id.description": "The operation's sub-type id (for vectors).",
+ "size": true,
+ "size.description": "The operation's type size.",
+ "opcode": true,
+ "opcode.description": "The instruction opcode.",
+ "left": true,
+ "left.description": "The operation's left operand.",
+ "right": true,
+ "right.description": "The operation's right operand. This value is poison for unary operations.",
+ "result": true,
+ "result.replace": false,
+ "result.description": "Result of the operation.",
+ "flags": true,
+ "flags.description": "A bitmask value signaling which instruction flags are present.",
+ "id": true,
+ "id.description": "A unique ID associated with the given instrumentor call"
+ }
+ }
+}
diff --git a/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
new file mode 100644
index 0000000000000..78f7ef9c0e491
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-tools/precision-analysis/precision_analysis_runtime.cpp
@@ -0,0 +1,603 @@
+//===-- precision_analysis_runtime.cpp - Precision Analysis Runtime ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements runtime for analyzing whether floating-point operations
+// could be performed with lower precision while maintaining acceptable
+// accuracy. It instruments FP operations, simulates them with lower precision,
+// and compares results to determine if precision reduction is viable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <mutex>
+
+// Configuration: relative error threshold for "acceptable" lower precision
+// A result is considered acceptable if |result_lower - result_higher| /
+// |result_higher| < threshold
+static constexpr double DefaultRelativeErrorThreshold = 1e-3; // 0.1%
+
+// Per-operation statistics - tracks separately by original precision
+struct OperationStats {
+ uint64_t TotalCount; // Total number of times this operation executed
+
+ // Double-precision operations (started as double)
+ uint64_t DoubleToFp16; // Double ops that can use fp16
+ uint64_t DoubleToFloat; // Double ops that can use float (but not fp16)
+ uint64_t DoubleNeedsDouble; // Double ops that need double precision
+
+ // Float-precision operations (started as float)
+ uint64_t FloatToFp16; // Float ops that can use fp16
+ uint64_t FloatNeedsFloat; // Float ops that need float precision
+
+ // Special values
+ uint64_t
+ InputSpecialValues; // Times when inputs had special values (NaN, Inf)
+ uint64_t DoubleLoweringSpecial; // Double ops where lowering caused overflow
+ uint64_t FloatLoweringSpecial; // Float ops where lowering caused overflow
+};
+
+// Helper functions to get statistics map and mutex
+// Using function-local statics ensures proper initialization order
+// and avoids static destruction order fiasco.
+//
+// IMPORTANT: We use heap allocation (new) without delete to intentionally
+// "leak" these objects. This ensures they remain valid when the destructor
+// function runs at program exit, even if it runs after static destructors.
+// For a profiling tool that runs once and exits, this is acceptable.
+static std::map<int32_t, OperationStats> &getOperationStats() {
+ static std::map<int32_t, OperationStats> *Stats =
+ new std::map<int32_t, OperationStats>();
+ return *Stats;
+}
+
+static std::mutex &getStatsMutex() {
+ static std::mutex *Mutex = new std::mutex();
+ return *Mutex;
+}
+
+enum {
+ LLVM_OPCODE_FAdd = 15,
+ LLVM_OPCODE_FSub = 17,
+ LLVM_OPCODE_FMul = 19,
+ LLVM_OPCODE_FDiv = 22,
+ LLVM_OPCODE_FRem = 25,
+ LLVM_OPCODE_FNeg = 13,
+};
+
+// Helper: Convert float to fp16 (IEEE 754 half precision) and back
+// fp16 format: 1 sign bit, 5 exponent bits, 10 mantissa bits
+static inline float simulateFp16Precision(float Value) {
+ // Handle special cases
+ if (std::isnan(Value) || std::isinf(Value)) {
+ return Value;
+ }
+
+ uint32_t Bits;
+ std::memcpy(&Bits, &Value, sizeof(float));
+
+ uint32_t Sign = Bits & 0x80000000u;
+ int32_t Exponent = ((Bits >> 23) & 0xFF) - 127;
+ uint32_t Mantissa = Bits & 0x7FFFFFu;
+
+ // fp16 range: exponent -14 to +15 (biased 1 to 30)
+ // Underflow to zero
+ if (Exponent < -14) {
+ return Sign ? -0.0f : 0.0f;
+ }
+
+ // Overflow to infinity
+ if (Exponent > 15) {
+ return Sign ? -INFINITY : INFINITY;
+ }
+
+ // Round mantissa from 23 bits to 10 bits
+ uint32_t Fp16Mantissa = (Mantissa + 0x1000u) >> 13;
+ if (Fp16Mantissa > 0x3FF) {
+ // Rounding caused overflow
+ Fp16Mantissa = 0;
+ Exponent++;
+ if (Exponent > 15) {
+ return Sign ? -INFINITY : INFINITY;
+ }
+ }
+
+ // Reconstruct float with reduced precision
+ uint32_t Fp16Exponent = (Exponent + 127) & 0xFF;
+ uint32_t ResultBits = Sign | (Fp16Exponent << 23) | (Fp16Mantissa << 13);
+
+ float Result;
+ std::memcpy(&Result, &ResultBits, sizeof(float));
+ return Result;
+}
+
+// Helper: Check if value is special (NaN or Inf)
+static inline bool isSpecialValue(double Value) {
+ return std::isnan(Value) || std::isinf(Value);
+}
+
+static inline bool isSpecialValue(float Value) {
+ return std::isnan(Value) || std::isinf(Value);
+}
+
+// Helper: Compute relative error
+static inline double computeRelativeError(double Reference, double Test) {
+ if (Reference == 0.0) {
+ return (Test == 0.0) ? 0.0 : INFINITY;
+ }
+ return std::fabs((Test - Reference) / Reference);
+}
+
+// Helper: Perform operation with lower precision (double → float)
+static double simulateLowerPrecisionOp(int32_t Opcode, double Left,
+ double Right) {
+ float LeftF = static_cast<float>(Left);
+ float RightF = static_cast<float>(Right);
+ float ResultF = 0.0f;
+
+ switch (Opcode) {
+ case LLVM_OPCODE_FAdd:
+ ResultF = LeftF + RightF;
+ break;
+ case LLVM_OPCODE_FSub:
+ ResultF = LeftF - RightF;
+ break;
+ case LLVM_OPCODE_FMul:
+ ResultF = LeftF * RightF;
+ break;
+ case LLVM_OPCODE_FDiv:
+ ResultF = LeftF / RightF;
+ break;
+ case LLVM_OPCODE_FRem:
+ ResultF = std::fmod(LeftF, RightF);
+ break;
+ case LLVM_OPCODE_FNeg:
+ ResultF = -LeftF;
+ break;
+ default:
+ // For unknown operations, assume lower precision is not ok
+ return NAN;
+ }
+
+ return static_cast<double>(ResultF);
+}
+
+// Helper: Perform operation with fp16 precision (float → fp16)
+static float simulateFp16Op(int32_t Opcode, float Left, float Right) {
+ float LeftFp16 = simulateFp16Precision(Left);
+ float RightFp16 = simulateFp16Precision(Right);
+ float ResultFp16 = 0.0f;
+
+ switch (Opcode) {
+ case LLVM_OPCODE_FAdd:
+ ResultFp16 = LeftFp16 + RightFp16;
+ break;
+ case LLVM_OPCODE_FSub:
+ ResultFp16 = LeftFp16 - RightFp16;
+ break;
+ case LLVM_OPCODE_FMul:
+ ResultFp16 = LeftFp16 * RightFp16;
+ break;
+ case LLVM_OPCODE_FDiv:
+ ResultFp16 = LeftFp16 / RightFp16;
+ break;
+ case LLVM_OPCODE_FRem:
+ ResultFp16 = std::fmod(LeftFp16, RightFp16);
+ break;
+ case LLVM_OPCODE_FNeg:
+ ResultFp16 = -LeftFp16;
+ break;
+ default:
+ return NAN;
+ }
+
+ // Apply fp16 precision to result as well
+ return simulateFp16Precision(ResultFp16);
+}
+
+// Analyze a double-precision operation: check whether float precision would
+// suffice and, if so, whether fp16 would work too. Updates the statistics
+// entry of `Id` while holding the stats mutex.
+static void analyzeDoubleOperation(int32_t Opcode, double Left, double Right,
+                                   double Result, int32_t Id) {
+  std::lock_guard<std::mutex> Lock(getStatsMutex());
+
+  OperationStats &Stats = getOperationStats()[Id];
+  Stats.TotalCount++;
+
+  // FNeg is unary, so its right operand carries no meaningful value (the
+  // config documents it as poison) and must not be inspected.
+  bool RightIsSpecial = Opcode != LLVM_OPCODE_FNeg && isSpecialValue(Right);
+  if (isSpecialValue(Result) || isSpecialValue(Left) || RightIsSpecial) {
+    Stats.InputSpecialValues++;
+    return;
+  }
+
+  // First, try double → float
+  double FloatResult = simulateLowerPrecisionOp(Opcode, Left, Right);
+
+  // Check if lowering to float created special values (overflow/underflow)
+  if (isSpecialValue(FloatResult)) {
+    Stats.DoubleNeedsDouble++;     // Float doesn't work, need to keep double
+    Stats.DoubleLoweringSpecial++; // Record that overflow occurred
+    return;
+  }
+
+  // Float precision is not sufficient if the relative error is too large.
+  if (computeRelativeError(Result, FloatResult) >=
+      DefaultRelativeErrorThreshold) {
+    Stats.DoubleNeedsDouble++;
+    return;
+  }
+
+  // Float precision is acceptable. Now check if fp16 would also work.
+  // Convert operands to float, then simulate fp16 operation
+  float LeftF = static_cast<float>(Left);
+  float RightF = static_cast<float>(Right);
+  float ResultF = static_cast<float>(Result);
+
+  float Fp16Result = simulateFp16Op(Opcode, LeftF, RightF);
+
+  // Check if lowering to fp16 created special values
+  if (isSpecialValue(Fp16Result)) {
+    // fp16 causes overflow/underflow, but float works (double → float)
+    Stats.DoubleToFloat++;         // Float is the lowest we can go
+    Stats.DoubleLoweringSpecial++; // Record that fp16 overflow occurred
+    return;
+  }
+
+  // Compare float vs fp16 results
+  double Fp16Error = computeRelativeError(static_cast<double>(ResultF),
+                                          static_cast<double>(Fp16Result));
+
+  if (Fp16Error < DefaultRelativeErrorThreshold) {
+    // fp16 precision is sufficient (double → fp16)
+    Stats.DoubleToFp16++;
+  } else {
+    // Need float precision but not double (double → float)
+    Stats.DoubleToFloat++;
+  }
+}
+
+// Analyze a float-precision operation (check if half precision would work)
+static void analyzeFloatOperation(int32_t Opcode, float Left, float Right,
+                                  float Result, int32_t Id) {
+  std::lock_guard<std::mutex> Lock(getStatsMutex());
+
+  OperationStats &Stats = getOperationStats()[Id];
+  Stats.TotalCount++;
+
+  // FNeg is unary, so its right operand carries no meaningful value (the
+  // config documents it as poison) and must not be inspected.
+  bool RightIsSpecial = Opcode != LLVM_OPCODE_FNeg && isSpecialValue(Right);
+  if (isSpecialValue(Result) || isSpecialValue(Left) || RightIsSpecial) {
+    Stats.InputSpecialValues++;
+    return;
+  }
+
+  // Simulate operation with fp16 precision
+  float LowerPrecisionResult = simulateFp16Op(Opcode, Left, Right);
+
+  // Lowering created a special value (overflow to inf): fp16 does not work.
+  if (isSpecialValue(LowerPrecisionResult)) {
+    Stats.FloatNeedsFloat++;      // FP16 doesn't work, need to keep float
+    Stats.FloatLoweringSpecial++; // Record that overflow occurred
+    return;
+  }
+
+  // Compare results
+  double RelativeError = computeRelativeError(
+      static_cast<double>(Result), static_cast<double>(LowerPrecisionResult));
+  if (RelativeError < DefaultRelativeErrorThreshold) {
+    // fp16 precision is sufficient (float → fp16)
+    Stats.FloatToFp16++;
+  } else {
+    // Need to keep float precision (float → float)
+    Stats.FloatNeedsFloat++;
+  }
+}
+
+extern "C" {
+
+// Runs at program exit (destructor priority 1000) and prints the report to
+// stdout: per-operation table, summaries by original precision, and
+// recommendations. Counters are uint64_t, hence the PRIu64 conversions.
+__attribute__((destructor(1000))) void __precision_analysis_finalize() {
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf(" Floating-Point Precision Analysis Results\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf(
+      "This analysis checks minimum precision needed (error < %.2f%%):\n",
+      DefaultRelativeErrorThreshold * 100);
+  std::printf(" - Double operations: Try Float, then FP16 if Float works\n");
+  std::printf(" - Float operations: Try FP16\n");
+  std::printf("================================================================"
+              "==========\n\n");
+
+  std::map<int32_t, OperationStats> &OperationStatsMap = getOperationStats();
+
+  if (OperationStatsMap.empty()) {
+    std::printf("No operations analyzed.\n");
+    std::printf("=============================================================="
+                "============\n");
+    return;
+  }
+
+  uint64_t TotalOps = 0;
+  uint64_t TotalDoubleToFp16 = 0;
+  uint64_t TotalDoubleToFloat = 0;
+  uint64_t TotalDoubleNeedsDouble = 0;
+  uint64_t TotalFloatToFp16 = 0;
+  uint64_t TotalFloatNeedsFloat = 0;
+  uint64_t TotalInputSpecial = 0;
+  uint64_t TotalDoubleLoweringSpecial = 0;
+  uint64_t TotalFloatLoweringSpecial = 0;
+
+  std::printf("Per-Operation Results:\n");
+  std::printf("%-5s %8s %9s %8s %6s %9s %6s %8s %7s %7s\n", "Op ID", "Total",
+              "D->FP16", "D->F32", "D->D", "F->FP16", "F->F", "InpNaN",
+              "D-OvFl", "F-OvFl");
+  std::printf(
+      "-------------------------------------------------------------------"
+      "-------------\n");
+
+  for (const auto &[OpId, Stats] : OperationStatsMap) {
+    TotalOps += Stats.TotalCount;
+    TotalDoubleToFp16 += Stats.DoubleToFp16;
+    TotalDoubleToFloat += Stats.DoubleToFloat;
+    TotalDoubleNeedsDouble += Stats.DoubleNeedsDouble;
+    TotalFloatToFp16 += Stats.FloatToFp16;
+    TotalFloatNeedsFloat += Stats.FloatNeedsFloat;
+    TotalInputSpecial += Stats.InputSpecialValues;
+    TotalDoubleLoweringSpecial += Stats.DoubleLoweringSpecial;
+    TotalFloatLoweringSpecial += Stats.FloatLoweringSpecial;
+
+    std::printf("%-5d %8" PRIu64 " %9" PRIu64 " %8" PRIu64 " %6" PRIu64
+                " %9" PRIu64 " %6" PRIu64 " %8" PRIu64 " %7" PRIu64
+                " %7" PRIu64 "\n",
+                OpId, Stats.TotalCount, Stats.DoubleToFp16, Stats.DoubleToFloat,
+                Stats.DoubleNeedsDouble, Stats.FloatToFp16,
+                Stats.FloatNeedsFloat, Stats.InputSpecialValues,
+                Stats.DoubleLoweringSpecial, Stats.FloatLoweringSpecial);
+  }
+
+  std::printf(
+      "-------------------------------------------------------------------"
+      "-------------\n");
+  std::printf("%-5s %8" PRIu64 " %9" PRIu64 " %8" PRIu64 " %6" PRIu64
+              " %9" PRIu64 " %6" PRIu64 " %8" PRIu64 " %7" PRIu64 " %7" PRIu64
+              "\n",
+              "TOTAL", TotalOps, TotalDoubleToFp16, TotalDoubleToFloat,
+              TotalDoubleNeedsDouble, TotalFloatToFp16, TotalFloatNeedsFloat,
+              TotalInputSpecial, TotalDoubleLoweringSpecial,
+              TotalFloatLoweringSpecial);
+
+  std::printf("\nColumn Legend:\n");
+  std::printf(" D->FP16: Double ops that can use FP16 (16-bit)\n");
+  std::printf(
+      " D->F32: Double ops that can use Float (32-bit) but not FP16\n");
+  std::printf(" D->D: Double ops that require Double (64-bit)\n");
+  std::printf(" F->FP16: Float ops that can use FP16 (16-bit)\n");
+  std::printf(" F->F: Float ops that must stay Float (32-bit)\n");
+  std::printf(" InpNaN: Operations with NaN/Inf in inputs or result\n");
+  std::printf(" D-OvFl: Double ops where lowering caused overflow\n");
+  std::printf(" F-OvFl: Float ops where lowering to FP16 caused overflow\n");
+
+  uint64_t TotalDoubleOps =
+      TotalDoubleToFp16 + TotalDoubleToFloat + TotalDoubleNeedsDouble;
+  uint64_t TotalFloatOps = TotalFloatToFp16 + TotalFloatNeedsFloat;
+  uint64_t AnalyzedTotal = TotalDoubleOps + TotalFloatOps;
+
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf("Summary by Original Precision:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (TotalDoubleOps > 0) {
+    std::printf("\nDOUBLE Operations (started as 64-bit double):\n");
+    std::printf(" Total: %" PRIu64 "\n", TotalDoubleOps);
+    std::printf(" Can reduce to FP16 (16-bit): %" PRIu64 " (%.1f%%)\n",
+                TotalDoubleToFp16, 100.0 * TotalDoubleToFp16 / TotalDoubleOps);
+    std::printf(" Can reduce to Float (32-bit): %" PRIu64 " (%.1f%%)\n",
+                TotalDoubleToFloat,
+                100.0 * TotalDoubleToFloat / TotalDoubleOps);
+    std::printf(" Must keep Double (64-bit): %" PRIu64 " (%.1f%%)\n",
+                TotalDoubleNeedsDouble,
+                100.0 * TotalDoubleNeedsDouble / TotalDoubleOps);
+
+    uint64_t DoubleConvertible = TotalDoubleToFp16 + TotalDoubleToFloat;
+    std::printf(" → Total convertible to lower: %" PRIu64 " (%.1f%%)\n",
+                DoubleConvertible, 100.0 * DoubleConvertible / TotalDoubleOps);
+  }
+
+  if (TotalFloatOps > 0) {
+    std::printf("\nFLOAT Operations (started as 32-bit float):\n");
+    std::printf(" Total: %" PRIu64 "\n", TotalFloatOps);
+    std::printf(" Can reduce to FP16 (16-bit): %" PRIu64 " (%.1f%%)\n",
+                TotalFloatToFp16, 100.0 * TotalFloatToFp16 / TotalFloatOps);
+    std::printf(" Must keep Float (32-bit): %" PRIu64 " (%.1f%%)\n",
+                TotalFloatNeedsFloat,
+                100.0 * TotalFloatNeedsFloat / TotalFloatOps);
+  }
+
+  std::printf("\nOVERALL Statistics:\n");
+  std::printf(" Total analyzed operations: %" PRIu64 "\n", AnalyzedTotal);
+  std::printf(" Operations with input NaN/Inf: %" PRIu64 "\n",
+              TotalInputSpecial);
+  std::printf(" Double ops causing overflow: %" PRIu64 "\n",
+              TotalDoubleLoweringSpecial);
+  std::printf(" Float ops causing overflow: %" PRIu64 "\n",
+              TotalFloatLoweringSpecial);
+
+  if (AnalyzedTotal > 0) {
+    uint64_t TotalToFp16 = TotalDoubleToFp16 + TotalFloatToFp16;
+    std::printf("\n ALL operations reducible to FP16: %" PRIu64 " (%.1f%%)\n",
+                TotalToFp16, 100.0 * TotalToFp16 / AnalyzedTotal);
+  }
+
+  // Provide recommendations based on results
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf("Recommendations:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (TotalDoubleOps > 0) {
+    // TotalDoubleOps already contains the ops whose lowering overflowed (they
+    // are counted as D->D or D->F32 too), so it is the denominator as is.
+    double DoubleToLower =
+        100.0 * (TotalDoubleToFp16 + TotalDoubleToFloat) / TotalDoubleOps;
+    double OverflowPct = 100.0 * TotalDoubleLoweringSpecial / TotalDoubleOps;
+
+    std::printf("\nFor DOUBLE operations:\n");
+    std::printf(" Analyzed: %" PRIu64 " (%.1f%% overflow, not convertible)\n",
+                TotalDoubleOps, OverflowPct);
+
+    if (DoubleToLower > 80.0) {
+      std::printf(
+          " ✓ %.1f%% can use lower precision - strong conversion candidate\n",
+          DoubleToLower);
+      if (TotalDoubleToFp16 > TotalDoubleToFloat) {
+        std::printf(" ✓ Many can go directly to FP16 - consider aggressive "
+                    "downcasting\n");
+      } else {
+        std::printf(
+            " ✓ Most need Float - consider using f32 instead of f64\n");
+      }
+      if (TotalDoubleLoweringSpecial > 0 && OverflowPct > 5.0) {
+        std::printf(
+            " ⚠ %.1f%% overflow - may need value scaling/normalization\n",
+            OverflowPct);
+      }
+    } else if (DoubleToLower > 50.0) {
+      std::printf(
+          " ~ %.1f%% can use lower precision - mixed precision recommended\n",
+          DoubleToLower);
+      if (TotalDoubleLoweringSpecial > 0) {
+        std::printf(" ⚠ %.1f%% overflow - limits conversion opportunities\n",
+                    OverflowPct);
+      }
+    } else {
+      std::printf(" ✗ Only %.1f%% can use lower precision - keep double\n",
+                  DoubleToLower);
+      if (TotalDoubleLoweringSpecial > TotalDoubleNeedsDouble) {
+        std::printf(" ! Most failures due to overflow (%.1f%%) rather than "
+                    "accuracy (%" PRIu64 " ops)\n",
+                    OverflowPct, TotalDoubleNeedsDouble);
+        std::printf(" → Problem is value range, not precision\n");
+      }
+    }
+  }
+
+  if (TotalFloatOps > 0) {
+    // TotalFloatOps already contains the overflowing ops (each one is counted
+    // as F->F as well); the remaining F->F ops failed on accuracy alone.
+    uint64_t FloatAccuracyFailures =
+        TotalFloatNeedsFloat - TotalFloatLoweringSpecial;
+    double FloatToFp16Pct = 100.0 * TotalFloatToFp16 / TotalFloatOps;
+    double FloatOverflowPct = 100.0 * TotalFloatLoweringSpecial / TotalFloatOps;
+
+    std::printf("\nFor FLOAT operations:\n");
+    std::printf(" Analyzed: %" PRIu64 " (%.1f%% overflow to FP16)\n",
+                TotalFloatOps, FloatOverflowPct);
+
+    if (FloatToFp16Pct > 80.0) {
+      std::printf(
+          " ✓ %.1f%% can use FP16 - strong FP16 conversion candidate\n",
+          FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > 0 && FloatOverflowPct > 5.0) {
+        std::printf(" ⚠ %.1f%% overflow (values exceed FP16 range ±65504)\n",
+                    FloatOverflowPct);
+      }
+    } else if (FloatToFp16Pct > 50.0) {
+      std::printf(" ~ %.1f%% can use FP16 - selective FP16 use recommended\n",
+                  FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > 0) {
+        std::printf(" ⚠ %.1f%% overflow - limits FP16 opportunities\n",
+                    FloatOverflowPct);
+      }
+    } else {
+      std::printf(" ✗ Only %.1f%% can use FP16 - keep float\n",
+                  FloatToFp16Pct);
+      if (TotalFloatLoweringSpecial > FloatAccuracyFailures) {
+        std::printf(" ! Most failures due to FP16 overflow (%.1f%%) rather "
+                    "than accuracy (%" PRIu64 " ops)\n",
+                    FloatOverflowPct, FloatAccuracyFailures);
+        std::printf(" → Problem: Values exceed FP16 range (±65504)\n");
+        std::printf(" → Solution: Scale values or use Float\n");
+      }
+    }
+  }
+
+  std::printf("================================================================"
+              "==========\n");
+}
+
+// Instrumentor callback for numeric instructions (see the "numeric" entry in
+// precision_analysis_config.json). `left`, `right`, and `result` hold the raw
+// bits of the values. Only scalar float and double operations are analyzed;
+// `sub_type_id`, `size`, and `flags` are currently unused.
+void __precision_analysis_post_numeric(int32_t type_id, int32_t sub_type_id,
+                                       int32_t size, int32_t opcode,
+                                       int64_t left, int64_t right,
+                                       int64_t result, int64_t flags,
+                                       int32_t id) {
+  // For vector operations, we'd need to extract each element.
+  // For now, skip vector operations (they're more complex).
+  if (type_id == FixedVectorTyID || type_id == ScalableVectorTyID) {
+    return;
+  }
+
+  // Copy the payload bits out with memcpy; reading an int64_t object through
+  // a double/float lvalue (pointer cast) violates the strict aliasing rule.
+  if (type_id == DoubleTyID) {
+    // Double precision operation - check if float would suffice
+    double LeftVal;
+    double RightVal;
+    double ResultVal;
+    std::memcpy(&LeftVal, &left, sizeof(double));
+    std::memcpy(&RightVal, &right, sizeof(double));
+    std::memcpy(&ResultVal, &result, sizeof(double));
+
+    analyzeDoubleOperation(opcode, LeftVal, RightVal, ResultVal, id);
+  } else if (type_id == FloatTyID) {
+    // Float precision operation - check if half would suffice. Only the
+    // first sizeof(float) bytes of each 64-bit slot are read.
+    // NOTE(review): assumes the float's bits occupy those bytes - confirm
+    // against how the instrumentor passes 32-bit values.
+    float LeftVal;
+    float RightVal;
+    float ResultVal;
+    std::memcpy(&LeftVal, &left, sizeof(float));
+    std::memcpy(&RightVal, &right, sizeof(float));
+    std::memcpy(&ResultVal, &result, sizeof(float));
+
+    analyzeFloatOperation(opcode, LeftVal, RightVal, ResultVal, id);
+  }
+  // Skip other types (half, bfloat, extended precision)
+}
+
+void __precision_analysis_post_numeric_ind(int32_t type_id, int32_t sub_type_id,
+ int32_t size, int32_t opcode,
+ int64_t *left_ptr,
+ int64_t *right_ptr,
+ int64_t *result_ptr, int64_t flags,
+ int32_t id) {}
+
+} // extern "C"
diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
index cda35a017fed5..402fb644f3d0a 100644
--- a/compiler-rt/test/instrumentor-tools/CMakeLists.txt
+++ b/compiler-rt/test/instrumentor-tools/CMakeLists.txt
@@ -5,6 +5,7 @@ set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(INSTRUMENTOR_TESTSUITES)
set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+list(APPEND INSTRUMENTOR_TEST_DEPS precision-analysis)
# Check if INSTRUMENTOR_SUPPORTED_ARCH is defined
if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py
index 24a4897efbbb5..48ea19f01de38 100644
--- a/compiler-rt/test/instrumentor-tools/lit.cfg.py
+++ b/compiler-rt/test/instrumentor-tools/lit.cfg.py
@@ -50,13 +50,16 @@ def make_lib_name(name):
return "clang_rt." + name
return "clang_rt." + name + "_osx"
+def make_lib_substitutions(name):
+ config.substitutions.append(("%" + name + "_lib", make_lib_name(name)))
# Add clang substitutions.
config.substitutions.append(("%clang ", build_invocation(clang_cflags)))
config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags)))
-flop_counter_lib = make_lib_name("flop_counter")
-config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
+tools = ["flop_counter", "precision_analysis"]
+for tool in tools:
+ make_lib_substitutions(tool)
config.substitutions.append(("%lib_dir", config.compiler_rt_libdir))
diff --git a/compiler-rt/test/instrumentor-tools/precision_detailed.c b/compiler-rt/test/instrumentor-tools/precision_detailed.c
new file mode 100644
index 0000000000000..cde58ab6182c0
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_detailed.c
@@ -0,0 +1,76 @@
+// Test precision analysis with detailed per-operation tracking
+//
+// This test demonstrates how the precision analysis tracks each operation
+// separately by ID and shows detailed statistics.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: TOTAL
+// CHECK: Column Legend:
+// CHECK: D->FP16:{{.*}}Double ops that can use FP16
+// CHECK: Summary by Original Precision:
+
+#include <stdio.h>
+
+// Each of these operations will get a unique ID
+// We can track their precision requirements separately
+
+double operation_a(double x, double y) {
+ // Simple addition - should work well with float
+ return x + y;
+}
+
+double operation_b(double x, double y) {
+ // Multiplication - should work well with float for normal ranges
+ return x * y;
+}
+
+double operation_c(double x, double y) {
+ // Division - might need more precision depending on values
+ return x / y;
+}
+
+double operation_d(double x) {
+ // Subtraction of close values - might need double precision
+ double y = x + 1e-6;
+ return y - x;
+}
+
+double complex_operation(double a, double b) {
+ // Multiple operations in sequence
+ double temp1 = a * b; // Op 1
+ double temp2 = temp1 + a; // Op 2
+ double temp3 = temp2 / b; // Op 3
+ return temp3;
+}
+
+int main(void) {
+ double result = 0.0;
+
+ // Execute operations multiple times
+ // Each call site gets a unique operation ID
+ for (int i = 1; i < 20; i++) {
+ result += operation_a(i * 1.0, i * 2.0);
+ result += operation_b(i * 1.5, i * 0.5);
+ result += operation_c(i * 10.0, i * 2.0);
+ result += operation_d(i * 100.0);
+ result += complex_operation(i * 1.5, i * 2.5);
+ }
+
+ // Some operations with different value ranges
+ for (int i = 1; i < 10; i++) {
+ // Very small values - might need double precision
+ result += operation_a(i * 1e-5, i * 1e-5);
+ // Large values - might work with float
+ result += operation_b(i * 1e5, i * 1e-5);
+ }
+
+ if (result != 0.0) {
+ printf("Result: %.10f\n", result);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c b/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
new file mode 100644
index 0000000000000..e49853b13fa00
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_fp16_overflow.c
@@ -0,0 +1,92 @@
+// Test precision analysis with fp16 overflow/underflow detection
+//
+// This test specifically exercises float operations that would overflow or
+// underflow when converted to fp16, verifying that the runtime correctly
+// distinguishes between input special values and lowering-induced special values.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F{{.*}}InpNaN{{.*}}D-OvFl{{.*}}F-OvFl
+// CHECK: D-OvFl:{{.*}}Double ops where lowering caused overflow
+// CHECK: F-OvFl:{{.*}}Float ops where lowering to FP16 caused overflow
+
+#include <math.h>
+#include <stdio.h>
+
+// Float addition; main passes operands whose sums stay within fp16 range
+// (the largest finite fp16 value is 65504)
+float small_float_ops(float a, float b) {
+ // These should be fine in fp16
+ return a + b;
+}
+
+// Float multiplication; main passes operands whose product overflows fp16
+float large_float_ops(float a, float b) {
+ // fp16 max is 65504, so the products main requests overflow to inf in fp16
+ return a * b;
+}
+
+// Float multiplication; main passes operands whose product underflows fp16
+// (the smallest normal fp16 value is about 6.1e-5)
+float tiny_float_ops(float a, float b) {
+ // These will underflow to zero in fp16
+ return a * b;
+}
+
+// Division used to produce special values (main calls it with 1/0 and 0/0)
+float special_input_ops(float a, float b) {
+ // With main's arguments the operands are finite; the result is Inf or NaN
+ return a / b;
+}
+
+// Double multiplication; main passes operands whose product overflows float
+double large_double_ops(double a, double b) {
+ // float max is about 3.4e38, so these products overflow when lowered
+ return a * b;
+}
+
+int main(void) {
+ float result_f = 0.0f;
+ double result_d = 0.0;
+
+ // Small float operations (should work in fp16)
+ for (int i = 1; i < 20; i++) {
+ result_f += small_float_ops(i * 1.5f, i * 2.5f);
+ }
+
+ // Large float operations (will overflow to inf in fp16)
+ for (int i = 1; i < 15; i++) {
+ float big = 10000.0f * i;
+ result_f += large_float_ops(big, big); // Result > 65504
+ }
+
+ // Tiny float operations (will underflow to 0 in fp16)
+ for (int i = 1; i < 15; i++) {
+ float tiny = 1e-4f / i;
+ result_f += tiny_float_ops(tiny, tiny); // Result <= 1e-8, far below 6e-5
+ }
+
+ // Divisions yielding Inf/NaN; each += then sees a special-valued operand
+ result_f += special_input_ops(1.0f, 0.0f); // 1/0 -> Inf
+ result_f += special_input_ops(0.0f, 0.0f); // 0/0 -> NaN
+
+ // Double operations that overflow in float
+ for (int i = 1; i < 10; i++) {
+ double huge = 1e38 * i;
+ result_d += large_double_ops(huge, huge); // Result > float_max
+ }
+
+ // Some normal double operations
+ for (int i = 1; i < 30; i++) {
+ result_d += i * 1.5 + i * 2.5;
+ }
+
+ // NOTE(review): result_f is NaN here (0/0 above), so this never prints
+ if (!isnan(result_f) && !isnan(result_d)) {
+ printf("Computation complete\n");
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/precision_mixed.c b/compiler-rt/test/instrumentor-tools/precision_mixed.c
new file mode 100644
index 0000000000000..6475ed9cfed3c
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/precision_mixed.c
@@ -0,0 +1,66 @@
+// Test precision analysis with mixed float and double operations
+//
+// This test uses both float and double operations to verify that the
+// precision analysis handles both types correctly.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Euclidean distance computed entirely in float (already lower precision)
+float compute_float_distance(float x1, float y1, float x2, float y2) {
+ float dx = x2 - x1;
+ float dy = y2 - y1;
+ return sqrtf(dx * dx + dy * dy);
+}
+
+// Euclidean distance computed in double (analyze if float would suffice)
+double compute_double_distance(double x1, double y1, double x2, double y2) {
+ double dx = x2 - x1;
+ double dy = y2 - y1;
+ return sqrt(dx * dx + dy * dy);
+}
+
+// Mixed precision computation: float input widened, then double arithmetic
+double mixed_computation(float a, double b) {
+ // Implicit conversion from float to double (exact, no rounding)
+ double a_double = a;
+ return a_double * b + a_double / b;
+}
+
+int main(void) {
+ float float_result = 0.0f;
+ double double_result = 0.0;
+
+ // Float operations
+ for (int i = 0; i < 50; i++) {
+ float_result += compute_float_distance(i * 0.1f, i * 0.2f, (i + 1) * 0.1f,
+ (i + 1) * 0.2f);
+ }
+
+ // Double operations with values that should work well in float
+ for (int i = 0; i < 50; i++) {
+ double_result +=
+ compute_double_distance(i * 0.1, i * 0.2, (i + 1) * 0.1, (i + 1) * 0.2);
+ }
+
+ // Mixed precision: float first argument, double second argument
+ for (int i = 1; i < 30; i++) {
+ double_result += mixed_computation(i * 1.5f, i * 2.5);
+ }
+
+ // Use both results so the computations above are not dead code
+ if (float_result > 0.0f && double_result > 0.0) {
+ printf("Float result: %f, Double result: %f\n", float_result,
+ double_result);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-tools/simple_precision.c b/compiler-rt/test/instrumentor-tools/simple_precision.c
new file mode 100644
index 0000000000000..5c03ccaedffee
--- /dev/null
+++ b/compiler-rt/test/instrumentor-tools/simple_precision.c
@@ -0,0 +1,56 @@
+// Test basic precision analysis functionality
+//
+// This test verifies that the precision analysis runtime correctly identifies
+// operations that could use lower precision with acceptable accuracy.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/precision-analysis/precision_analysis_config.json %s -L%lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Single-operation helpers; main passes values for which float should suffice
+double simple_add(double a, double b) { return a + b; }
+
+double simple_mul(double a, double b) { return a * b; }
+
+double simple_div(double a, double b) { return a / b; }
+
+// Function that uses values where precision matters more
+double precise_computation(double x) {
+ // These operations on small differences might need double precision
+ double y = x + 1e-8;
+ double z = y - x; // about 1e-8 when evaluated in double
+ return z * 1e8; // rescales the difference to about 1.0
+}
+
+int main(void) {
+ double result = 0.0;
+
+ // Simple operations with "normal" range values
+ // These should generally work fine with float precision
+ for (int i = 0; i < 100; i++) {
+ result += simple_add(i * 1.5, i * 2.5);
+ result += simple_mul(i * 0.5, i * 0.5);
+ if (i > 0) { // skip i == 0, which would divide 0.0 by 0.0
+ result += simple_div(i * 10.0, i * 2.0);
+ }
+ }
+
+ // Operations that might require more precision
+ for (int i = 1; i < 50; i++) {
+ result += precise_computation(i * 1.0);
+ }
+
+ // Prevent optimization from removing the computations
+ if (result > 0.0) {
+ printf("Computation complete: %f\n", result);
+ }
+
+ return 0;
+}
More information about the llvm-branch-commits
mailing list