[llvm-branch-commits] [compiler-rt] [Instrumentor] Add runtime examples: [2/N] A FP precision analysis (PR #205229)
Johannes Doerfert via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jun 22 17:57:02 PDT 2026
https://github.com/jdoerfert created https://github.com/llvm/llvm-project/pull/205229
Second example:
Check all floating point operations and track if they could be done at lower precision.
Partially developed by Claude (AI), tested and verified by me.
>From 0dddce7d4464f4ffa664f0ea1ab8eb91baf8eea5 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert.llvm at gmail.com>
Date: Mon, 22 Jun 2026 17:52:58 -0700
Subject: [PATCH] [Instrumentor] Add runtime examples: [2/N] A FP precision
analysis
Second example:
Check all floating point operations and track if they could be done at
lower precision.
Partially developed by Claude (AI), tested and verified by me.
---
.../lib/instrumentor-examples/CMakeLists.txt | 1 +
.../lib/instrumentor-examples/README.md | 26 +
.../precision-analysis/CMakeLists.txt | 68 +++
.../precision_analysis_config.json | 43 ++
.../precision_analysis_runtime.cpp | 569 ++++++++++++++++++
.../test/instrumentor-examples/CMakeLists.txt | 1 +
.../test/instrumentor-examples/lit.cfg.py | 6 +
.../precision_detailed.c | 76 +++
.../precision_fp16_overflow.c | 91 +++
.../instrumentor-examples/precision_mixed.c | 66 ++
.../instrumentor-examples/simple_precision.c | 56 ++
11 files changed, 1003 insertions(+)
create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
create mode 100644 compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
create mode 100644 compiler-rt/test/instrumentor-examples/precision_detailed.c
create mode 100644 compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
create mode 100644 compiler-rt/test/instrumentor-examples/precision_mixed.c
create mode 100644 compiler-rt/test/instrumentor-examples/simple_precision.c
diff --git a/compiler-rt/lib/instrumentor-examples/CMakeLists.txt b/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
index b7f9f5cb627ff..693f90b29b1ae 100644
--- a/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
+++ b/compiler-rt/lib/instrumentor-examples/CMakeLists.txt
@@ -9,3 +9,4 @@ add_compiler_rt_component(instrumentor-examples)
# Add subdirectories for specific examples
add_subdirectory(flop-counter)
+add_subdirectory(precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-examples/README.md b/compiler-rt/lib/instrumentor-examples/README.md
index 4dda5b7033efb..3c8c1234d63cd 100644
--- a/compiler-rt/lib/instrumentor-examples/README.md
+++ b/compiler-rt/lib/instrumentor-examples/README.md
@@ -37,6 +37,32 @@ clang -O2 -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=con
# At program exit, FLOP statistics will be printed
```
+### Precision Analysis (`precision-analysis/`)
+
+Analyzes the minimum floating-point precision needed for each operation while
+maintaining acceptable accuracy.
+
+**Features:**
+- Per-operation precision requirement analysis
+- Multi-level precision checking:
+ - **Double operations**: Checks Float first, then FP16 if Float works
+ - **Float operations**: Checks FP16
+- Tracks relative error against a fixed threshold of 0.1% (the compile-time constant `DEFAULT_RELATIVE_ERROR_THRESHOLD` in the runtime)
+- Distinguishes input special values from lowering-induced overflow/underflow
+- Reports which operations can use FP16, which need Float, and which need Double
+- IEEE 754 half-precision (fp16) software emulation
+- Provides detailed recommendations for precision optimization
+
+**Usage:**
+```bash
+# Compile your program with instrumentor
+clang -O2 -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=precision_analysis_config.json your_program.c -lclang_rt.precision_analysis -o your_program
+
+# Run it
+./your_program
+# At program exit, precision analysis results will be printed
+```
+
## Building
The instrumentor examples are built as part of the compiler-rt build:
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt b/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
new file mode 100644
index 0000000000000..76560699d2ed2
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/CMakeLists.txt
@@ -0,0 +1,68 @@
+# CMakeLists.txt for Precision Analysis Example
+#
+# This example demonstrates analyzing whether floating-point operations
+# could use lower precision while maintaining acceptable accuracy using
+# the Instrumentor pass.
+
+add_compiler_rt_component(precision-analysis)
+
+set(PRECISION_ANALYSIS_SOURCES
+  precision_analysis_runtime.cpp
+  )
+# Header the runtime actually includes (see precision_analysis_runtime.cpp).
+set(PRECISION_ANALYSIS_HEADERS
+  ../instrumentor_runtime.h
+  )
+
+# Directory-scoped include paths used when compiling the runtime sources.
+# NOTE(review): the runtime includes "../instrumentor_runtime.h" by relative path; confirm llvm/utils is still needed.
+include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+# Compiler flags: compiler-rt's common flags plus the C++17 language level.
+set(PRECISION_ANALYSIS_CFLAGS
+ ${COMPILER_RT_COMMON_CFLAGS}
+ -std=c++17
+ )
+
+# Pick the architectures to build for (highest-priority source first)
+if(NOT APPLE)
+  # Non-Apple platforms: build for the default target architecture only
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+elseif(DARWIN_osx_ARCHS)
+  # Darwin: DARWIN_osx_ARCHS wins whenever it is set
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${DARWIN_osx_ARCHS})
+elseif(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "")
+  # Darwin: otherwise honour CMAKE_OSX_ARCHITECTURES
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES})
+else()
+  # Darwin: nothing specified, fall back to arm64
+  set(PRECISION_ANALYSIS_SUPPORTED_ARCH arm64)
+endif()
+
+message(STATUS "Precision Analysis supported architectures: ${PRECISION_ANALYSIS_SUPPORTED_ARCH}")
+
+# Build the static runtime library.
+#
+# The Apple and non-Apple invocations differ only in the OS list, so the
+# optional "OS osx" arguments are collected first and spliced into a single
+# add_compiler_rt_runtime() call; the variable expands to nothing when it is
+# left unset on non-Apple hosts.
+set(PRECISION_ANALYSIS_OS_ARGS)
+if(APPLE)
+  set(PRECISION_ANALYSIS_OS_ARGS OS osx)
+endif()
+
+add_compiler_rt_runtime(clang_rt.precision_analysis
+  STATIC
+  ${PRECISION_ANALYSIS_OS_ARGS}
+  ARCHS ${PRECISION_ANALYSIS_SUPPORTED_ARCH}
+  CFLAGS ${PRECISION_ANALYSIS_CFLAGS}
+  SOURCES ${PRECISION_ANALYSIS_SOURCES}
+  ADDITIONAL_HEADERS ${PRECISION_ANALYSIS_HEADERS}
+  PARENT_TARGET precision-analysis)
+
+# Install the instrumentor configuration file as a shared resource
+install(
+  FILES ${CMAKE_CURRENT_SOURCE_DIR}/precision_analysis_config.json
+  DESTINATION share/llvm/instrumentor-configs COMPONENT precision-analysis)
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
new file mode 100644
index 0000000000000..16a9298c98bee
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_config.json
@@ -0,0 +1,43 @@
+{
+ "configuration": {
+ "runtime_prefix": "__precision_analysis_",
+ "runtime_prefix.description": "The runtime API prefix.",
+ "runtime_stubs_file": "rt",
+ "target_regex": "",
+ "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.",
+ "function_regex": "",
+ "function_regex.description": "Regular expression to be matched against a function name. Only functions that match this regex will be instrumented.",
+ "demangle_function_names": true,
+ "demangle_function_names.description": "Demangle function names passed to the runtime.",
+ "host_enabled": true,
+ "host_enabled.description": "Instrument non-GPU targets",
+ "gpu_enabled": false,
+ "gpu_enabled.description": "Instrument GPU targets"
+ },
+ "instruction_post": {
+ "numeric": {
+ "enabled": true,
+ "filter": "(type_id == 2 || type_id == 3) || ((type_id == 17 || type_id == 18) && (sub_type_id == 2 || sub_type_id == 3))",
+ "filter.description": "Only instrument float (type_id 2) and double (type_id 3) operations, or vectors of those types.",
+ "type_id": true,
+ "type_id.description": "The operation's type id.",
+ "sub_type_id": true,
+ "sub_type_id.description": "The operation's sub-type id (for vectors).",
+ "size": true,
+ "size.description": "The operation's type size.",
+ "opcode": true,
+ "opcode.description": "The instruction opcode.",
+ "left": true,
+ "left.description": "The operation's left operand.",
+ "right": true,
+ "right.description": "The operation's right operand. This value is poison for unary operations.",
+ "result": true,
+ "result.replace": false,
+ "result.description": "Result of the operation.",
+ "flags": true,
+ "flags.description": "A bitmask value signaling which instruction flags are present.",
+ "id": true,
+ "id.description": "A unique ID associated with the given instrumentor call"
+ }
+ }
+}
diff --git a/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
new file mode 100644
index 0000000000000..7057eb9229ec0
--- /dev/null
+++ b/compiler-rt/lib/instrumentor-examples/precision-analysis/precision_analysis_runtime.cpp
@@ -0,0 +1,569 @@
+//===-- precision_analysis_runtime.cpp - Precision Analysis Runtime ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements runtime for analyzing whether floating-point operations
+// could be performed with lower precision while maintaining acceptable
+// accuracy. It instruments FP operations, simulates them with lower precision,
+// and compares results to determine if precision reduction is viable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../instrumentor_runtime.h"
+
+#include <atomic>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <mutex>
+
+// Relative error bound below which a lower-precision result counts as
+// "acceptable": |result_lower - result_higher| / |result_higher| must be
+// strictly less than this value. Fixed at compile time.
+static constexpr double DEFAULT_RELATIVE_ERROR_THRESHOLD = 1e-3; // 0.1%
+
+// Per-operation statistics, one record per instrumentor id, split by the
+struct OperationStats { // precision the operation originally executed in.
+ uint64_t total_count; // Every invocation, including the special-value cases
+
+ // Double-precision operations (started as double)
+ uint64_t double_to_fp16; // Float and fp16 both stayed below the threshold
+ uint64_t double_to_float; // Float was fine; fp16 was too lossy or overflowed
+ uint64_t double_needs_double; // Float already exceeded the error threshold
+
+ // Float-precision operations (started as float)
+ uint64_t float_to_fp16; // fp16 stayed below the error threshold
+ uint64_t float_needs_float; // fp16 exceeded the error threshold
+
+ // Special values (not counted in any of the buckets above)
+ uint64_t
+ input_special_values; // An operand or the original result was NaN/Inf
+ uint64_t lowering_special_values; // The first lowering step produced
+ // NaN/Inf from finite inputs
+};
+
+// Accessors for the global statistics map and the mutex guarding it.
+//
+// Both objects live behind function-local statics, so they are created on
+// first use regardless of static initialization order. They are allocated
+// with `new` and deliberately never freed: the report is printed from a
+// destructor function at program exit, and a leaked heap object is still
+// valid there even if static destructors have already run. A one-shot
+// profiling runtime can afford that leak.
+static std::map<int32_t, OperationStats> &get_operation_stats() {
+  using StatsMap = std::map<int32_t, OperationStats>;
+  static StatsMap &instance = *new StatsMap();
+  return instance;
+}
+
+static std::mutex &get_stats_mutex() {
+  static std::mutex &instance = *new std::mutex();
+  return instance;
+}
+
+enum { // Opcode numbers compared against the `opcode` argument of the hooks.
+ LLVM_OPCODE_FAdd = 15, // NOTE(review): presumably mirrors the opcode
+ LLVM_OPCODE_FSub = 17, // numbering of LLVM's Instruction.def at this
+ LLVM_OPCODE_FMul = 19, // revision; confirm the values whenever that
+ LLVM_OPCODE_FDiv = 22, // table changes.
+ LLVM_OPCODE_FRem = 25,
+ LLVM_OPCODE_FNeg = 13,
+};
+
+// Helper: quantize `value` to IEEE 754 half precision (1 sign bit, 5 exponent
+// bits, 10 mantissa bits) and return the quantized value widened to float.
+static inline float simulate_fp16_precision(float value) {
+  // NaN and Inf have fp16 counterparts, pass them through unchanged
+  if (std::isnan(value) || std::isinf(value)) {
+    return value;
+  }
+
+  uint32_t bits;
+  std::memcpy(&bits, &value, sizeof(float));
+
+  uint32_t sign = bits & 0x80000000u;
+  int32_t exponent = ((bits >> 23) & 0xFF) - 127;
+  uint32_t mantissa = bits & 0x7FFFFFu;
+
+  // Normal fp16 exponents span -14 to +15 (biased 1 to 30). Anything smaller,
+  // including zero and the fp16 subnormal range, is flushed to a signed zero.
+  if (exponent < -14) {
+    return sign ? -0.0f : 0.0f;
+  }
+
+  // Overflow to infinity
+  if (exponent > 15) {
+    return sign ? -INFINITY : INFINITY;
+  }
+
+  // Drop 13 mantissa bits with round-to-nearest-even: a tie only rounds up
+  uint32_t round_bias = 0x0FFFu + ((mantissa >> 13) & 1u); // if the kept LSB is odd
+  uint32_t fp16_mantissa = (mantissa + round_bias) >> 13;
+  if (fp16_mantissa > 0x3FF) {
+    fp16_mantissa = 0; // Rounding carried out of the mantissa
+    exponent++;
+    if (exponent > 15) {
+      return sign ? -INFINITY : INFINITY;
+    }
+  }
+
+  // Reconstruct float with reduced precision
+  uint32_t fp16_exponent = (exponent + 127) & 0xFF;
+  uint32_t result_bits = sign | (fp16_exponent << 23) | (fp16_mantissa << 13);
+
+  float result;
+  std::memcpy(&result, &result_bits, sizeof(float));
+  return result;
+}
+
+// Helper: true for NaN and +/-Inf, i.e. for every non-finite value
+static inline bool is_special_value(double value) {
+  return !std::isfinite(value);
+}
+
+static inline bool is_special_value(float value) {
+  return !std::isfinite(value);
+}
+
+// Helper: Compute relative error
+static inline double compute_relative_error(double reference, double test) {
+ if (reference == 0.0) {
+ return (test == 0.0) ? 0.0 : INFINITY;
+ }
+ return std::fabs((test - reference) / reference);
+}
+
+// Helper: redo a double operation in float precision (double → float).
+// Both operands are narrowed first, the operation runs on floats, and the
+// float result is widened again. Unrecognized opcodes yield NaN.
+static double simulate_lower_precision_op(int32_t opcode, double left,
+                                          double right) {
+  const float lhs = static_cast<float>(left);
+  const float rhs = static_cast<float>(right);
+  float narrowed;
+  switch (opcode) {
+  case LLVM_OPCODE_FNeg:
+    narrowed = -lhs;
+    break;
+  case LLVM_OPCODE_FAdd:
+    narrowed = lhs + rhs;
+    break;
+  case LLVM_OPCODE_FSub:
+    narrowed = lhs - rhs;
+    break;
+  case LLVM_OPCODE_FMul:
+    narrowed = lhs * rhs;
+    break;
+  case LLVM_OPCODE_FDiv:
+    narrowed = lhs / rhs;
+    break;
+  case LLVM_OPCODE_FRem:
+    narrowed = std::fmod(lhs, rhs);
+    break;
+  default:
+    // For unknown operations, assume lower precision is not ok
+    return NAN;
+  }
+  return static_cast<double>(narrowed);
+}
+
+// Helper: redo a float operation in emulated fp16 (float → fp16). Operands
+// and result are each quantized to fp16; unrecognized opcodes yield NaN.
+static float simulate_fp16_op(int32_t opcode, float left, float right) {
+  const float lhs = simulate_fp16_precision(left);
+  const float rhs = simulate_fp16_precision(right);
+
+  float raw;
+  switch (opcode) {
+  case LLVM_OPCODE_FNeg:
+    raw = -lhs;
+    break;
+  case LLVM_OPCODE_FAdd:
+    raw = lhs + rhs;
+    break;
+  case LLVM_OPCODE_FSub:
+    raw = lhs - rhs;
+    break;
+  case LLVM_OPCODE_FMul:
+    raw = lhs * rhs;
+    break;
+  case LLVM_OPCODE_FDiv:
+    raw = lhs / rhs;
+    break;
+  case LLVM_OPCODE_FRem:
+    raw = std::fmod(lhs, rhs);
+    break;
+  default:
+    return NAN;
+  }
+  // The arithmetic above ran in float, so quantize the result as well
+  return simulate_fp16_precision(raw);
+}
+
+// Analyze a double-precision operation
+// Check if float precision would suffice, and if so, also check if fp16 would
+// work. Exactly one counter besides total_count is incremented per call.
+static void analyze_double_operation(int32_t opcode, double left, double right,
+                                     double result, int32_t id) {
+  std::lock_guard<std::mutex> lock(get_stats_mutex());
+
+  OperationStats &stats = get_operation_stats()[id];
+  stats.total_count++;
+
+  // Check for special values in inputs or result
+  if (is_special_value(result) || is_special_value(left) ||
+      is_special_value(right)) {
+    stats.input_special_values++;
+    return;
+  }
+
+  // First, try double → float
+  double float_result = simulate_lower_precision_op(opcode, left, right);
+  // Deliberately no per-operation tracing here: this function runs for every
+  // instrumented double operation while holding the statistics mutex, and
+  // tracing would flood the instrumented program's stdout.
+
+  // Check if lowering to float created special values (overflow/underflow)
+  if (is_special_value(float_result)) {
+    stats.lowering_special_values++;
+    return;
+  }
+
+  // Compare double vs float results
+  double float_error = compute_relative_error(result, float_result);
+
+  if (float_error >= DEFAULT_RELATIVE_ERROR_THRESHOLD) {
+    // Float precision is not sufficient, need double
+    stats.double_needs_double++;
+    return;
+  }
+
+  // Float precision is acceptable. Now check if fp16 would also work.
+  // Convert operands to float, then simulate fp16 operation
+  float left_f = static_cast<float>(left);
+  float right_f = static_cast<float>(right);
+  float result_f = static_cast<float>(result);
+
+  float fp16_result = simulate_fp16_op(opcode, left_f, right_f);
+
+  // Check if lowering to fp16 created special values
+  if (is_special_value(fp16_result)) {
+    // fp16 causes overflow/underflow, but float works (double → float)
+    stats.double_to_float++;
+    return;
+  }
+
+  // Compare float vs fp16 results
+  double fp16_error = compute_relative_error(static_cast<double>(result_f),
+                                             static_cast<double>(fp16_result));
+
+  if (fp16_error < DEFAULT_RELATIVE_ERROR_THRESHOLD) {
+    // fp16 precision is sufficient (double → fp16)
+    stats.double_to_fp16++;
+  } else {
+    // Need float precision but not double (double → float)
+    stats.double_to_float++;
+  }
+}
+
+// Analyze a float-precision operation: would emulated fp16 have been precise
+// enough? Every call bumps total_count and exactly one further counter of the
+// record that belongs to `id`.
+static void analyze_float_operation(int32_t opcode, float left, float right,
+                                    float result, int32_t id) {
+  std::lock_guard<std::mutex> guard(get_stats_mutex());
+  OperationStats &entry = get_operation_stats()[id];
+  ++entry.total_count;
+
+  // NaN/Inf already present in an operand or in the original result cannot
+  // be compared meaningfully, so such executions are only counted
+  const bool non_finite_io = is_special_value(left) ||
+                             is_special_value(right) ||
+                             is_special_value(result);
+  if (non_finite_io) {
+    ++entry.input_special_values;
+    return;
+  }
+
+  // Redo the operation in emulated fp16. A NaN/Inf showing up only now was
+  // introduced by the lowering itself
+  const float fp16_value = simulate_fp16_op(opcode, left, right);
+  if (is_special_value(fp16_value)) {
+    ++entry.lowering_special_values;
+    return;
+  }
+
+  // Bucket by the relative error against the original float result:
+  // below the threshold fp16 suffices (float → fp16), otherwise the
+  // operation has to stay in float (float → float)
+  const double error = compute_relative_error(static_cast<double>(result),
+                                              static_cast<double>(fp16_value));
+  const bool fp16_suffices = error < DEFAULT_RELATIVE_ERROR_THRESHOLD;
+  uint64_t &bucket =
+      fp16_suffices ? entry.float_to_fp16 : entry.float_needs_float;
+  ++bucket;
+}
+
+extern "C" {
+
+// Prints the report; declared a destructor so it runs at exit (parameters unused). Counters are uint64_t, hence PRIu64.
+__attribute__((destructor(1000))) void __precision_analysis_post_module(
+    char *module_name, char *target_triple, int32_t id) {
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf(" Floating-Point Precision Analysis Results\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf(
+      "This analysis checks minimum precision needed (error < %.2f%%):\n",
+      DEFAULT_RELATIVE_ERROR_THRESHOLD * 100);
+  std::printf(" - Double operations: Try Float, then FP16 if Float works\n");
+  std::printf(" - Float operations: Try FP16\n");
+  std::printf("================================================================"
+              "==========\n\n");
+
+  std::map<int32_t, OperationStats> &operation_stats = get_operation_stats();
+
+  if (operation_stats.empty()) {
+    std::printf("No operations analyzed.\n");
+    std::printf("=============================================================="
+                "============\n");
+    return;
+  }
+
+  uint64_t total_ops = 0;
+  uint64_t total_double_to_fp16 = 0;
+  uint64_t total_double_to_float = 0;
+  uint64_t total_double_needs_double = 0;
+  uint64_t total_float_to_fp16 = 0;
+  uint64_t total_float_needs_float = 0;
+  uint64_t total_input_special = 0;
+  uint64_t total_lowering_special = 0;
+
+  std::printf("Per-Operation Results:\n");
+  std::printf("%-5s %8s %9s %8s %6s %9s %6s %8s %8s\n", "Op ID", "Total",
+              "D->FP16", "D->F32", "D->D", "F->FP16", "F->F", "InpNaN",
+              "LowNaN");
+  std::printf("----------------------------------------------------------------"
+              "-------------\n");
+
+  for (const auto &entry : operation_stats) {
+    int32_t op_id = entry.first;
+    const OperationStats &stats = entry.second;
+
+    total_ops += stats.total_count;
+    total_double_to_fp16 += stats.double_to_fp16;
+    total_double_to_float += stats.double_to_float;
+    total_double_needs_double += stats.double_needs_double;
+    total_float_to_fp16 += stats.float_to_fp16;
+    total_float_needs_float += stats.float_needs_float;
+    total_input_special += stats.input_special_values;
+    total_lowering_special += stats.lowering_special_values;
+
+    std::printf("%-5" PRId32 " %8" PRIu64 " %9" PRIu64 " %8" PRIu64 " %6" PRIu64 " %9" PRIu64 " %6" PRIu64 " %8" PRIu64 " %8" PRIu64 "\n", op_id,
+                stats.total_count, stats.double_to_fp16, stats.double_to_float,
+                stats.double_needs_double, stats.float_to_fp16,
+                stats.float_needs_float, stats.input_special_values,
+                stats.lowering_special_values);
+  }
+
+  std::printf("----------------------------------------------------------------"
+              "-------------\n");
+  std::printf("%-5s %8" PRIu64 " %9" PRIu64 " %8" PRIu64 " %6" PRIu64 " %9" PRIu64 " %6" PRIu64 " %8" PRIu64 " %8" PRIu64 "\n", "TOTAL",
+              total_ops, total_double_to_fp16, total_double_to_float,
+              total_double_needs_double, total_float_to_fp16,
+              total_float_needs_float, total_input_special,
+              total_lowering_special);
+
+  std::printf("\n");
+  std::printf("Column Legend:\n");
+  std::printf(" D->FP16: Double ops that can use FP16 (16-bit)\n");
+  std::printf(
+      " D->F32: Double ops that can use Float (32-bit) but not FP16\n");
+  std::printf(" D->D: Double ops that require Double (64-bit)\n");
+  std::printf(" F->FP16: Float ops that can use FP16 (16-bit)\n");
+  std::printf(" F->F: Float ops that must stay Float (32-bit)\n");
+  std::printf(" InpNaN: Operations with NaN/Inf in inputs or result\n");
+  std::printf(
+      " LowNaN: Operations where lowering caused overflow/underflow\n");
+
+  uint64_t total_double_ops =
+      total_double_to_fp16 + total_double_to_float + total_double_needs_double;
+  uint64_t total_float_ops = total_float_to_fp16 + total_float_needs_float;
+  uint64_t analyzed_total = total_double_ops + total_float_ops;
+
+  std::printf("\n");
+  std::printf("================================================================"
+              "==========\n");
+  std::printf("Summary by Original Precision:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (total_double_ops > 0) {
+    std::printf("\nDOUBLE Operations (started as 64-bit double):\n");
+    std::printf(" Total: %" PRIu64 "\n",
+                total_double_ops);
+    std::printf(" Can reduce to FP16 (16-bit): %" PRIu64 " (%.1f%%)\n",
+                total_double_to_fp16,
+                100.0 * total_double_to_fp16 / total_double_ops);
+    std::printf(" Can reduce to Float (32-bit): %" PRIu64 " (%.1f%%)\n",
+                total_double_to_float,
+                100.0 * total_double_to_float / total_double_ops);
+    std::printf(" Must keep Double (64-bit): %" PRIu64 " (%.1f%%)\n",
+                total_double_needs_double,
+                100.0 * total_double_needs_double / total_double_ops);
+
+    uint64_t double_convertible = total_double_to_fp16 + total_double_to_float;
+    std::printf(" → Total convertible to lower: %" PRIu64 " (%.1f%%)\n",
+                double_convertible,
+                100.0 * double_convertible / total_double_ops);
+  }
+
+  if (total_float_ops > 0) {
+    std::printf("\nFLOAT Operations (started as 32-bit float):\n");
+    std::printf(" Total: %" PRIu64 "\n",
+                total_float_ops);
+    std::printf(" Can reduce to FP16 (16-bit): %" PRIu64 " (%.1f%%)\n",
+                total_float_to_fp16,
+                100.0 * total_float_to_fp16 / total_float_ops);
+    std::printf(" Must keep Float (32-bit): %" PRIu64 " (%.1f%%)\n",
+                total_float_needs_float,
+                100.0 * total_float_needs_float / total_float_ops);
+  }
+
+  std::printf("\nOVERALL Statistics:\n");
+  std::printf(" Total analyzed operations: %" PRIu64 "\n", analyzed_total);
+  std::printf(" Operations with input NaN/Inf: %" PRIu64 "\n",
+              total_input_special);
+  std::printf(" Operations causing overflow: %" PRIu64 "\n",
+              total_lowering_special);
+
+  if (analyzed_total > 0) {
+    uint64_t total_to_fp16 = total_double_to_fp16 + total_float_to_fp16;
+    std::printf("\n ALL operations reducible to FP16: %" PRIu64 " (%.1f%%)\n",
+                total_to_fp16, 100.0 * total_to_fp16 / analyzed_total);
+  }
+
+  // Provide recommendations based on results
+  std::printf("\n=============================================================="
+              "============\n");
+  std::printf("Recommendations:\n");
+  std::printf("================================================================"
+              "==========\n");
+
+  if (total_double_ops > 0) {
+    double double_to_lower = 100.0 *
+                             (total_double_to_fp16 + total_double_to_float) /
+                             total_double_ops;
+    std::printf("\nFor DOUBLE operations:\n");
+    if (double_to_lower > 80.0) {
+      std::printf(
+          " ✓ %.1f%% can use lower precision - strong conversion candidate\n",
+          double_to_lower);
+      if (total_double_to_fp16 > total_double_to_float) {
+        std::printf(" ✓ Many can go directly to FP16 - consider aggressive "
+                    "downcasting\n");
+      } else {
+        std::printf(
+            " ✓ Most need Float - consider using f32 instead of f64\n");
+      }
+    } else if (double_to_lower > 50.0) {
+      std::printf(
+          " ~ %.1f%% can use lower precision - mixed precision recommended\n",
+          double_to_lower);
+    } else {
+      std::printf(" ✗ Only %.1f%% can use lower precision - keep double\n",
+                  double_to_lower);
+    }
+  }
+
+  if (total_float_ops > 0) {
+    double float_to_fp16_pct = 100.0 * total_float_to_fp16 / total_float_ops;
+    std::printf("\nFor FLOAT operations:\n");
+    if (float_to_fp16_pct > 80.0) {
+      std::printf(
+          " ✓ %.1f%% can use FP16 - strong FP16 conversion candidate\n",
+          float_to_fp16_pct);
+    } else if (float_to_fp16_pct > 50.0) {
+      std::printf(" ~ %.1f%% can use FP16 - selective FP16 use recommended\n",
+                  float_to_fp16_pct);
+    } else {
+      std::printf(" ✗ Only %.1f%% can use FP16 - keep float\n",
+                  float_to_fp16_pct);
+    }
+  }
+
+  if (total_lowering_special > 0) {
+    std::printf("\n⚠ Warning: %" PRIu64 " operations caused overflow/underflow with "
+                "lower precision.\n",
+                total_lowering_special);
+    std::printf(
+        " These operations have values outside the lower precision range.\n");
+  }
+
+  std::printf("================================================================"
+              "==========\n");
+}
+
+void __precision_analysis_post_numeric(int32_t type_id, int32_t sub_type_id,
+                                       int32_t size, int32_t opcode,
+                                       int64_t left, int64_t right,
+                                       int64_t result, int64_t flags,
+                                       int32_t id) {
+  // Post-instruction hook for scalar floating-point operations.
+  //
+  // `left`, `right` and `result` carry the raw bits of the operands and of
+  // the result in 64-bit slots; `opcode` selects the operation and `id`
+  // selects the statistics record. `sub_type_id`, `size` and `flags` belong
+  // to the hook signature but are not consulted here.
+
+  // For vector operations, we'd need to extract each element
+  // For now, skip vector operations (they're more complex)
+  if (type_id == FixedVectorTyID || type_id == ScalableVectorTyID) {
+    return;
+  }
+
+  // Analyze based on type. The values are copied out of their slots with
+  // memcpy: reading an int64_t object through a double or float lvalue would
+  // violate the strict-aliasing rules.
+  if (type_id == DoubleTyID) {
+    // Double precision operation - check if float would suffice
+    double left_val, right_val, result_val;
+    std::memcpy(&left_val, &left, sizeof(double));
+    std::memcpy(&right_val, &right, sizeof(double));
+    std::memcpy(&result_val, &result, sizeof(double));
+
+    analyze_double_operation(opcode, left_val, right_val, result_val, id);
+  } else if (type_id == FloatTyID) {
+    // Float precision operation - check if half would suffice. The float is
+    // taken from the first sizeof(float) bytes of each 64-bit slot.
+    float left_val, right_val, result_val;
+    std::memcpy(&left_val, &left, sizeof(float));
+    std::memcpy(&right_val, &right, sizeof(float));
+    std::memcpy(&result_val, &result, sizeof(float));
+
+    analyze_float_operation(opcode, left_val, right_val, result_val, id);
+  }
+  // Skip other types (half, bfloat, extended precision)
+  //
+  // No tracing is emitted from this hook: it runs once per instrumented
+  // operation.
+}
+
+void __precision_analysis_post_numeric_ind(int32_t type_id, int32_t sub_type_id,
+ int32_t size, int32_t opcode,
+ int64_t *left_ptr,
+ int64_t *right_ptr,
+ int64_t *result_ptr, int64_t flags,
+ int32_t id) {} // Intentionally empty: operations reported through pointers are not analyzed.
+
+} // extern "C"
diff --git a/compiler-rt/test/instrumentor-examples/CMakeLists.txt b/compiler-rt/test/instrumentor-examples/CMakeLists.txt
index 6818c984f7a2c..7be899377cb35 100644
--- a/compiler-rt/test/instrumentor-examples/CMakeLists.txt
+++ b/compiler-rt/test/instrumentor-examples/CMakeLists.txt
@@ -5,6 +5,7 @@ set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(INSTRUMENTOR_TESTSUITES)
set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter)
+list(APPEND INSTRUMENTOR_TEST_DEPS precision-analysis)
# Check if INSTRUMENTOR_SUPPORTED_ARCH is defined
if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH)
diff --git a/compiler-rt/test/instrumentor-examples/lit.cfg.py b/compiler-rt/test/instrumentor-examples/lit.cfg.py
index 67cf4db671af3..92f63300180c4 100644
--- a/compiler-rt/test/instrumentor-examples/lit.cfg.py
+++ b/compiler-rt/test/instrumentor-examples/lit.cfg.py
@@ -59,6 +59,12 @@ def make_lib_name(name):
flop_counter_lib = make_lib_name("flop_counter")
config.substitutions.append(("%flop_counter_lib", flop_counter_lib))
+# Add path to Precision Analysis runtime library
+config.substitutions.append(("%precision_analysis_lib_dir", config.compiler_rt_libdir))
+# NOTE(review): %precision_analysis_lib is a prefix of the substitution above; lit is assumed to apply substitutions in list order, so keep this one registered second - confirm.
+precision_analysis_lib = make_lib_name("precision_analysis")
+config.substitutions.append(("%precision_analysis_lib", precision_analysis_lib))
+
# Add path to instrumentor config files
instrumentor_config_dir = os.path.join(
config.test_source_root, "..", "..", "lib", "instrumentor-examples", "flop-counter"
diff --git a/compiler-rt/test/instrumentor-examples/precision_detailed.c b/compiler-rt/test/instrumentor-examples/precision_detailed.c
new file mode 100644
index 0000000000000..ddc30ac537a9b
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_detailed.c
@@ -0,0 +1,76 @@
+// Test precision analysis with detailed per-operation tracking
+//
+// This test demonstrates how the precision analysis tracks each operation
+// separately by ID and shows detailed statistics.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: TOTAL
+// CHECK: Column Legend:
+// CHECK: D->FP16:{{.*}}Double ops that can use FP16
+// CHECK: Summary by Original Precision:
+
+#include <stdio.h>
+
+// Each of these operations will get a unique ID
+// We can track their precision requirements separately
+
+double operation_a(double x, double y) {
+ // Simple addition - should work well with float
+ return x + y;
+}
+
+double operation_b(double x, double y) {
+ // Multiplication - should work well with float for normal ranges
+ return x * y;
+}
+
+double operation_c(double x, double y) {
+ // Division - might need more precision depending on values
+ return x / y;
+}
+
+double operation_d(double x) {
+ // Subtraction of close values - might need double precision
+ double y = x + 1e-6;
+ return y - x;
+}
+
+double complex_operation(double a, double b) {
+ // Multiple operations in sequence
+ double temp1 = a * b; // Op 1
+ double temp2 = temp1 + a; // Op 2
+ double temp3 = temp2 / b; // Op 3
+ return temp3;
+}
+
+int main(void) {
+ double result = 0.0;
+
+ // Execute operations multiple times. The runtime keys its statistics on the
+ // instrumentor-provided id, which presumably identifies the instrumented FP instruction rather than the call site - confirm.
+ for (int i = 1; i < 20; i++) {
+ result += operation_a(i * 1.0, i * 2.0);
+ result += operation_b(i * 1.5, i * 0.5);
+ result += operation_c(i * 10.0, i * 2.0);
+ result += operation_d(i * 100.0);
+ result += complex_operation(i * 1.5, i * 2.5);
+ }
+
+ // Some operations with different value ranges
+ for (int i = 1; i < 10; i++) {
+ // Very small values - might need double precision
+ result += operation_a(i * 1e-5, i * 1e-5);
+ // Large values - might work with float
+ result += operation_b(i * 1e5, i * 1e-5);
+ }
+
+ if (result != 0.0) {
+ printf("Result: %.10f\n", result);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c b/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
new file mode 100644
index 0000000000000..8bb0029d664d2
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_fp16_overflow.c
@@ -0,0 +1,91 @@
+// Test precision analysis with fp16 overflow/underflow detection
+//
+// This test specifically exercises float operations that would overflow or
+// underflow when converted to fp16, verifying that the runtime correctly
+// distinguishes between input special values and lowering-induced special values.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: Op ID{{.*}}Total{{.*}}D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F{{.*}}InpNaN{{.*}}LowNaN
+// CHECK: LowNaN:{{.*}}Operations where lowering caused overflow/underflow
+
+#include <math.h>
+#include <stdio.h>
+
+// Float operations with values that work in fp16 range
+// The largest finite fp16 value is 65504
+float small_float_ops(float a, float b) {
+ // These should be fine in fp16
+ return a + b;
+}
+
+// Float operations that will overflow in fp16
+float large_float_ops(float a, float b) {
+ // fp16 max is ~65504, these will overflow to inf
+ return a * b;
+}
+
+// Float operations that will underflow in fp16
+// fp16 min normal is about 6.1e-5
+float tiny_float_ops(float a, float b) {
+ // These will underflow to zero in fp16
+ return a * b;
+}
+
+// Operations that produce Inf/NaN results from finite inputs
+float special_input_ops(float a, float b) {
+ // main() passes b == 0, so the quotient is Inf or NaN before any lowering
+ return a / b;
+}
+
+// Double operations with large values
+double large_double_ops(double a, double b) {
+ // float max is about 3.4e38, these will overflow
+ return a * b;
+}
+
+int main(void) {
+ float result_f = 0.0f;
+ double result_d = 0.0;
+
+ // Small float operations (should work in fp16)
+ for (int i = 1; i < 20; i++) {
+ result_f += small_float_ops(i * 1.5f, i * 2.5f);
+ }
+
+ // Large float operations (will overflow to inf in fp16)
+ for (int i = 1; i < 15; i++) {
+ float big = 10000.0f * i;
+ result_f += large_float_ops(big, big); // Result > 65504
+ }
+
+ // Tiny float operations (will underflow to 0 in fp16)
+ for (int i = 1; i < 15; i++) {
+ float tiny = 1e-4f / i;
+ result_f += tiny_float_ops(tiny, tiny); // Result <= 1e-8, rounds to 0 in fp16
+ }
+
+ // Divisions by zero: finite inputs, Inf/NaN results
+ result_f += special_input_ops(1.0f, 0.0f); // Inf
+ result_f += special_input_ops(0.0f, 0.0f); // NaN
+
+ // Double operations that overflow in float
+ for (int i = 1; i < 10; i++) {
+ double huge = 1e38 * i;
+ result_d += large_double_ops(huge, huge); // Result > float_max
+ }
+
+ // Some normal double operations
+ for (int i = 1; i < 30; i++) {
+ result_d += i * 1.5 + i * 2.5;
+ }
+
+ if (!isnan(result_f) && !isnan(result_d)) {
+ printf("Computation complete\n");
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/precision_mixed.c b/compiler-rt/test/instrumentor-examples/precision_mixed.c
new file mode 100644
index 0000000000000..b0a9f37ae61c3
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/precision_mixed.c
@@ -0,0 +1,66 @@
+// Test precision analysis with mixed float and double operations
+//
+// This test uses both float and double operations to verify that the
+// precision analysis handles both types correctly.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Float operations (already using lower precision)
+float compute_float_distance(float x1, float y1, float x2, float y2) {
+ float dx = x2 - x1;
+ float dy = y2 - y1;
+ return sqrtf(dx * dx + dy * dy);
+}
+
+// Double operations (analyze if float would suffice)
+double compute_double_distance(double x1, double y1, double x2, double y2) {
+ double dx = x2 - x1;
+ double dy = y2 - y1;
+ return sqrt(dx * dx + dy * dy);
+}
+
+// Mixed precision computation
+double mixed_computation(float a, double b) {
+ // Implicit conversion from float to double
+ double a_double = a;
+ return a_double * b + a_double / b;
+}
+
+int main(void) {
+ float float_result = 0.0f;
+ double double_result = 0.0;
+
+ // Float operations
+ for (int i = 0; i < 50; i++) {
+ float_result += compute_float_distance(i * 0.1f, i * 0.2f, (i + 1) * 0.1f,
+ (i + 1) * 0.2f);
+ }
+
+ // Double operations with values that should work well in float
+ for (int i = 0; i < 50; i++) {
+ double_result +=
+ compute_double_distance(i * 0.1, i * 0.2, (i + 1) * 0.1, (i + 1) * 0.2);
+ }
+
+ // Mixed precision
+ for (int i = 1; i < 30; i++) {
+ double_result += mixed_computation(i * 1.5f, i * 2.5);
+ }
+
+ // Prevent optimization
+ if (float_result > 0.0f && double_result > 0.0) {
+ printf("Float result: %f, Double result: %f\n", float_result,
+ double_result);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/instrumentor-examples/simple_precision.c b/compiler-rt/test/instrumentor-examples/simple_precision.c
new file mode 100644
index 0000000000000..1c4323ddaae15
--- /dev/null
+++ b/compiler-rt/test/instrumentor-examples/simple_precision.c
@@ -0,0 +1,56 @@
+// Test basic precision analysis functionality
+//
+// This test verifies that the precision analysis runtime correctly identifies
+// operations that could use lower precision with acceptable accuracy.
+//
+// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%S/../../lib/instrumentor-examples/precision-analysis/precision_analysis_config.json %s -L%precision_analysis_lib_dir -l%precision_analysis_lib -o %t
+// RUN: %t | FileCheck %s
+//
+// CHECK: Floating-Point Precision Analysis Results
+// CHECK: Double operations: Try Float, then FP16 if Float works
+// CHECK: D->FP16{{.*}}D->F32{{.*}}D->D{{.*}}F->FP16{{.*}}F->F
+// CHECK: Summary by Original Precision:
+
+#include <math.h>
+#include <stdio.h>
+
+// Simple operations with large enough values that float precision is sufficient
+double simple_add(double a, double b) { return a + b; }
+
+double simple_mul(double a, double b) { return a * b; }
+
+double simple_div(double a, double b) { return a / b; }
+
+// Function that uses values where precision matters more
+double precise_computation(double x) {
+ // These operations on small differences might need double precision
+ double y = x + 1e-8;
+ double z = y - x;
+ return z * 1e8;
+}
+
+int main(void) {
+ double result = 0.0;
+
+ // Simple operations with "normal" range values
+ // These should generally work fine with float precision
+ for (int i = 0; i < 100; i++) {
+ result += simple_add(i * 1.5, i * 2.5);
+ result += simple_mul(i * 0.5, i * 0.5);
+ if (i > 0) {
+ result += simple_div(i * 10.0, i * 2.0);
+ }
+ }
+
+ // Operations that might require more precision
+ for (int i = 1; i < 50; i++) {
+ result += precise_computation(i * 1.0);
+ }
+
+ // Prevent optimization from removing the computations
+ if (result > 0.0) {
+ printf("Computation complete: %f\n", result);
+ }
+
+ return 0;
+}
More information about the llvm-branch-commits
mailing list