[llvm] [LLVM][Instrumentation] Add numerical sanitizer (PR #85916)

Thu Jun 6 17:23:51 PDT 2024

================
@@ -0,0 +1,2233 @@
+//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nsan"
+
+STATISTIC(NumInstrumentedFTLoads,
+          "Number of instrumented floating-point loads");
+
+STATISTIC(NumInstrumentedFTCalls,
+          "Number of instrumented floating-point calls");
+STATISTIC(NumInstrumentedFTRets,
+          "Number of instrumented floating-point returns");
+STATISTIC(NumInstrumentedFTStores,
+          "Number of instrumented floating-point stores");
+STATISTIC(NumInstrumentedNonFTStores,
+          "Number of instrumented non floating-point stores");
+STATISTIC(
+    NumInstrumentedNonFTMemcpyStores,
+    "Number of instrumented non floating-point stores with memcpy semantics");
+STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
+
+// Using smaller shadow types types can help improve speed. For example, `dlq`
+// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
+// `dqq`.
+static cl::opt<std::string> ClShadowMapping(
+    "nsan-shadow-type-mapping", cl::init("dqq"),
+    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
+             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
+             "ppc_fp128 (extended double) respectively. The default is to "
+             "shadow `float` as `double`, and `double` and `x86_fp80` as "
+             "`fp128`"),
+    cl::Hidden);
+
+static cl::opt<bool>
+    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
+                     cl::desc("Instrument floating-point comparisons"),
+                     cl::Hidden);
+
+static cl::opt<std::string> ClCheckFunctionsFilter(
+    "check-functions-filter",
+    cl::desc("Only emit checks for arguments of functions "
+             "whose names match the given regular expression"),
+    cl::value_desc("regex"));
+
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons:"
+        "For equality comparisons such as `x == 0.0f`, we can perform the "
+        "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
+        " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
+        "catch the case when `x_shadow` is accurate enough (and therefore "
+        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
+        "both `x` and `x_shadow` are not. "),
+    cl::Hidden);
+
+// When there is external, uninstrumented code writing to memory, the shadow
+// memory can get out of sync with the application memory. Enabling this flag
+// emits consistency checks for loads to catch this situation.
+// When everything is instrumented, this is not strictly necessary because any
+// load should have a corresponding store, but can help debug cases when the
+// framework did a bad job at tracking shadow memory modifications by failing on
+// load rather than store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads", cl::init(false),
+                                  cl::desc("Check floating-point load"),
+                                  cl::Hidden);
+
+static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
+                                   cl::desc("Check floating-point stores"),
+                                   cl::Hidden);
+
+static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
+                                cl::desc("Check floating-point return values"),
+                                cl::Hidden);
+
+// LLVM may store constant floats as bitcasted ints.
+// It's not really necessary to shadow such stores,
+// if the shadow value is unknown the framework will re-extend it on load
+// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
+// impossible to determine the floating-point type based on the size.
+// However, for debugging purposes it can be useful to model such stores.
+static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
+    "nsan-propagate-non-ft-const-stores-as-ft", cl::init(false),
+    cl::desc(
+        "Propagate non floating-point const stores as floating point values."
+        "For debugging purposes only"),
+    cl::Hidden);
+
+static constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
+static constexpr StringLiteral kNsanInitName("__nsan_init");
+
+// The following values must be kept in sync with the runtime.
+static constexpr const int kShadowScale = 2;
----------------
alexander-shaposhnikov wrote:

Done

https://github.com/llvm/llvm-project/pull/85916