[llvm] Add numerical sanitizer (PR #85916)

Thu May 16 03:33:25 PDT 2024

================
@@ -0,0 +1,2265 @@
+//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nsan"
+
+STATISTIC(NumInstrumentedFTLoads,
+          "Number of instrumented floating-point loads");
+
+STATISTIC(NumInstrumentedFTCalls,
+          "Number of instrumented floating-point calls");
+STATISTIC(NumInstrumentedFTRets,
+          "Number of instrumented floating-point returns");
+STATISTIC(NumInstrumentedFTStores,
+          "Number of instrumented floating-point stores");
+STATISTIC(NumInstrumentedNonFTStores,
+          "Number of instrumented non floating-point stores");
+STATISTIC(
+    NumInstrumentedNonFTMemcpyStores,
+    "Number of instrumented non floating-point stores with memcpy semantics");
+STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
+
+// Using smaller shadow types types can help improve speed. For example, `dlq`
+// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
+// `dqq`.
+static cl::opt<std::string> ClShadowMapping(
+    "nsan-shadow-type-mapping", cl::init("dqq"),
+    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
+             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
+             "ppc_fp128 (extended double) respectively. The default is to "
+             "shadow `float` as `double`, and `double` and `x86_fp80` as "
+             "`fp128`"),
+    cl::Hidden);
+
+static cl::opt<bool>
+    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
+                     cl::desc("Instrument floating-point comparisons"),
+                     cl::Hidden);
+
+static cl::opt<std::string> ClCheckFunctionsFilter(
+    "check-functions-filter",
+    cl::desc("Only emit checks for arguments of functions "
+             "whose names match the given regular expression"),
+    cl::value_desc("regex"));
+
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons:"
+        "For equality comparisons such as `x == 0.0f`, we can perform the "
+        "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
+        " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
+        "catch the case when `x_shadow` is accurate enough (and therefore "
+        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
+        "both `x` and `x_shadow` are not. "),
+    cl::Hidden);
+
+// When there is external, uninstrumented code writing to memory, the shadow
+// memory can get out of sync with the application memory. Enabling this flag
+// emits consistency checks for loads to catch this situation.
+// When everything is instrumented, this is not strictly necessary because any
+// load should have a corresponding store, but can help debug cases when the
+// framework did a bad job at tracking shadow memory modifications by failing on
+// load rather than store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads", cl::init(false),
+                                  cl::desc("Check floating-point load"),
+                                  cl::Hidden);
+
+static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
+                                   cl::desc("Check floating-point stores"),
+                                   cl::Hidden);
+
+static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
+                                cl::desc("Check floating-point return values"),
+                                cl::Hidden);
+
+static const char *const kNsanModuleCtorName = "nsan.module_ctor";
+static const char *const kNsanInitName = "__nsan_init";
+
+// The following values must be kept in sync with the runtime.
+static constexpr const int kShadowScale = 2;
+static constexpr const int kMaxVectorWidth = 8;
+static constexpr const int kMaxNumArgs = 128;
+static constexpr const int kMaxShadowTypeSizeBytes = 16; // fp128
+
+namespace {
+
+// Defines the characteristics (type id, type, and floating-point semantics)
+// attached for all possible shadow types.
+class ShadowTypeConfig {
+public:
+  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
+  // The floating-point semantics of the shadow type.
+  virtual const fltSemantics &semantics() const = 0;
+
+  // The LLVM Type corresponding to the shadow type.
+  virtual Type *getType(LLVMContext &Context) const = 0;
+
+  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
+  virtual char getNsanTypeId() const = 0;
+
+  virtual ~ShadowTypeConfig() {}
+};
+
+template <char NsanTypeId>
+class ShadowTypeConfigImpl : public ShadowTypeConfig {
+public:
+  char getNsanTypeId() const override { return NsanTypeId; }
+  static constexpr const char kNsanTypeId = NsanTypeId;
+};
+
+// `double` (`d`) shadow type.
+class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::IEEEdouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getDoubleTy(Context);
+  }
+};
+
+// `x86_fp80` (`l`) shadow type: X86 long double.
+class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::x87DoubleExtended();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getX86_FP80Ty(Context);
+  }
+};
+
+// `fp128` (`q`) shadow type.
+class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
+  const fltSemantics &semantics() const override { return APFloat::IEEEquad(); }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getFP128Ty(Context);
+  }
+};
+
+// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
+class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::PPCDoubleDouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getPPC_FP128Ty(Context);
+  }
+};
+
+// Creates a ShadowTypeConfig given its type id.
+std::unique_ptr<ShadowTypeConfig>
+ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
+  switch (TypeId) {
+  case F64ShadowConfig::kNsanTypeId:
+    return std::make_unique<F64ShadowConfig>();
+  case F80ShadowConfig::kNsanTypeId:
+    return std::make_unique<F80ShadowConfig>();
+  case F128ShadowConfig::kNsanTypeId:
+    return std::make_unique<F128ShadowConfig>();
+  case PPC128ShadowConfig::kNsanTypeId:
+    return std::make_unique<PPC128ShadowConfig>();
+  }
+  errs() << "nsan: invalid shadow type id'" << TypeId << "'\n";
----------------
arsenm wrote:

Can't just print to errs 

https://github.com/llvm/llvm-project/pull/85916