[llvm] Add numerical sanitizer (PR #85916)
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Wed May 29 13:22:07 PDT 2024
================
@@ -0,0 +1,2236 @@
+//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nsan"
+
+// Pass statistics: one counter per kind of construct that gets instrumented.
+STATISTIC(NumInstrumentedFTLoads,
+          "Number of instrumented floating-point loads");
+STATISTIC(NumInstrumentedFTCalls,
+          "Number of instrumented floating-point calls");
+STATISTIC(NumInstrumentedFTRets,
+          "Number of instrumented floating-point returns");
+STATISTIC(NumInstrumentedFTStores,
+          "Number of instrumented floating-point stores");
+STATISTIC(NumInstrumentedNonFTStores,
+          "Number of instrumented non floating-point stores");
+STATISTIC(
+    NumInstrumentedNonFTMemcpyStores,
+    "Number of instrumented non floating-point stores with memcpy semantics");
+STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
+
+// Narrower shadow types can make instrumented code faster. For example, `dlq`
+// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
+// `dqq`.
+static cl::opt<std::string> ClShadowMapping(
+    "nsan-shadow-type-mapping",
+    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
+             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
+             "ppc_fp128 (extended double) respectively. The default is to "
+             "shadow `float` as `double`, and `double` and `x86_fp80` as "
+             "`fp128`"),
+    cl::init("dqq"), cl::Hidden);
+
+// Whether floating-point comparisons are instrumented (enabled by default).
+static cl::opt<bool>
+    ClInstrumentFCmp("nsan-instrument-fcmp",
+                     cl::desc("Instrument floating-point comparisons"),
+                     cl::init(true), cl::Hidden);
+
+// Optional regular expression; its description says argument checks are only
+// emitted for functions whose names match it.
+static cl::opt<std::string> ClCheckFunctionsFilter(
+    "check-functions-filter", cl::value_desc("regex"),
+    cl::desc("Only emit checks for arguments of functions "
+             "whose names match the given regular expression"));
+
+// Selects the domain in which fcmp equality comparisons are checked: the
+// shadow domain, or the application domain after truncating the shadow value.
+// The help text below is assembled from adjacent string literals, so every
+// piece must carry its own separating space.
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons. "
+        "For equality comparisons such as `x == 0.0f`, we can perform the "
+        "shadow check in the shadow (`(x_shadow == 0.0) == (x == 0.0f)`) or "
+        "app domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
+        "catch the case when `x_shadow` is accurate enough (and therefore "
+        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
+        "both `x` and `x_shadow` are not."),
+    cl::Hidden);
+
+// External, uninstrumented code that writes to memory can leave the shadow
+// memory out of sync with the application memory. This flag emits consistency
+// checks on loads to catch that situation.
+// With everything instrumented this is not strictly needed, since every load
+// should have a matching store. It can still help debug cases where the
+// framework mis-tracked shadow memory modifications, by failing on the load
+// rather than on the store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads",
+                                  cl::desc("Check floating-point load"),
+                                  cl::init(false), cl::Hidden);
+
+// Whether floating-point stores are checked (enabled by default).
+static cl::opt<bool> ClCheckStores("nsan-check-stores",
+                                   cl::desc("Check floating-point stores"),
+                                   cl::init(true), cl::Hidden);
+
+// Whether floating-point return values are checked (enabled by default).
+static cl::opt<bool> ClCheckRet("nsan-check-ret",
+                                cl::desc("Check floating-point return values"),
+                                cl::init(true), cl::Hidden);
+
+// LLVM may store constant floats as bitcasted ints.
+// It is not really necessary to shadow such stores: if the shadow value is
+// unknown, the framework re-extends it on load anyway. Moreover, because of
+// size collisions (e.g. bf16 vs f16), it is impossible to determine the
+// floating-point type from the size alone.
+// However, for debugging purposes it can be useful to model such stores.
+static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
+    "nsan-propagate-non-ft-const-stores-as-ft", cl::init(false),
+    cl::desc(
+        // The two literals are concatenated, so the first one must end with a
+        // space to keep the sentences apart in the help output.
+        "Propagate non floating-point const stores as floating point values. "
+        "For debugging purposes only"),
+    cl::Hidden);
+
+// Symbol names used for the module constructor and the init function.
+static constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
+static constexpr StringLiteral kNsanInitName("__nsan_init");
+
+// The values below must be kept in sync with the runtime.
+static constexpr int kShadowScale = 2;
+static constexpr int kMaxVectorWidth = 8;
+static constexpr int kMaxNumArgs = 128;
+static constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
+
+namespace {
+
+// Abstract description of one shadow type: its nsan type id and the LLVM type
+// that implements it.
+class ShadowTypeConfig {
+public:
+  virtual ~ShadowTypeConfig() = default;
+
+  // Returns the nsan type id of the shadow type (`d`, `l`, `q`, ...).
+  virtual char getNsanTypeId() const = 0;
+
+  // Returns the LLVM Type corresponding to the shadow type.
+  virtual Type *getType(LLVMContext &Context) const = 0;
+
+  // Factory: builds the config matching `TypeId`.
+  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
+};
+
+// Binds a ShadowTypeConfig to a compile-time nsan type id.
+template <char NsanTypeId>
+class ShadowTypeConfigImpl : public ShadowTypeConfig {
+public:
+  // The type id, also usable as a constant expression (e.g. as a case label).
+  static constexpr char kNsanTypeId = NsanTypeId;
+
+  char getNsanTypeId() const override { return kNsanTypeId; }
+};
+
+// Shadow type `d`: `double`.
+class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
+  Type *getType(LLVMContext &Ctx) const override {
+    return Type::getDoubleTy(Ctx);
+  }
+};
+
+// Shadow type `l`: `x86_fp80`, the X86 long double.
+class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
+  Type *getType(LLVMContext &Ctx) const override {
+    return Type::getX86_FP80Ty(Ctx);
+  }
+};
+
+// Shadow type `q`: `fp128`.
+class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
+  Type *getType(LLVMContext &Ctx) const override {
+    return Type::getFP128Ty(Ctx);
+  }
+};
+
+// Shadow type `e`: `ppc_fp128`, the IBM extended double with 106 bits of
+// mantissa.
+class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
+  Type *getType(LLVMContext &Ctx) const override {
+    return Type::getPPC_FP128Ty(Ctx);
+  }
+};
+
+// Creates a ShadowTypeConfig given its type id.
+// Returns nullptr, after printing a diagnostic to errs(), when `TypeId` does
+// not name any known shadow type.
+std::unique_ptr<ShadowTypeConfig>
+ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
+  switch (TypeId) {
+  case F64ShadowConfig::kNsanTypeId:
+    return std::make_unique<F64ShadowConfig>();
+  case F80ShadowConfig::kNsanTypeId:
+    return std::make_unique<F80ShadowConfig>();
+  case F128ShadowConfig::kNsanTypeId:
+    return std::make_unique<F128ShadowConfig>();
+  case PPC128ShadowConfig::kNsanTypeId:
+    return std::make_unique<PPC128ShadowConfig>();
+  }
+  // Separate the quoted id from the message text so it reads "type id 'x'".
+  errs() << "nsan: invalid shadow type id '" << TypeId << "'\n";
+  return nullptr;
+}
+
+// Floating-point value types that nsan shadows. The enumerators are used as
+// array indices, which is why this is not an `enum class`.
+enum FTValueType {
+  kFloat,
+  kDouble,
+  kLongDouble,
+  kNumValueTypes // Count of the value types above; not a value type itself.
+};
+
+// Maps a scalar LLVM type to its FTValueType. Yields an empty optional for
+// every type other than float, double and x86_fp80.
+static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
+  switch (FT->getTypeID()) {
+  case Type::FloatTyID:
+    return kFloat;
+  case Type::DoubleTyID:
+    return kDouble;
+  case Type::X86_FP80TyID:
+    return kLongDouble;
+  default:
+    return std::nullopt;
+  }
+}
+
+// Returns the LLVM type for an FTValueType, or nullptr for the
+// `kNumValueTypes` sentinel.
+static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
+  switch (VT) {
+  case kFloat:
+    return Type::getFloatTy(Context);
+  case kDouble:
+    return Type::getDoubleTy(Context);
+  case kLongDouble:
+    return Type::getX86_FP80Ty(Context);
+  case kNumValueTypes:
+    return nullptr;
+  }
+  // The switch covers every enumerator, but `VT` may come from an integer
+  // cast. Without this, control could flow off the end of a non-void function.
+  llvm_unreachable("Unhandled FTValueType enum");
+}
+
+// Returns the type name for an FTValueType, or nullptr for the
+// `kNumValueTypes` sentinel.
+static const char *typeNameFromFTValueType(FTValueType VT) {
+  switch (VT) {
+  case kFloat:
+    return "float";
+  case kDouble:
+    return "double";
+  case kLongDouble:
+    return "longdouble";
+  case kNumValueTypes:
+    return nullptr;
+  }
+  // The switch covers every enumerator, but `VT` may come from an integer
+  // cast. Without this, control could flow off the end of a non-void function.
+  llvm_unreachable("Unhandled FTValueType enum");
+}
+
+// A specific mapping configuration of application type to shadow type for nsan
+// (see the -nsan-shadow-type-mapping flag).
+class MappingConfig {
+public:
+  // Builds the per-value-type shadow configuration from ClShadowMapping.
+  // Returns false, after printing a diagnostic to errs(), when the mapping
+  // string is malformed or describes an unsupported combination.
+  bool initialize(LLVMContext *C) {
+    Context = C;
+    // The mapping must provide exactly one type id per FTValueType. Bail out
+    // here: the loop below indexes the string once per value type, so a
+    // shorter string would be read out of bounds.
+    if (ClShadowMapping.size() != kNumValueTypes) {
+      errs() << "Invalid nsan mapping: " << ClShadowMapping << "\n";
+      return false;
+    }
+    unsigned ShadowTypeSizeBits[kNumValueTypes];
+    for (int VT = 0; VT < kNumValueTypes; ++VT) {
+      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
+      if (Config == nullptr)
+        return false;
+      const unsigned AppTypeSize =
+          typeFromFTValueType(static_cast<FTValueType>(VT), *C)
+              ->getScalarSizeInBits();
+      const unsigned ShadowTypeSize =
+          Config->getType(*C)->getScalarSizeInBits();
+      // Check that the shadow type size is at most kShadowScale times the
+      // application type size, so that shadow memory computations are valid.
+      if (ShadowTypeSize > kShadowScale * AppTypeSize) {
+        errs() << "Invalid nsan mapping f" << AppTypeSize << "->f"
+               << ShadowTypeSize << ": The shadow type size should be at most "
+               << kShadowScale << " times the application type size\n";
+        return false;
+      }
+      ShadowTypeSizeBits[VT] = ShadowTypeSize;
+      Configs[VT] = std::move(Config);
+    }
+
+    // Check that the mapping is monotonous. This is required because if one
+    // does an fpextend of `float->long double` in application code, nsan is
+    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
+    // shadow code. This will fail in `qql` mode, since nsan would be
+    // fpextending `f128->long`, which is invalid.
+    // FIXME: Relax this.
+    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
+        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble]) {
+      errs() << "Invalid nsan mapping: { float->f" << ShadowTypeSizeBits[kFloat]
+             << "; double->f" << ShadowTypeSizeBits[kDouble]
+             << "; long double->f" << ShadowTypeSizeBits[kLongDouble] << " }\n";
+      return false;
+    }
+    return true;
+  }
+
+  // Returns the shadow type configuration for the value type `VT`.
+  const ShadowTypeConfig &byValueType(FTValueType VT) const {
+    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
+    return *Configs[VT];
+  }
+
+  // Returns the extended shadow type for a given application type, or nullptr
+  // when the type has no shadow (unsupported scalar type, scalable vector, or
+  // vector of an unsupported element type).
+  Type *getExtendedFPType(Type *FT) const {
+    if (const auto VT = ftValueTypeFromType(FT))
+      return Configs[*VT]->getType(*Context);
+    if (FT->isVectorTy()) {
+      auto *VecTy = cast<VectorType>(FT);
+      // FIXME: add support for scalable vector types.
+      if (VecTy->isScalableTy())
+        return nullptr;
+      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
+      return ExtendedScalar
+                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
+                 : nullptr;
+    }
+    return nullptr;
+  }
+
+private:
+  // Set by initialize(); used to materialize shadow types.
+  LLVMContext *Context = nullptr;
+  // One shadow type configuration per FTValueType, indexed by the enumerator.
+  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
+};
+
+// Describes how a type is laid out for storage purposes: the FTValueType of
+// its elements and how many of them need to be stored.
+struct MemoryExtents {
+  FTValueType ValueType; // Value type of each element.
+  uint64_t NumElts;      // Number of elements of that value type.
+};
+// Computes the memory extents of `FT`: a single element for a supported
+// scalar type, or the element extents times the fixed element count for a
+// vector. Any other type is treated as unreachable.
+static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
+  if (const std::optional<FTValueType> Scalar = ftValueTypeFromType(FT))
+    return MemoryExtents{*Scalar, 1};
+  if (!FT->isVectorTy())
+    llvm_unreachable("invalid value type");
+  auto *Vec = cast<VectorType>(FT);
+  const MemoryExtents Elt = getMemoryExtentsOrDie(Vec->getElementType());
+  const uint64_t Lanes = Vec->getElementCount().getFixedValue();
+  return MemoryExtents{Elt.ValueType, Elt.NumElts * Lanes};
+}
+
+// The location of a check. Passed as parameters to runtime checking functions.
+class CheckLoc {
+public:
+  // Creates a location that references an application memory location being
+  // stored to.
+  static CheckLoc makeStore(Value *Address) {
+    CheckLoc Result(kStore);
+    Result.Address = Address;
+    return Result;
+  }
+  // Creates a location that references an application memory location being
+  // loaded from.
+  static CheckLoc makeLoad(Value *Address) {
+    CheckLoc Result(kLoad);
+    Result.Address = Address;
+    return Result;
+  }
+
+  // Creates a location that references an argument, given by id.
+  static CheckLoc makeArg(int ArgId) {
+    CheckLoc Result(kArg);
+    Result.ArgId = ArgId;
+    return Result;
+  }
+
+  // Creates a location that references the return value of a function.
+  static CheckLoc makeRet() { return CheckLoc(kRet); }
+
+  // Creates a location that references a vector insert.
+  static CheckLoc makeInsert() { return CheckLoc(kInsert); }
+
+  // Returns the CheckType of location this refers to, as an integer-typed LLVM
+  // IR value.
+  Value *getType(LLVMContext &C) const {
+    return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
+  }
+
+  // Returns a CheckType-specific value representing details of the location
+  // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
+  // IR value.
+  Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
+    switch (CheckTy) {
+    case kUnknown:
+      llvm_unreachable("unknown type");
+    case kRet:
+    case kInsert:
+      // No extra detail is attached to these locations.
+      return ConstantInt::get(IntptrTy, 0);
+    case kArg:
+      return ConstantInt::get(IntptrTy, ArgId);
+    case kLoad:
+    case kStore:
+      return Builder.CreatePtrToInt(Address, IntptrTy);
+    }
+    // Every enumerator is handled above; this keeps control from flowing off
+    // the end of a non-void function.
+    llvm_unreachable("Unhandled CheckType enum");
+  }
+
+private:
+  // Must be kept in sync with the runtime.
+  enum CheckType {
+    kUnknown = 0,
+    kRet,
+    kArg,
+    kLoad,
+    kStore,
+    kInsert,
+  };
+  explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
+
+  const CheckType CheckTy;
+  // Application address; only set for kLoad and kStore locations.
+  Value *Address = nullptr;
+  // Argument index; only set for kArg locations.
+  int ArgId = -1;
+};
+
+// A map of LLVM IR values to shadow LLVM IR values.
+class ValueToShadowMap {
+public:
+ explicit ValueToShadowMap(MappingConfig *Config) : Config(Config) {}
+
+ // Sets the shadow value for a value. Asserts that the value does not already
+ // have a value.
+ void setShadow(Value *V, Value *Shadow) {
+ assert(V);
+ assert(Shadow);
+ const bool Inserted = Map.emplace(V, Shadow).second;
+#ifdef LLVM_ENABLE_DUMP
----------------
alexander-shaposhnikov wrote:
Done
https://github.com/llvm/llvm-project/pull/85916
More information about the llvm-commits
mailing list