[llvm] Add numerical sanitizer (PR #85916)
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 04:11:39 PDT 2024
https://github.com/alexander-shaposhnikov updated https://github.com/llvm/llvm-project/pull/85916
>From 429cd89293a79fedb459ef85eb2384bcc1ba21fb Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov <ashaposhnikov at google.com>
Date: Mon, 13 May 2024 10:09:22 +0000
Subject: [PATCH] Add numerical sanitizer
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 1 +
llvm/include/llvm/IR/Attributes.td | 4 +
.../NumericalStabilitySanitizer.h | 40 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 2 +
.../Transforms/Instrumentation/CMakeLists.txt | 1 +
.../NumericalStabilitySanitizer.cpp | 2256 +++++++++++++++++
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1 +
.../NumericalStabilitySanitizer/basic.ll | 931 +++++++
.../NumericalStabilitySanitizer/cfg.ll | 113 +
.../NumericalStabilitySanitizer/invoke.ll | 148 ++
.../NumericalStabilitySanitizer/memory.ll | 405 +++
14 files changed, 3907 insertions(+)
create mode 100644 llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h
create mode 100644 llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll
create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll
create mode 100644 llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 909eb833c601a..1f6c9a33f4730 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -744,6 +744,7 @@ enum AttributeKindCodes {
ATTR_KIND_CORO_ONLY_DESTROY_WHEN_COMPLETE = 90,
ATTR_KIND_DEAD_ON_UNWIND = 91,
ATTR_KIND_RANGE = 92,
+ ATTR_KIND_SANITIZE_NUMERICAL_STABILITY = 93,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index cef8b17769f0d..4aa231a9a4f82 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -285,6 +285,9 @@ def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress", [FnAttr]>;
/// MemTagSanitizer is on.
def SanitizeMemTag : EnumAttr<"sanitize_memtag", [FnAttr]>;
+/// NumericalStabilitySanitizer is on.
+def SanitizeNumericalStability : EnumAttr<"sanitize_numericalstability", [FnAttr]>;
+
/// Speculative Load Hardening is enabled.
///
/// Note that this uses the default compatibility (always compatible during
@@ -372,6 +375,7 @@ def : CompatRule<"isEqual<SanitizeThreadAttr>">;
def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
+def : CompatRule<"isEqual<SanitizeNumericalStabilityAttr>">;
def : CompatRule<"isEqual<SafeStackAttr>">;
def : CompatRule<"isEqual<ShadowCallStackAttr>">;
def : CompatRule<"isEqual<UseSampleProfileAttr>">;
diff --git a/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h
new file mode 100644
index 0000000000000..89a6019edd398
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h
@@ -0,0 +1,40 @@
+//===- NumericalStabilitySanitizer.h - NSan Pass ---------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the numerical stability sanitizer (nsan) pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A pass for nsan instrumentation.
+///
+/// Instruments functions to duplicate floating point computations in a
+/// higher-precision type.
+/// This function pass inserts calls to runtime library functions. If the
+/// functions aren't declared yet, the pass inserts the declarations.
+///
+/// The same class provides two entry points: a per-function `run` that
+/// instruments one function, and a per-module `run` that inserts the nsan
+/// module constructor.
+struct NumericalStabilitySanitizerPass
+    : public PassInfoMixin<NumericalStabilitySanitizerPass> {
+  /// Instruments \p F. Returns PreservedAnalyses::none() if \p F was modified
+  /// and PreservedAnalyses::all() otherwise.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+
+  /// Inserts the nsan module constructor into \p M.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+  /// Always reports the pass as required.
+  static bool isRequired() { return true; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index be2381cd7d779..5fa2b7346ad4b 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2106,6 +2106,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::SanitizeThread;
case bitc::ATTR_KIND_SANITIZE_MEMORY:
return Attribute::SanitizeMemory;
+ case bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY:
+ return Attribute::SanitizeNumericalStability;
case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING:
return Attribute::SpeculativeLoadHardening;
case bitc::ATTR_KIND_SWIFT_ERROR:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6d01e3b4d8218..0a17a75c322ef 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -819,6 +819,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_SANITIZE_THREAD;
case Attribute::SanitizeMemory:
return bitc::ATTR_KIND_SANITIZE_MEMORY;
+ case Attribute::SanitizeNumericalStability:
+ return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY;
case Attribute::SpeculativeLoadHardening:
return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING;
case Attribute::SwiftError:
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index e4131706aba01..75b70e5fea7d4 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -177,6 +177,7 @@
#include "llvm/Transforms/Instrumentation/LowerAllowCheckPass.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index e5ce6cb7da649..23a06acef4139 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -94,6 +94,7 @@ MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("module-inline", ModuleInlinerPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("nsan-module", NumericalStabilitySanitizerPass())
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
MODULE_PASS("openmp-opt-postlink",
@@ -386,6 +387,7 @@ FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("nsan", NumericalStabilitySanitizerPass())
FUNCTION_PASS("objc-arc", ObjCARCOptPass())
FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass())
FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass())
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 8d345d394b51a..4e3f9e27e0c34 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMInstrumentation
BlockCoverageInference.cpp
MemProfiler.cpp
MemorySanitizer.cpp
+ NumericalStabilitySanitizer.cpp
IndirectCallPromotion.cpp
Instrumentation.cpp
InstrOrderFile.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
new file mode 100644
index 0000000000000..d29d68845431e
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -0,0 +1,2256 @@
+//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nsan"
+
+// Counters declared with the STATISTIC macro under DEBUG_TYPE "nsan". Each one
+// counts instrumented instructions of one kind; "FT" in the names stands for
+// the floating-point cases and "NonFT" for the non floating-point ones, as
+// spelled out in the descriptions.
+STATISTIC(NumInstrumentedFTLoads,
+ "Number of instrumented floating-point loads");
+
+STATISTIC(NumInstrumentedFTCalls,
+ "Number of instrumented floating-point calls");
+STATISTIC(NumInstrumentedFTRets,
+ "Number of instrumented floating-point returns");
+STATISTIC(NumInstrumentedFTStores,
+ "Number of instrumented floating-point stores");
+STATISTIC(NumInstrumentedNonFTStores,
+ "Number of instrumented non floating-point stores");
+STATISTIC(
+ NumInstrumentedNonFTMemcpyStores,
+ "Number of instrumented non floating-point stores with memcpy semantics");
+STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
+
+// Selects the shadow type used for each application type, as a string of
+// three type ids (one each for `float`, `double`, `long double`).
+// Using smaller shadow types can help improve speed. For example, `dlq`
+// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
+// `dqq`.
+static cl::opt<std::string> ClShadowMapping(
+ "nsan-shadow-type-mapping", cl::init("dqq"),
+ cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
+ "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
+ "ppc_fp128 (extended double) respectively. The default is to "
+ "shadow `float` as `double`, and `double` and `x86_fp80` as "
+ "`fp128`"),
+ cl::Hidden);
+
+// Enables instrumentation of floating-point comparisons; on by default.
+static cl::opt<bool>
+ ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
+ cl::desc("Instrument floating-point comparisons"),
+ cl::Hidden);
+
+// Optional regular expression restricting argument checks to functions whose
+// names match it. No default is given, so the option is empty unless set.
+// NOTE(review): unlike the other options in this file, this one has no `nsan-`
+// prefix and is not cl::Hidden -- confirm that this is intentional.
+static cl::opt<std::string> ClCheckFunctionsFilter(
+ "check-functions-filter",
+ cl::desc("Only emit checks for arguments of functions "
+ "whose names match the given regular expression"),
+ cl::value_desc("regex"));
+
+// Selects the domain (shadow or application) in which fcmp equality
+// comparisons are checked; see the option description for the rationale.
+// Enabled by default.
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons. "
+        "For equality comparisons such as `x == 0.0f`, we can perform the "
+        "check in the shadow domain (`(x_shadow == 0.0) == (x == 0.0f)`) or "
+        "in the app domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). "
+        "This helps catch the case when `x_shadow` is accurate enough (and "
+        "therefore close enough to zero) so that `trunc(x_shadow)` is zero "
+        "even though both `x` and `x_shadow` are not."),
+    cl::Hidden);
+
+// When there is external, uninstrumented code writing to memory, the shadow
+// memory can get out of sync with the application memory. Enabling this flag
+// emits consistency checks for loads to catch this situation.
+// When everything is instrumented, this is not strictly necessary because any
+// load should have a corresponding store, but can help debug cases when the
+// framework did a bad job at tracking shadow memory modifications by failing on
+// load rather than store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+// Off by default.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads", cl::init(false),
+ cl::desc("Check floating-point load"),
+ cl::Hidden);
+
+// Enables checks on floating-point stores; on by default.
+static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
+ cl::desc("Check floating-point stores"),
+ cl::Hidden);
+
+// Enables checks on floating-point return values; on by default.
+static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
+ cl::desc("Check floating-point return values"),
+ cl::Hidden);
+
+// Name of the module constructor created by the pass, and name of the runtime
+// initialization function paired with it.
+static constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
+static constexpr StringLiteral kNsanInitName("__nsan_init");
+
+// The following values must be kept in sync with the runtime.
+// `constexpr` already implies `const`, so no extra qualifier is spelled out.
+static constexpr int kShadowScale = 2;
+static constexpr int kMaxVectorWidth = 8;
+static constexpr int kMaxNumArgs = 128;
+static constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
+
+namespace {
+
+// Abstract description of one shadow type: its nsan type id, its LLVM type and
+// its floating-point semantics. There is one concrete subclass per shadow
+// type.
+class ShadowTypeConfig {
+public:
+  virtual ~ShadowTypeConfig() = default;
+
+  // Factory: builds the configuration matching a one-character nsan type id.
+  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
+
+  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
+  virtual char getNsanTypeId() const = 0;
+
+  // The LLVM Type corresponding to the shadow type.
+  virtual Type *getType(LLVMContext &Context) const = 0;
+
+  // The floating-point semantics of the shadow type.
+  virtual const fltSemantics &semantics() const = 0;
+};
+
+// Helper base that binds a compile-time nsan type id to a ShadowTypeConfig.
+// The id is available both statically (kNsanTypeId) and through the virtual
+// getNsanTypeId() accessor.
+template <char NsanTypeId>
+class ShadowTypeConfigImpl : public ShadowTypeConfig {
+public:
+  static constexpr char kNsanTypeId = NsanTypeId;
+
+  char getNsanTypeId() const override { return kNsanTypeId; }
+};
+
+// Shadow configuration for type id `d`: `double`.
+// The overrides are private here; they are reached through the public
+// ShadowTypeConfig interface.
+class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getDoubleTy(Context);
+  }
+  const fltSemantics &semantics() const override {
+    return APFloat::IEEEdouble();
+  }
+};
+
+// Shadow configuration for type id `l`: `x86_fp80` (X86 long double).
+// The overrides are private here; they are reached through the public
+// ShadowTypeConfig interface.
+class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getX86_FP80Ty(Context);
+  }
+  const fltSemantics &semantics() const override {
+    return APFloat::x87DoubleExtended();
+  }
+};
+
+// Shadow configuration for type id `q`: `fp128` (quad precision).
+// The overrides are private here; they are reached through the public
+// ShadowTypeConfig interface.
+class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getFP128Ty(Context);
+  }
+  const fltSemantics &semantics() const override {
+    return APFloat::IEEEquad();
+  }
+};
+
+// Shadow configuration for type id `e`: `ppc_fp128`, the IBM extended double
+// with 106 bits of mantissa.
+// The overrides are private here; they are reached through the public
+// ShadowTypeConfig interface.
+class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getPPC_FP128Ty(Context);
+  }
+  const fltSemantics &semantics() const override {
+    return APFloat::PPCDoubleDouble();
+  }
+};
+
+// Creates the ShadowTypeConfig matching a one-character nsan type id (`d`,
+// `l`, `q` or `e`).
+// For any other id, prints a diagnostic to errs() and returns nullptr, so
+// callers must check the result.
+std::unique_ptr<ShadowTypeConfig>
+ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
+  switch (TypeId) {
+  case F64ShadowConfig::kNsanTypeId:
+    return std::make_unique<F64ShadowConfig>();
+  case F80ShadowConfig::kNsanTypeId:
+    return std::make_unique<F80ShadowConfig>();
+  case F128ShadowConfig::kNsanTypeId:
+    return std::make_unique<F128ShadowConfig>();
+  case PPC128ShadowConfig::kNsanTypeId:
+    return std::make_unique<PPC128ShadowConfig>();
+  }
+  // Separate the quoted id from the preceding word so the message reads
+  // "type id 'x'" rather than "type id'x'".
+  errs() << "nsan: invalid shadow type id '" << TypeId << "'\n";
+  return nullptr;
+}
+
+// Enumerates the application floating-point types handled by nsan (`float`,
+// `double` and `x86_fp80`). Used as indices in arrays, so not an `enum class`.
+// kNumValueTypes is not a type; it is the number of entries and is used to
+// size those arrays.
+enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };
+
+// Maps floating-point semantics to the matching FTValueType by comparing
+// against the IEEE single, IEEE double and x87 extended semantics objects.
+// Any other semantics is a programming error.
+static FTValueType semanticsToFTValueType(const fltSemantics &Sem) {
+  const fltSemantics *const SemPtr = &Sem;
+  if (SemPtr == &APFloat::IEEEsingle())
+    return kFloat;
+  if (SemPtr == &APFloat::IEEEdouble())
+    return kDouble;
+  if (SemPtr == &APFloat::x87DoubleExtended())
+    return kLongDouble;
+  llvm_unreachable("semantics are not one of the handled types");
+}
+
+// Returns the FTValueType for `FT` when it is exactly `float`, `double` or
+// `x86_fp80`; returns an empty optional for every other type (including
+// vectors of those types).
+static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
+  switch (FT->getTypeID()) {
+  case Type::FloatTyID:
+    return kFloat;
+  case Type::DoubleTyID:
+    return kDouble;
+  case Type::X86_FP80TyID:
+    return kLongDouble;
+  default:
+    return std::nullopt;
+  }
+}
+
+// Returns the LLVM type for an FTValueType, or nullptr for the kNumValueTypes
+// sentinel. Any value outside the enumerators is a programming error.
+static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
+  switch (VT) {
+  case kFloat:
+    return Type::getFloatTy(Context);
+  case kDouble:
+    return Type::getDoubleTy(Context);
+  case kLongDouble:
+    return Type::getX86_FP80Ty(Context);
+  case kNumValueTypes:
+    return nullptr;
+  }
+  // The switch covers every enumerator; without this, control could reach the
+  // end of a non-void function for an out-of-range value.
+  llvm_unreachable("Unhandled FTValueType enum");
+}
+
+// Returns the type name for an FTValueType ("float", "double" or
+// "longdouble"), or nullptr for the kNumValueTypes sentinel. Any value outside
+// the enumerators is a programming error.
+static const char *typeNameFromFTValueType(FTValueType VT) {
+  switch (VT) {
+  case kFloat:
+    return "float";
+  case kDouble:
+    return "double";
+  case kLongDouble:
+    return "longdouble";
+  case kNumValueTypes:
+    return nullptr;
+  }
+  // The switch covers every enumerator; without this, control could reach the
+  // end of a non-void function for an out-of-range value.
+  llvm_unreachable("Unhandled FTValueType enum");
+}
+
+// A specific mapping configuration of application type to shadow type for nsan
+// (see the -nsan-shadow-type-mapping flag).
+class MappingConfig {
+public:
+  // Builds the per-application-type shadow configuration from the
+  // -nsan-shadow-type-mapping flag, which must hold exactly one shadow type id
+  // per FTValueType (`float`, `double`, `long double`, in that order).
+  // Returns true on success. On failure, prints a diagnostic to errs() and
+  // returns false; the object must not be used in that case.
+  bool initialize(LLVMContext *C) {
+    if (ClShadowMapping.size() != 3) {
+      errs() << "Invalid nsan mapping: " << ClShadowMapping << "\n";
+      // Fail here rather than continuing with a malformed mapping: extra ids
+      // would otherwise be silently ignored and the mapping accepted.
+      return false;
+    }
+    Context = C;
+    unsigned ShadowTypeSizeBits[kNumValueTypes];
+    for (int VT = 0; VT < kNumValueTypes; ++VT) {
+      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
+      if (Config == nullptr)
+        return false;
+      const unsigned AppTypeSize =
+          typeFromFTValueType(static_cast<FTValueType>(VT), *C)
+              ->getScalarSizeInBits();
+      const unsigned ShadowTypeSize =
+          Config->getType(*C)->getScalarSizeInBits();
+      // Check that the shadow type size is at most kShadowScale times the
+      // application type size, so that shadow memory computations are valid.
+      if (ShadowTypeSize > kShadowScale * AppTypeSize) {
+        errs() << "Invalid nsan mapping f" << AppTypeSize << "->f"
+               << ShadowTypeSize << ": The shadow type size should be at most "
+               << kShadowScale << " times the application type size\n";
+        return false;
+      }
+      ShadowTypeSizeBits[VT] = ShadowTypeSize;
+      Configs[VT] = std::move(Config);
+    }
+
+    // Check that the mapping is monotonous. This is required because if one
+    // does an fpextend of `float->long double` in application code, nsan is
+    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
+    // shadow code. This will fail in `qql` mode, since nsan would be
+    // fpextending `f128->long`, which is invalid.
+    // FIXME: Relax this.
+    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
+        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble]) {
+      errs() << "Invalid nsan mapping: { float->f" << ShadowTypeSizeBits[kFloat]
+             << "; double->f" << ShadowTypeSizeBits[kDouble]
+             << "; long double->f" << ShadowTypeSizeBits[kLongDouble] << " }\n";
+      return false;
+    }
+    return true;
+  }
+
+  // Returns the shadow configuration for an application value type.
+  // Requires a successful initialize(): the stored configuration is
+  // dereferenced unconditionally.
+  const ShadowTypeConfig &byValueType(FTValueType VT) const {
+    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
+    return *Configs[VT];
+  }
+
+  // Returns the shadow configuration for the application type that has the
+  // given floating-point semantics.
+  const ShadowTypeConfig &bySemantics(const fltSemantics &Sem) const {
+    return byValueType(semanticsToFTValueType(Sem));
+  }
+
+  // Returns the extended shadow type for a given application type.
+  // Scalars map to their configured shadow type, and vectors map to a vector
+  // of the extended element type with the same element count. Returns nullptr
+  // for any type that has no shadow.
+  Type *getExtendedFPType(Type *FT) const {
+    if (const auto VT = ftValueTypeFromType(FT))
+      return Configs[*VT]->getType(*Context);
+    if (FT->isVectorTy()) {
+      auto *VecTy = cast<VectorType>(FT);
+      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
+      return ExtendedScalar
+                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
+                 : nullptr;
+    }
+    return nullptr;
+  }
+
+private:
+  // Context used to materialize shadow types; set by initialize().
+  LLVMContext *Context = nullptr;
+  // One shadow configuration per FTValueType, indexed by the enum value.
+  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
+};
+
+// The memory extents of a type specifies how many elements of a given
+// FTValueType needs to be stored when storing this type.
+struct MemoryExtents {
+ // Scalar application type of the stored elements.
+ FTValueType ValueType;
+ // Number of scalar elements: 1 for a scalar, the element count for a vector.
+ uint64_t NumElts;
+};
+// Computes the memory extents of `FT`. A handled scalar type yields a single
+// element. A fixed-width vector yields the extents of its element type scaled
+// by the vector's element count. Any other type is a programming error.
+static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
+  if (const std::optional<FTValueType> Scalar = ftValueTypeFromType(FT))
+    return MemoryExtents{*Scalar, 1};
+
+  auto *VecTy = dyn_cast<VectorType>(FT);
+  if (!VecTy)
+    llvm_unreachable("invalid value type");
+
+  const MemoryExtents Elt = getMemoryExtentsOrDie(VecTy->getElementType());
+  const uint64_t Lanes = VecTy->getElementCount().getFixedValue();
+  return MemoryExtents{Elt.ValueType, Elt.NumElts * Lanes};
+}
+
+// The location of a check. Passed as parameters to runtime checking functions.
+// Instances are created through the make*() factories, one per kind of
+// location.
+class CheckLoc {
+public:
+  // Creates a location that references the application memory address written
+  // by a store.
+  static CheckLoc makeStore(Value *Address) {
+    CheckLoc Result(kStore);
+    Result.Address = Address;
+    return Result;
+  }
+
+  // Creates a location that references the application memory address read by
+  // a load.
+  static CheckLoc makeLoad(Value *Address) {
+    CheckLoc Result(kLoad);
+    Result.Address = Address;
+    return Result;
+  }
+
+  // Creates a location that references an argument, given by id.
+  static CheckLoc makeArg(int ArgId) {
+    CheckLoc Result(kArg);
+    Result.ArgId = ArgId;
+    return Result;
+  }
+
+  // Creates a location that references the return value of a function.
+  static CheckLoc makeRet() { return CheckLoc(kRet); }
+
+  // Creates a location that references a vector insert.
+  static CheckLoc makeInsert() { return CheckLoc(kInsert); }
+
+  // Returns the CheckType of location this refers to, as an integer-typed LLVM
+  // IR value.
+  Value *getType(LLVMContext &C) const {
+    return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
+  }
+
+  // Returns a CheckType-specific value representing details of the location
+  // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
+  // IR value. Returns and inserts carry no detail and yield 0; arguments yield
+  // their id.
+  Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
+    switch (CheckTy) {
+    case kUnknown:
+      llvm_unreachable("unknown type");
+    case kRet:
+    case kInsert:
+      return ConstantInt::get(IntptrTy, 0);
+    case kArg:
+      return ConstantInt::get(IntptrTy, ArgId);
+    case kLoad:
+    case kStore:
+      return Builder.CreatePtrToInt(Address, IntptrTy);
+    }
+    // The switch covers every enumerator; without this, control could reach
+    // the end of a non-void function for an out-of-range value.
+    llvm_unreachable("Unhandled CheckType enum");
+  }
+
+private:
+  // Must be kept in sync with the runtime.
+  enum CheckType {
+    kUnknown = 0,
+    kRet,
+    kArg,
+    kLoad,
+    kStore,
+    kInsert,
+  };
+  explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
+
+  const CheckType CheckTy;
+  // Application address; only set for kLoad and kStore locations.
+  Value *Address = nullptr;
+  // Argument index; only set for kArg locations.
+  int ArgId = -1;
+};
+
+// Associates application LLVM IR values with their shadow LLVM IR values.
+// Constants are never stored: their shadows are computed on demand.
+class ValueToShadowMap {
+public:
+  explicit ValueToShadowMap(MappingConfig *Config) : Config(Config) {}
+
+  // Records `Shadow` as the shadow of `V`. Both must be non-null and `V` must
+  // not already have a recorded shadow (checked with asserts).
+  void setShadow(Value *V, Value *Shadow) {
+    assert(V);
+    assert(Shadow);
+    const auto InsertResult = Map.emplace(V, Shadow);
+    const bool Inserted = InsertResult.second;
+#ifdef LLVM_ENABLE_DUMP
+    if (!Inserted) {
+      // Dump the enclosing function (when there is one) and the offending
+      // value before the assertion below fires.
+      if (const auto *const I = dyn_cast<Instruction>(V))
+        I->getParent()->getParent()->dump();
+      errs() << "duplicate shadow (" << V << "): ";
+      V->dump();
+    }
+#endif
+    assert(Inserted && "duplicate shadow");
+    (void)Inserted;
+  }
+
+  // Returns true if getShadow() may be called on `V`: either `V` is a constant
+  // or a shadow was recorded for it.
+  bool hasShadow(Value *V) const {
+    if (isa<Constant>(V))
+      return true;
+    return Map.count(V) != 0;
+  }
+
+  // Returns the shadow of `V`. Constants get a freshly computed shadow
+  // constant; any other value must have a recorded, non-null shadow (checked
+  // with asserts).
+  Value *getShadow(Value *V) const {
+    assert(V);
+    if (auto *C = dyn_cast<Constant>(V))
+      return getShadowConstant(C);
+    const auto Entry = Map.find(V);
+    assert(Entry != Map.end() && "shadow val does not exist");
+    assert(Entry->second && "shadow val is null");
+    return Entry->second;
+  }
+
+  bool empty() const { return Map.empty(); }
+
+private:
+  // Converts an application floating-point constant to the semantics of its
+  // shadow type and returns the converted value.
+  APFloat extendConstantFP(APFloat CV) const {
+    const fltSemantics &ShadowSem =
+        Config->bySemantics(CV.getSemantics()).semantics();
+    bool LosesInfo = false;
+    CV.convert(ShadowSem, APFloatBase::rmTowardZero, &LosesInfo);
+    return CV;
+  }
+
+  // Returns the shadow constant for the given application constant. Handles
+  // undef values, floating-point constants and vector constants (element by
+  // element); anything else is unimplemented.
+  Constant *getShadowConstant(Constant *C) const {
+    if (auto *Undef = dyn_cast<UndefValue>(C))
+      return UndefValue::get(Config->getExtendedFPType(Undef->getType()));
+
+    if (auto *CFP = dyn_cast<ConstantFP>(C)) {
+      Type *const ShadowTy = Config->getExtendedFPType(CFP->getType());
+      return ConstantFP::get(ShadowTy, extendConstantFP(CFP->getValueAPF()));
+    }
+
+    if (C->getType()->isVectorTy()) {
+      const int NumElts =
+          cast<VectorType>(C->getType())->getElementCount().getFixedValue();
+      SmallVector<Constant *, 8> Elements;
+      for (int Idx = 0; Idx < NumElts; ++Idx)
+        Elements.push_back(getShadowConstant(C->getAggregateElement(Idx)));
+      return ConstantVector::get(Elements);
+    }
+    llvm_unreachable("unimplemented");
+  }
+
+  MappingConfig *const Config;
+  std::unordered_map<Value *, Value *> Map;
+};
+
+/// Per-function instrumentation driver for nsan.
+/// sanitizeFunction() is the only public entry point. The private initialize()
+/// method inserts the nsan runtime library API function declarations and the
+/// thread-local runtime globals into the module if they don't exist already.
+/// Adding __nsan_init to the module's global constructors is done separately
+/// by insertModuleCtor().
+class NumericalStabilitySanitizer {
+public:
+  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
+  void initialize(Module &M);
+  bool instrumentMemIntrinsic(MemIntrinsic *MI);
+  void maybeAddSuffixForNsanInterface(CallBase *CI);
+  bool addrPointsToConstantData(Value *Addr);
+  void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
+                              ValueToShadowMap &Map);
+  Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
+                                                const TargetLibraryInfo &TLI,
+                                                const ValueToShadowMap &Map);
+  PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
+  void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
+                             ValueToShadowMap &Map);
+
+  void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
+                           const ValueToShadowMap &Map);
+
+  void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
+                             const ValueToShadowMap &Map);
+  Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
+                   CheckLoc Loc);
+  Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
+                           CheckLoc Loc);
+  void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
+  Value *getCalleeAddress(CallBase &Call, IRBuilder<> &Builder) const;
+
+  // Value creation handlers.
+  Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
+  Value *handleTrunc(FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
+                     const ValueToShadowMap &Map);
+  Value *handleExt(FPExtInst &Ext, Type *VT, Type *ExtendedVT,
+                   const ValueToShadowMap &Map);
+  Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
+                        const TargetLibraryInfo &TLI,
+                        const ValueToShadowMap &Map, IRBuilder<> &Builder);
+  Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
+                                  const TargetLibraryInfo &TLI,
+                                  const ValueToShadowMap &Map,
+                                  IRBuilder<> &Builder);
+
+  // Value propagation handlers.
+  void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
+                        const ValueToShadowMap &Map);
+  void propagateNonFTStore(StoreInst &Store, Type *VT,
+                           const ValueToShadowMap &Map);
+
+  MappingConfig Config;
+  LLVMContext *Context = nullptr;
+  IntegerType *IntptrTy = nullptr;
+
+  // Runtime entry points, one per application value type; set by initialize().
+  FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes];
+  FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes];
+  FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes];
+  FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes];
+
+  // Type-independent runtime entry points; set by initialize().
+  FunctionCallee NsanCopyValues;
+  FunctionCallee NsanSetValueUnknown;
+  FunctionCallee NsanGetRawShadowTypePtr;
+  FunctionCallee NsanGetRawShadowPtr;
+
+  // Thread-local runtime globals and their types; set by initialize(). They
+  // are null-initialized like the other pointer members so that use before
+  // initialize() is a null dereference rather than a read of an indeterminate
+  // pointer.
+  GlobalValue *NsanShadowRetTag = nullptr;
+
+  Type *NsanShadowRetType = nullptr;
+  GlobalValue *NsanShadowRetPtr = nullptr;
+
+  GlobalValue *NsanShadowArgsTag = nullptr;
+
+  Type *NsanShadowArgsType = nullptr;
+  GlobalValue *NsanShadowArgsPtr = nullptr;
+
+  // Compiled -check-functions-filter regex; empty when the flag is not set.
+  std::optional<Regex> CheckFunctionsFilter;
+};
+
+// Ensures that the module has the nsan constructor (kNsanModuleCtorName)
+// together with the runtime init function (kNsanInitName, no arguments).
+void insertModuleCtor(Module &M) {
+  // This callback is invoked when the functions are created the first
+  // time. Hook them into the global ctors list in that case.
+  auto RegisterCtor = [&M](Function *Ctor, FunctionCallee) {
+    appendToGlobalCtors(M, Ctor, 0);
+  };
+  getOrCreateSanitizerCtorAndInitFunctions(M, kNsanModuleCtorName,
+                                           kNsanInitName, /*InitArgTypes=*/{},
+                                           /*InitArgs=*/{}, RegisterCtor);
+}
+
+} // end anonymous namespace
+
+// Function-level entry point: instruments `F` with a fresh
+// NumericalStabilitySanitizer. Reports that no analyses are preserved when the
+// function was changed and that all are preserved otherwise.
+PreservedAnalyses
+NumericalStabilitySanitizerPass::run(Function &F,
+                                     FunctionAnalysisManager &FAM) {
+  NumericalStabilitySanitizer Nsan;
+  const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+  const bool Changed = Nsan.sanitizeFunction(F, TLI);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+// Module-level entry point: inserts the nsan module constructor. The analysis
+// manager is not consulted, and the pass always reports that no analyses are
+// preserved.
+PreservedAnalyses
+NumericalStabilitySanitizerPass::run(Module &M,
+                                     ModuleAnalysisManager & /*MAM*/) {
+  insertModuleCtor(M);
+  return PreservedAnalyses::none();
+}
+
+// Returns the global named `Name` in `M`, creating it on demand as a
+// non-constant, externally linked global of type `Ty` with no initializer and
+// the initial-exec TLS model. The result is null if what the module returns is
+// not a GlobalValue.
+static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
+  auto MakeTLSGlobal = [&M, Ty, Name]() -> GlobalVariable * {
+    return new GlobalVariable(M, Ty, /*isConstant=*/false,
+                              GlobalVariable::ExternalLinkage,
+                              /*Initializer=*/nullptr, Name,
+                              /*InsertBefore=*/nullptr,
+                              GlobalVariable::InitialExecTLSModel);
+  };
+  Constant *const Global = M.getOrInsertGlobal(Name, Ty, MakeTLSGlobal);
+  return dyn_cast<GlobalValue>(Global);
+}
+
+// Caches the context and pointer-sized integer type of `M`, declares the nsan
+// runtime functions and thread-local globals in `M` (reusing existing ones),
+// and compiles the -check-functions-filter regex when that flag is set.
+// An invalid filter regex is a fatal error.
+// NOTE(review): this reads Config.byValueType(), which dereferences the
+// per-type shadow configuration, so Config.initialize() must have succeeded
+// before this runs -- the call is not visible here, confirm in the caller.
+void NumericalStabilitySanitizer::initialize(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  Context = &M.getContext();
+  IntptrTy = DL.getIntPtrType(*Context);
+  Type *PtrTy = PointerType::getUnqual(*Context);
+  Type *Int32Ty = Type::getInt32Ty(*Context);
+  Type *Int1Ty = Type::getInt1Ty(*Context);
+  Type *VoidTy = Type::getVoidTy(*Context);
+
+  // All runtime functions are declared nounwind.
+  AttributeList Attr;
+  Attr = Attr.addFnAttribute(*Context, Attribute::NoUnwind);
+  // Initialize the runtime values (functions and global variables).
+  for (int I = 0; I < kNumValueTypes; ++I) {
+    const FTValueType VT = static_cast<FTValueType>(I);
+    const char *const VTName = typeNameFromFTValueType(VT);
+    Type *const VTTy = typeFromFTValueType(VT, *Context);
+
+    // Load/store.
+    const std::string GetterPrefix =
+        std::string("__nsan_get_shadow_ptr_for_") + VTName;
+    NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
+        GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
+    NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
+        GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);
+
+    // Check. The runtime function names encode both the application type name
+    // and the shadow type id.
+    const auto &ShadowConfig = Config.byValueType(VT);
+    Type *ShadowTy = ShadowConfig.getType(*Context);
+    NsanCheckValue[VT] =
+        M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
+                                  "_" + ShadowConfig.getNsanTypeId(),
+                              Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
+    NsanFCmpFail[VT] = M.getOrInsertFunction(
+        std::string("__nsan_fcmp_fail_") + VTName + "_" +
+            ShadowConfig.getNsanTypeId(),
+        Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
+  }
+
+  NsanCopyValues = M.getOrInsertFunction("__nsan_copy_values", Attr, VoidTy,
+                                         PtrTy, PtrTy, IntptrTy);
+  NsanSetValueUnknown = M.getOrInsertFunction("__nsan_set_value_unknown", Attr,
+                                              VoidTy, PtrTy, IntptrTy);
+
+  // FIXME: Add attributes nofree, nosync, readnone, readonly,
+  NsanGetRawShadowTypePtr = M.getOrInsertFunction(
+      "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
+  NsanGetRawShadowPtr = M.getOrInsertFunction(
+      "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);
+
+  NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);
+
+  // Byte buffer sized for one return value: up to kMaxVectorWidth elements of
+  // the largest shadow type.
+  NsanShadowRetType = ArrayType::get(Type::getInt8Ty(*Context),
+                                     kMaxVectorWidth * kMaxShadowTypeSizeBytes);
+  NsanShadowRetPtr =
+      createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);
+
+  NsanShadowArgsTag =
+      createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);
+
+  // Byte buffer sized for up to kMaxNumArgs arguments of that same maximum
+  // size.
+  NsanShadowArgsType =
+      ArrayType::get(Type::getInt8Ty(*Context),
+                     kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
+
+  NsanShadowArgsPtr =
+      createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);
+
+  if (!ClCheckFunctionsFilter.empty()) {
+    Regex R = Regex(ClCheckFunctionsFilter);
+    std::string RegexError;
+    // Validate outside of assert(): asserts are compiled out of release
+    // builds, which would let an invalid user-supplied regex through.
+    if (!R.isValid(RegexError))
+      report_fatal_error(Twine("nsan: invalid check-functions-filter regex: ") +
+                         RegexError);
+    CheckFunctionsFilter = std::move(R);
+  }
+}
+
+// Tells whether `Addr` refers to constant data: after looking through at most
+// one GEP instruction, the address must be a GlobalVariable that is marked
+// constant. Anything else yields false.
+bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
+  // For a GEP only the base pointer matters.
+  Value *Base = Addr;
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(Base))
+    Base = GEP->getPointerOperand();
+
+  const auto *GV = dyn_cast<GlobalVariable>(Base);
+  return GV != nullptr && GV->isConstant();
+}
+
+// Instruments the entry of `F` so that every FT argument gets a shadow value,
+// registered in `Map`.
+// Pseudocode:
+//   if (this_fn_ptr == __nsan_shadow_args_tag) {
+//     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
+//     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
+//     ...
+//   } else {
+//     s(arg0) = fext(arg0);
+//     s(arg1) = fext(arg1);
+//     ...
+//   }
+//   __nsan_shadow_args_tag = 0;
+// The choice is emitted as a `select` per argument, so both the load from the
+// shadow args buffer and the fpext are always executed.
+void NumericalStabilitySanitizer::createShadowArguments(
+    Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
+  assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
+
+  // Nothing to do unless at least one argument has a shadow type.
+  const bool HasFTArg = any_of(F.args(), [this](const Argument &Arg) {
+    return Config.getExtendedFPType(Arg.getType()) != nullptr;
+  });
+  if (!HasFTArg)
+    return;
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
+  // The caller passed shadow args iff the tag holds this function's address.
+  Value *Tag =
+      Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false);
+  Value *Self = Builder.CreatePtrToInt(&F, IntptrTy);
+  Value *HasShadowArgs = Builder.CreateICmpEQ(Tag, Self);
+
+  // Shadow args are packed back to back; track the byte offset of the next.
+  unsigned OffsetBytes = 0;
+  for (Argument &Arg : F.args()) {
+    Type *const ShadowTy = Config.getExtendedFPType(Arg.getType());
+    if (!ShadowTy)
+      continue; // Not an FT value.
+    Value *Slot = Builder.CreateConstGEP2_64(NsanShadowArgsType,
+                                             NsanShadowArgsPtr, 0, OffsetBytes);
+    Value *Passed = Builder.CreateAlignedLoad(ShadowTy, Slot, Align(1),
+                                              /*isVolatile=*/false);
+    Value *Extended = Builder.CreateCast(Instruction::FPExt, &Arg, ShadowTy);
+    Map.setShadow(&Arg, Builder.CreateSelect(HasShadowArgs, Passed, Extended));
+
+    const TypeSize SlotSize = DL.getTypeStoreSize(ShadowTy);
+    assert(!SlotSize.isScalable() && "unsupported");
+    OffsetBytes += SlotSize.getFixedValue();
+  }
+  // Reset the tag now that the shadow args have been consumed.
+  Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
+}
+
+// Decides whether the instrumentation should emit argument checks before the
+// call `CI`.
+//  - With a functions filter: only direct calls whose callee name matches.
+//  - Without a filter: everything except calls to `__nsan_*` functions and
+//    `fabs` calls whose result feeds a comparison.
+static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
+                            const std::optional<Regex> &CheckFunctionsFilter) {
+  Function *Callee = CI.getCalledFunction();
+
+  // Filter mode: indirect calls are skipped, direct calls must match.
+  if (CheckFunctionsFilter)
+    return Callee != nullptr && CheckFunctionsFilter->match(Callee->getName());
+
+  // Always check args of indirect calls.
+  if (!Callee)
+    return true;
+
+  // Never check nsan functions, the user called them for a reason.
+  if (Callee->getName().starts_with("__nsan_"))
+    return false;
+
+  const auto ID = Callee->getIntrinsicID();
+  LibFunc LFunc = LibFunc::NumLibFuncs;
+  // Always check args of unknown functions (neither intrinsic nor libfunc).
+  if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Callee, LFunc))
+    return true;
+
+  const bool IsFabs = ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
+                      LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl;
+  if (!IsFabs)
+    return true; // Default is check.
+
+  // Do not check args of an `fabs` call that is used for a comparison.
+  // This is typically used for `fabs(a-b) < tolerance`, where what matters is
+  // the result of the comparison, which is already caught by the fcmp checks.
+  for (const User *U : CI.users())
+    if (isa<CmpInst>(U))
+      return false;
+
+  return true;
+}
+
+// Emits, right before the call `CI`, the code that fills the shadow call
+// stack: the shadow of every FT operand is (optionally) checked and then
+// stored, packed back to back, into the shadow args buffer, with the shadow
+// args tag set to the callee address. Inline asm, calls without FT operands,
+// intrinsics and known library functions get no shadow stack (but the latter
+// two still get their argument checks).
+void NumericalStabilitySanitizer::populateShadowStack(
+    CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
+  // Do not create a shadow stack for inline asm.
+  if (CI.isInlineAsm())
+    return;
+
+  const auto HasShadowType = [this](const Value *Operand) {
+    return Config.getExtendedFPType(Operand->getType()) != nullptr;
+  };
+  // Do not bother if there are no FP args.
+  if (!any_of(CI.operands(), HasShadowType))
+    return;
+
+  IRBuilder<> Builder(&CI);
+  const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
+
+  // First pass: collect the (possibly checked) shadow of each FT operand.
+  SmallVector<Value *, 8> ArgShadows;
+  int ArgId = 0;
+  for (Value *Arg : CI.operands()) {
+    const int CurrentId = ArgId++;
+    if (!HasShadowType(Arg))
+      continue; // Not an FT value.
+    Value *Shadow = Map.getShadow(Arg);
+    if (ShouldCheckArgs)
+      Shadow = emitCheck(Arg, Shadow, Builder, CheckLoc::makeArg(CurrentId));
+    ArgShadows.push_back(Shadow);
+  }
+
+  // Do not create shadow stacks for intrinsics/known lib funcs.
+  if (Function *Callee = CI.getCalledFunction()) {
+    LibFunc LFunc;
+    if (Callee->isIntrinsic() || TLI.getLibFunc(*Callee, LFunc))
+      return;
+  }
+
+  const DataLayout &DL = CI.getModule()->getDataLayout();
+  // Set the shadow stack tag.
+  Builder.CreateStore(getCalleeAddress(CI, Builder), NsanShadowArgsTag);
+
+  // Second pass: store the collected shadows at increasing byte offsets.
+  unsigned OffsetBytes = 0;
+  unsigned NextShadow = 0;
+  for (const Value *Arg : CI.operands()) {
+    Type *const ShadowTy = Config.getExtendedFPType(Arg->getType());
+    if (!ShadowTy)
+      continue; // Not an FT value.
+    Value *Slot = Builder.CreateConstGEP2_64(NsanShadowArgsType,
+                                             NsanShadowArgsPtr, 0, OffsetBytes);
+    Builder.CreateAlignedStore(ArgShadows[NextShadow++], Slot, Align(1),
+                               /*isVolatile=*/false);
+    const TypeSize SlotSize = DL.getTypeStoreSize(ShadowTy);
+    assert(!SlotSize.isScalable() && "unsupported");
+    OffsetBytes += SlotSize.getFixedValue();
+  }
+}
+
+// Internal part of emitCheck(). Returns a value that indicates whether
+// computation should continue with the shadow or resume by re-fextending the
+// value.
+// Result codes of a value check. emitCheckInternal() yields
+// kContinueWithShadow for constants, and emitCheck() re-extends the original
+// value whenever the check result equals kResumeFromValue.
+enum ContinuationType { // Keep in sync with runtime.
+ kContinueWithShadow = 0, // Keep computing with the current shadow value.
+ kResumeFromValue = 1, // Replace the shadow by an fpext of the value.
+};
+// Emits the runtime check(s) of `V` against its shadow `ShadowV` and returns
+// an i32 ContinuationType value.
+//  - Constants are never checked (kContinueWithShadow).
+//  - Scalar FT values result in a single call to the runtime check function.
+//  - Vectors, arrays and structs are checked element by element and the
+//    per-element results are OR-ed together, i.e. we resume from the value if
+//    any element resumes. Non-FT struct members are skipped.
+// Vector elements are read with `extractelement`; array and struct members are
+// aggregates and must be read with `extractvalue`.
+Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
+                                                      IRBuilder<> &Builder,
+                                                      CheckLoc Loc) {
+  // Do not emit checks for constant values, this is redundant.
+  if (isa<Constant>(V))
+    return ConstantInt::get(Builder.getInt32Ty(), kContinueWithShadow);
+
+  Type *const Ty = V->getType();
+  if (const auto VT = ftValueTypeFromType(Ty))
+    return Builder.CreateCall(
+        NsanCheckValue[*VT],
+        {V, ShadowV, Loc.getType(*Context), Loc.getValue(IntptrTy, Builder)});
+
+  // Folds one more per-element result into the accumulated result.
+  const auto Accumulate = [&Builder](Value *Acc, Value *Component) -> Value * {
+    return Acc ? Builder.CreateOr(Acc, Component) : Component;
+  };
+
+  if (Ty->isVectorTy()) {
+    auto *VecTy = cast<VectorType>(Ty);
+    Value *CheckResult = nullptr;
+    for (unsigned I = 0, E = VecTy->getElementCount().getFixedValue(); I < E;
+         ++I) {
+      // We resume if any element resumes. Another option would be to create a
+      // vector shuffle with the array of ContinueWithShadow, but that is too
+      // complex.
+      Value *ExtractV = Builder.CreateExtractElement(V, I);
+      Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
+      CheckResult = Accumulate(
+          CheckResult,
+          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc));
+    }
+    return CheckResult;
+  }
+  if (Ty->isArrayTy()) {
+    Value *CheckResult = nullptr;
+    for (unsigned I = 0, E = Ty->getArrayNumElements(); I < E; ++I) {
+      // Arrays are aggregates: `extractelement` only accepts vectors.
+      Value *ExtractV = Builder.CreateExtractValue(V, I);
+      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
+      CheckResult = Accumulate(
+          CheckResult,
+          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc));
+    }
+    return CheckResult;
+  }
+  if (Ty->isStructTy()) {
+    Value *CheckResult = nullptr;
+    for (unsigned I = 0, E = Ty->getStructNumElements(); I < E; ++I) {
+      if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
+        continue; // Only check FT values.
+      // Both the value and its shadow are structs here, so both members are
+      // read with `extractvalue`.
+      Value *ExtractV = Builder.CreateExtractValue(V, I);
+      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
+      CheckResult = Accumulate(
+          CheckResult,
+          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc));
+    }
+    assert(CheckResult && "struct with no FT element");
+    return CheckResult;
+  }
+
+  llvm_unreachable("not implemented");
+}
+
+// Inserts a runtime check of `V` against its shadow `ShadowV` and returns the
+// shadow to continue computing with: either `ShadowV` itself, or a fresh
+// fpext of `V` when the runtime asks to resume from the value.
+// Values are checked whenever they escape: on return, call, stores, and
+// insertvalue. No check is emitted for constants, nor for instructions that
+// live in a function rejected by the functions filter.
+// FIXME: Should we check on select ? phi ?
+Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
+                                              IRBuilder<> &Builder,
+                                              CheckLoc Loc) {
+  // Do not emit checks for constant values, this is redundant.
+  if (isa<Constant>(V))
+    return ShadowV;
+
+  // With a filter, instructions are only checked in matching functions.
+  if (CheckFunctionsFilter) {
+    if (auto *Inst = dyn_cast<Instruction>(V))
+      if (!CheckFunctionsFilter->match(Inst->getFunction()->getName()))
+        return ShadowV;
+  }
+
+  Value *Verdict = emitCheckInternal(V, ShadowV, Builder, Loc);
+  Value *MustResume = Builder.CreateICmpEQ(
+      Verdict, ConstantInt::get(Builder.getInt32Ty(), kResumeFromValue));
+  Value *Reextended =
+      Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType()));
+  return Builder.CreateSelect(MustResume, Reextended, ShadowV);
+}
+
+// Returns the instruction following `Inst` in its basic block; asserts that
+// there is one.
+static Instruction *getNextInstructionOrDie(Instruction &Inst) {
+  Instruction *const Next = Inst.getNextNode();
+  assert(Next && "instruction is a terminator");
+  return Next;
+}
+
+// Inserts a check that the result of `FCmp` on the original values is
+// consistent with the same comparison performed on the shadow values.
+// The block containing `FCmp` is split right after it; on mismatch control
+// goes through a new block that reports the failure to the runtime (once per
+// element for vector comparisons) before rejoining the normal path.
+// Nothing is emitted when fcmp instrumentation is disabled, when the function
+// is rejected by the functions filter, or when the operands are not FT values.
+void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
+                                                const ValueToShadowMap &Map) {
+  if (!ClInstrumentFCmp)
+    return;
+
+  Function *F = FCmp.getFunction();
+  if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
+    return;
+
+  Value *LHS = FCmp.getOperand(0);
+  if (Config.getExtendedFPType(LHS->getType()) == nullptr)
+    return;
+  Value *RHS = FCmp.getOperand(1);
+
+  // Split the basic block. On mismatch, we'll jump to the new basic block with
+  // a call to the runtime for error reporting.
+  BasicBlock *FCmpBB = FCmp.getParent();
+  BasicBlock *NextBB = FCmpBB->splitBasicBlock(getNextInstructionOrDie(FCmp));
+  // Remove the newly created terminator unconditional branch.
+  FCmpBB->back().eraseFromParent();
+  BasicBlock *FailBB =
+      BasicBlock::Create(*Context, "", FCmpBB->getParent(), NextBB);
+
+  // Create the shadow fcmp and comparison between the fcmps.
+  IRBuilder<> FCmpBuilder(FCmpBB);
+  FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
+  Value *ShadowLHS = Map.getShadow(LHS);
+  Value *ShadowRHS = Map.getShadow(RHS);
+  // See comment on ClTruncateFCmpEq.
+  if (FCmp.isEquality() && ClTruncateFCmpEq) {
+    // Round-trip the shadows through the original type before comparing.
+    Type *Ty = ShadowLHS->getType();
+    ShadowLHS = FCmpBuilder.CreateFPExt(
+        FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
+    ShadowRHS = FCmpBuilder.CreateFPExt(
+        FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
+  }
+  Value *ShadowFCmp =
+      FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
+  Value *OriginalAndShadowFcmpMatch =
+      FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);
+
+  if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
+    // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
+    // where an element is true if the corresponding elements in original and
+    // shadow are the same. We want all elements to be 1.
+    OriginalAndShadowFcmpMatch =
+        FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
+  }
+
+  FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB);
+
+  // Fill in FailBB.
+  IRBuilder<> FailBuilder(FailBB);
+  FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
+
+  // Emits one call to the runtime failure handler for a scalar comparison.
+  const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
+                             &FailBuilder](Value *L, Value *R, Value *ShadowL,
+                                           Value *ShadowR, Value *Result,
+                                           Value *ShadowResult) {
+    Type *FT = L->getType();
+    FunctionCallee *Callee = nullptr;
+    if (FT->isFloatTy()) {
+      Callee = &(NsanFCmpFail[kFloat]);
+    } else if (FT->isDoubleTy()) {
+      Callee = &(NsanFCmpFail[kDouble]);
+    } else if (FT->isX86_FP80Ty()) {
+      // FIXME: make NsanFCmpFailLongDouble work.
+      // Until then, report through the double handler with both operands
+      // truncated to double. Each operand is truncated from its own value.
+      Callee = &(NsanFCmpFail[kDouble]);
+      L = FailBuilder.CreateCast(Instruction::FPTrunc, L,
+                                 Type::getDoubleTy(*Context));
+      R = FailBuilder.CreateCast(Instruction::FPTrunc, R,
+                                 Type::getDoubleTy(*Context));
+    } else {
+      llvm_unreachable("not implemented");
+    }
+    FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
+                                     ConstantInt::get(FCmpBuilder.getInt32Ty(),
+                                                      FCmp.getPredicate()),
+                                     Result, ShadowResult});
+  };
+  if (LHS->getType()->isVectorTy()) {
+    // Report every element separately.
+    for (int I = 0, E = cast<VectorType>(LHS->getType())
+                            ->getElementCount()
+                            .getFixedValue();
+         I < E; ++I) {
+      Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
+      Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
+      Value *ExtractShadowLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
+      Value *ExtractShadowRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
+      Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
+      Value *ExtractShadowFCmp =
+          FailBuilder.CreateExtractElement(ShadowFCmp, I);
+      EmitFailCall(ExtractLHS, ExtractRHS, ExtractShadowLHS, ExtractShadowRHS,
+                   ExtractFCmp, ExtractShadowFCmp);
+    }
+  } else {
+    EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
+  }
+  FailBuilder.CreateBr(NextBB);
+
+  ++NumInstrumentedFCmp;
+}
+
+// Creates the shadow counterpart of `Phi` when it defines an FT value, and
+// returns nullptr otherwise. The shadow phi is inserted right after `Phi` and
+// is created without incoming values: those are shadow values that may not
+// exist yet, so they are populated in a final phase once all shadow values
+// have been created.
+PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
+    PHINode &Phi, const TargetLibraryInfo &TLI) {
+  if (Type *const ShadowTy = Config.getExtendedFPType(Phi.getType())) {
+    PHINode *ShadowPhi = PHINode::Create(ShadowTy, Phi.getNumIncomingValues());
+    ShadowPhi->insertAfter(&Phi);
+    return ShadowPhi;
+  }
+  return nullptr; // Not an FT value.
+}
+
+// Creates the shadow value for `Load`, whose value type is `VT` and shadow
+// type is `ExtendedVT`.
+// Loads from constant data are simply extended. For any other load the runtime
+// is asked for the shadow address: the block is split after the load, the
+// shadow is either loaded from shadow memory (non-null shadow pointer) or
+// obtained by extending the loaded value (null shadow pointer), and the two
+// alternatives are merged by the phi that is returned.
+Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
+ Type *ExtendedVT) {
+ IRBuilder<> Builder(getNextInstructionOrDie(Load));
+ Builder.SetCurrentDebugLocation(Load.getDebugLoc());
+ if (addrPointsToConstantData(Load.getPointerOperand())) {
+ // No need to look into the shadow memory, the value is a constant. Just
+ // convert from FT to 2FT.
+ return Builder.CreateFPExt(&Load, ExtendedVT);
+ }
+
+ // if (%shadowptr == null)
+ // %shadow = fpext %v
+ // else
+ // %shadow = load %shadowptr
+ // Considered options here:
+ // - Have `NsanGetShadowPtrForLoad` return a fixed address
+ // &__nsan_unknown_value_shadow_address that is valid to load from, and
+ // use a select. This has the advantage that the generated IR is simpler.
+ // - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
+ // not short-circuit, dereferencing the returned pointer is no longer an
+ // option, have to split and create a separate basic block. This has the
+ // advantage of being easier to debug because it crashes if we ever mess
+ // up.
+ // The code below implements the second option (null check and block split).
+
+ const auto Extents = getMemoryExtentsOrDie(VT);
+ Value *ShadowPtr = Builder.CreateCall(
+ NsanGetShadowPtrForLoad[Extents.ValueType],
+ {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
+ ++NumInstrumentedFTLoads;
+
+ // Split the basic block.
+ BasicBlock *LoadBB = Load.getParent();
+ BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
+ // Create the two options for creating the shadow value.
+ BasicBlock *ShadowLoadBB =
+ BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB);
+ BasicBlock *FExtBB =
+ BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB);
+
+ // Replace the newly created terminator unconditional branch by a conditional
+ // branch to one of the options.
+ {
+ LoadBB->back().eraseFromParent();
+ IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
+ LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+ LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
+ ShadowLoadBB);
+ }
+
+ // Fill in ShadowLoadBB.
+ IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
+ ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+ Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
+ ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
+ // Optionally check the loaded value against the shadow that was just read.
+ if (ClCheckLoads) {
+ ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
+ CheckLoc::makeLoad(Load.getPointerOperand()));
+ }
+ ShadowLoadBBBuilder.CreateBr(NextBB);
+
+ // Fill in FExtBB.
+ IRBuilder<> FExtBBBuilder(FExtBB);
+ FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+ Value *const FExt =
+ FExtBBBuilder.CreateCast(Instruction::FPExt, &Load, ExtendedVT);
+ FExtBBBuilder.CreateBr(NextBB);
+
+ // The shadow value comes from either of the two options.
+ IRBuilder<> NextBBBuilder(&*NextBB->begin());
+ NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+ PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
+ ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
+ ShadowPhi->addIncoming(FExt, FExtBB);
+ return ShadowPhi;
+}
+
+// Creates the shadow of the fptrunc `Trunc`, whose shadow type is
+// `ExtendedVT`.
+// When truncating:
+//  - (A) If the source has a shadow, we truncate from the shadow, else we
+//    truncate from the original source.
+//  - (B) If the shadow of the source is larger than the shadow of the dest,
+//    we still need a truncate. Else, the shadow of the source is the same
+//    type as the shadow of the dest (because mappings are non-decreasing), so
+//    we don't need to emit a truncate.
+// Examples,
+//   with a mapping of {f32->f64;f64->f80;f80->f128}
+//     fptrunc double   %1 to float     ->  fptrunc x86_fp80 s(%1) to double
+//     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
+//     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
+//     fptrunc x86_fp80 %1 to double    ->  x86_fp80 s(%1)
+//     fptrunc fp128    %1 to double    ->  fptrunc fp128 %1 to x86_fp80
+//     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
+//   with a mapping of {f32->f64;f64->f128;f80->f128}
+//     fptrunc double   %1 to float     ->  fptrunc fp128    s(%1) to double
+//     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
+//     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
+//     fptrunc x86_fp80 %1 to double    ->  fp128 %1
+//     fptrunc fp128    %1 to double    ->  fp128 %1
+//     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
+//   with a mapping of {f32->f32;f64->f32;f80->f64}
+//     fptrunc double   %1 to float     ->  float s(%1)
+//     fptrunc x86_fp80 %1 to float     ->  fptrunc double    s(%1) to float
+//     fptrunc fp128    %1 to float     ->  fptrunc fp128     %1    to float
+//     fptrunc x86_fp80 %1 to double    ->  fptrunc double    s(%1) to float
+//     fptrunc fp128    %1 to double    ->  fptrunc fp128     %1    to float
+//     fptrunc fp128    %1 to x86_fp80  ->  fptrunc fp128     %1    to double
+Value *NumericalStabilitySanitizer::handleTrunc(FPTruncInst &Trunc, Type *VT,
+                                                Type *ExtendedVT,
+                                                const ValueToShadowMap &Map) {
+  // (A): start from the shadow of the source when it has one.
+  Value *From = Trunc.getOperand(0);
+  Type *FromTy = From->getType();
+  if (Type *const ShadowFromTy = Config.getExtendedFPType(FromTy)) {
+    From = Map.getShadow(From);
+    FromTy = ShadowFromTy;
+  }
+
+  // (B): no cast needed when the types already agree.
+  if (FromTy == ExtendedVT)
+    return From;
+
+  Instruction *const ShadowTrunc =
+      CastInst::Create(Instruction::FPTrunc, From, ExtendedVT);
+  ShadowTrunc->insertAfter(&Trunc);
+  return ShadowTrunc;
+}
+
+// Creates the shadow of the fpext `Ext`, whose shadow type is `ExtendedVT`.
+// When extending:
+//  - (A) If the source has a shadow, we extend from the shadow, else we
+//    extend from the original source.
+//  - (B) If the shadow of the dest is larger than the shadow of the source,
+//    we still need an extend. Else, the shadow of the source is the same
+//    type as the shadow of the dest (because mappings are non-decreasing), so
+//    we don't need to emit an extend.
+// Examples,
+//   with a mapping of {f32->f64;f64->f80;f80->f128}
+//     fpext half   %1 to float     ->  fpext half     %1    to double
+//     fpext half   %1 to double    ->  fpext half     %1    to x86_fp80
+//     fpext half   %1 to x86_fp80  ->  fpext half     %1    to fp128
+//     fpext float  %1 to double    ->  double s(%1)
+//     fpext float  %1 to x86_fp80  ->  fpext double   s(%1) to fp128
+//     fpext double %1 to x86_fp80  ->  fpext x86_fp80 s(%1) to fp128
+//   with a mapping of {f32->f64;f64->f128;f80->f128}
+//     fpext half   %1 to float     ->  fpext half     %1    to double
+//     fpext half   %1 to double    ->  fpext half     %1    to fp128
+//     fpext half   %1 to x86_fp80  ->  fpext half     %1    to fp128
+//     fpext float  %1 to double    ->  fpext double   s(%1) to fp128
+//     fpext float  %1 to x86_fp80  ->  fpext double   s(%1) to fp128
+//     fpext double %1 to x86_fp80  ->  fp128 s(%1)
+//   with a mapping of {f32->f32;f64->f32;f80->f64}
+//     fpext half   %1 to float     ->  fpext half     %1    to float
+//     fpext half   %1 to double    ->  fpext half     %1    to float
+//     fpext half   %1 to x86_fp80  ->  fpext half     %1    to double
+//     fpext float  %1 to double    ->  s(%1)
+//     fpext float  %1 to x86_fp80  ->  fpext float    s(%1) to double
+//     fpext double %1 to x86_fp80  ->  fpext float    s(%1) to double
+Value *NumericalStabilitySanitizer::handleExt(FPExtInst &Ext, Type *VT,
+                                              Type *ExtendedVT,
+                                              const ValueToShadowMap &Map) {
+  // (A): start from the shadow of the source when it has one.
+  Value *From = Ext.getOperand(0);
+  Type *FromTy = From->getType();
+  if (Type *const ShadowFromTy = Config.getExtendedFPType(FromTy)) {
+    From = Map.getShadow(From);
+    FromTy = ShadowFromTy;
+  }
+
+  // (B): no cast needed when the types already agree.
+  if (FromTy == ExtendedVT)
+    return From;
+
+  Instruction *const ShadowExt =
+      CastInst::Create(Instruction::FPExt, From, ExtendedVT);
+  ShadowExt->insertAfter(&Ext);
+  return ShadowExt;
+}
+
+// Returns the address of the callee of `Call` as an integer of type IntptrTy.
+// The address comes from the statically known function when there is one, and
+// from the called operand (a function pointer) otherwise.
+Value *
+NumericalStabilitySanitizer::getCalleeAddress(CallBase &Call,
+                                              IRBuilder<> &Builder) const {
+  Function *const Direct = Call.getCalledFunction();
+  Value *const Callee =
+      Direct ? static_cast<Value *>(Direct) : Call.getCalledOperand();
+  return Builder.CreatePtrToInt(Callee, IntptrTy);
+}
+
+namespace {
+
+// FIXME: This should be tablegen-ed.
+
+// Static lookup tables relating library functions and intrinsics to the wider
+// intrinsics used to compute on shadow values.
+struct KnownIntrinsic {
+ // One entry of the widening table: the mangled name of the narrow intrinsic,
+ // the ID of the intrinsic to use on the shadow, and a factory for the
+ // function type of the widened intrinsic.
+ struct WidenedIntrinsic {
+ const char *NarrowName;
+ Intrinsic::ID ID; // wide id.
+ using FnTypeFactory = FunctionType *(*)(LLVMContext &);
+ FnTypeFactory MakeFnTy;
+ };
+
+ // NOTE(review): defined outside this view; presumably returns the intrinsic
+ // name associated with `LFunc` in kLibfuncIntrinsics - confirm.
+ static const char *get(LibFunc LFunc);
+
+ // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
+ // that applies the same operation on the shadow argument.
+ // Options are:
+ // - pass in the ID and full function type,
+ // - pass in the name, which includes the function type through mangling.
+ static const WidenedIntrinsic *widen(StringRef Name);
+
+private:
+ // Associates a library function with an intrinsic name.
+ struct LFEntry {
+ LibFunc LFunc;
+ const char *IntrinsicName;
+ };
+ static const LFEntry kLibfuncIntrinsics[];
+
+ static const WidenedIntrinsic kWidenedIntrinsics[];
+};
+
+// Builds the non-variadic function type `double (double)`.
+FunctionType *Make_Double_Double(LLVMContext &C) {
+  Type *const DoubleTy = Type::getDoubleTy(C);
+  return FunctionType::get(DoubleTy, {DoubleTy}, /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `x86_fp80 (x86_fp80)`.
+FunctionType *Make_X86FP80_X86FP80(LLVMContext &C) {
+  Type *const FP80Ty = Type::getX86_FP80Ty(C);
+  return FunctionType::get(FP80Ty, {FP80Ty}, /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `double (double, i32)`.
+FunctionType *Make_Double_DoubleI32(LLVMContext &C) {
+  Type *const DoubleTy = Type::getDoubleTy(C);
+  Type *const Int32Ty = Type::getInt32Ty(C);
+  return FunctionType::get(DoubleTy, {DoubleTy, Int32Ty}, /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `x86_fp80 (x86_fp80, i32)`.
+FunctionType *Make_X86FP80_X86FP80I32(LLVMContext &C) {
+  Type *const FP80Ty = Type::getX86_FP80Ty(C);
+  Type *const Int32Ty = Type::getInt32Ty(C);
+  return FunctionType::get(FP80Ty, {FP80Ty, Int32Ty}, /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `double (double, double)`.
+FunctionType *Make_Double_DoubleDouble(LLVMContext &C) {
+  Type *const DoubleTy = Type::getDoubleTy(C);
+  return FunctionType::get(DoubleTy, {DoubleTy, DoubleTy},
+                           /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `x86_fp80 (x86_fp80, x86_fp80)`.
+FunctionType *Make_X86FP80_X86FP80X86FP80(LLVMContext &C) {
+  Type *const FP80Ty = Type::getX86_FP80Ty(C);
+  return FunctionType::get(FP80Ty, {FP80Ty, FP80Ty}, /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type `double (double, double, double)`.
+FunctionType *Make_Double_DoubleDoubleDouble(LLVMContext &C) {
+  Type *const DoubleTy = Type::getDoubleTy(C);
+  return FunctionType::get(DoubleTy, {DoubleTy, DoubleTy, DoubleTy},
+                           /*isVarArg=*/false);
+}
+
+// Builds the non-variadic function type
+// `x86_fp80 (x86_fp80, x86_fp80, x86_fp80)`.
+FunctionType *Make_X86FP80_X86FP80X86FP80X86FP80(LLVMContext &C) {
+  Type *const FP80Ty = Type::getX86_FP80Ty(C);
+  return FunctionType::get(FP80Ty, {FP80Ty, FP80Ty, FP80Ty},
+                           /*isVarArg=*/false);
+}
+
+// Table mapping the mangled name of a narrow scalar intrinsic to the
+// intrinsic ID and function type to use on the shadow value. Each name must
+// spell the intrinsic's mangled name exactly (e.g. "llvm.nearbyint.f80"), as
+// the names in kLibfuncIntrinsics refer to entries of this table.
+const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
+    // FIXME: Right now we ignore vector intrinsics.
+    // This is hard because we have to model the semantics of the intrinsics,
+    // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
+    // Intrinsics that take any non-vector FT types:
+    // NOTE: Right now because of https://bugs.llvm.org/show_bug.cgi?id=45399
+    // for f128 we need to use Make_X86FP80_X86FP80 (go to a lower precision and
+    // come back).
+    {"llvm.sqrt.f32", Intrinsic::sqrt, Make_Double_Double},
+    {"llvm.sqrt.f64", Intrinsic::sqrt, Make_X86FP80_X86FP80},
+    {"llvm.sqrt.f80", Intrinsic::sqrt, Make_X86FP80_X86FP80},
+    {"llvm.powi.f32", Intrinsic::powi, Make_Double_DoubleI32},
+    {"llvm.powi.f64", Intrinsic::powi, Make_X86FP80_X86FP80I32},
+    {"llvm.powi.f80", Intrinsic::powi, Make_X86FP80_X86FP80I32},
+    {"llvm.sin.f32", Intrinsic::sin, Make_Double_Double},
+    {"llvm.sin.f64", Intrinsic::sin, Make_X86FP80_X86FP80},
+    {"llvm.sin.f80", Intrinsic::sin, Make_X86FP80_X86FP80},
+    {"llvm.cos.f32", Intrinsic::cos, Make_Double_Double},
+    {"llvm.cos.f64", Intrinsic::cos, Make_X86FP80_X86FP80},
+    {"llvm.cos.f80", Intrinsic::cos, Make_X86FP80_X86FP80},
+    {"llvm.pow.f32", Intrinsic::pow, Make_Double_DoubleDouble},
+    {"llvm.pow.f64", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.pow.f80", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.exp.f32", Intrinsic::exp, Make_Double_Double},
+    {"llvm.exp.f64", Intrinsic::exp, Make_X86FP80_X86FP80},
+    {"llvm.exp.f80", Intrinsic::exp, Make_X86FP80_X86FP80},
+    {"llvm.exp2.f32", Intrinsic::exp2, Make_Double_Double},
+    {"llvm.exp2.f64", Intrinsic::exp2, Make_X86FP80_X86FP80},
+    {"llvm.exp2.f80", Intrinsic::exp2, Make_X86FP80_X86FP80},
+    {"llvm.log.f32", Intrinsic::log, Make_Double_Double},
+    {"llvm.log.f64", Intrinsic::log, Make_X86FP80_X86FP80},
+    {"llvm.log.f80", Intrinsic::log, Make_X86FP80_X86FP80},
+    {"llvm.log10.f32", Intrinsic::log10, Make_Double_Double},
+    {"llvm.log10.f64", Intrinsic::log10, Make_X86FP80_X86FP80},
+    {"llvm.log10.f80", Intrinsic::log10, Make_X86FP80_X86FP80},
+    {"llvm.log2.f32", Intrinsic::log2, Make_Double_Double},
+    {"llvm.log2.f64", Intrinsic::log2, Make_X86FP80_X86FP80},
+    {"llvm.log2.f80", Intrinsic::log2, Make_X86FP80_X86FP80},
+    {"llvm.fma.f32", Intrinsic::fma, Make_Double_DoubleDoubleDouble},
+    {"llvm.fmuladd.f32", Intrinsic::fmuladd, Make_Double_DoubleDoubleDouble},
+    {"llvm.fma.f64", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80},
+    // NOTE(review): unlike the f32 entry, fmuladd.f64 is widened to
+    // Intrinsic::fma rather than Intrinsic::fmuladd - confirm this is
+    // intentional.
+    {"llvm.fmuladd.f64", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80},
+    {"llvm.fma.f80", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80},
+    {"llvm.fabs.f32", Intrinsic::fabs, Make_Double_Double},
+    {"llvm.fabs.f64", Intrinsic::fabs, Make_X86FP80_X86FP80},
+    {"llvm.fabs.f80", Intrinsic::fabs, Make_X86FP80_X86FP80},
+    {"llvm.minnum.f32", Intrinsic::minnum, Make_Double_DoubleDouble},
+    {"llvm.minnum.f64", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minnum.f80", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maxnum.f32", Intrinsic::maxnum, Make_Double_DoubleDouble},
+    {"llvm.maxnum.f64", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maxnum.f80", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minimum.f32", Intrinsic::minimum, Make_Double_DoubleDouble},
+    {"llvm.minimum.f64", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.minimum.f80", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maximum.f32", Intrinsic::maximum, Make_Double_DoubleDouble},
+    {"llvm.maximum.f64", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.maximum.f80", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.copysign.f32", Intrinsic::copysign, Make_Double_DoubleDouble},
+    {"llvm.copysign.f64", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.copysign.f80", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80},
+    {"llvm.floor.f32", Intrinsic::floor, Make_Double_Double},
+    {"llvm.floor.f64", Intrinsic::floor, Make_X86FP80_X86FP80},
+    {"llvm.floor.f80", Intrinsic::floor, Make_X86FP80_X86FP80},
+    {"llvm.ceil.f32", Intrinsic::ceil, Make_Double_Double},
+    {"llvm.ceil.f64", Intrinsic::ceil, Make_X86FP80_X86FP80},
+    {"llvm.ceil.f80", Intrinsic::ceil, Make_X86FP80_X86FP80},
+    {"llvm.trunc.f32", Intrinsic::trunc, Make_Double_Double},
+    {"llvm.trunc.f64", Intrinsic::trunc, Make_X86FP80_X86FP80},
+    {"llvm.trunc.f80", Intrinsic::trunc, Make_X86FP80_X86FP80},
+    {"llvm.rint.f32", Intrinsic::rint, Make_Double_Double},
+    {"llvm.rint.f64", Intrinsic::rint, Make_X86FP80_X86FP80},
+    {"llvm.rint.f80", Intrinsic::rint, Make_X86FP80_X86FP80},
+    {"llvm.nearbyint.f32", Intrinsic::nearbyint, Make_Double_Double},
+    {"llvm.nearbyint.f64", Intrinsic::nearbyint, Make_X86FP80_X86FP80},
+    {"llvm.nearbyint.f80", Intrinsic::nearbyint, Make_X86FP80_X86FP80},
+    {"llvm.round.f32", Intrinsic::round, Make_Double_Double},
+    {"llvm.round.f64", Intrinsic::round, Make_X86FP80_X86FP80},
+    {"llvm.round.f80", Intrinsic::round, Make_X86FP80_X86FP80},
+    // NOTE(review): the l/ll rounding intrinsics below return integers and
+    // are also mangled on their result type; these names and FP-returning
+    // signatures may never match - confirm.
+    {"llvm.lround.f32", Intrinsic::lround, Make_Double_Double},
+    {"llvm.lround.f64", Intrinsic::lround, Make_X86FP80_X86FP80},
+    {"llvm.lround.f80", Intrinsic::lround, Make_X86FP80_X86FP80},
+    {"llvm.llround.f32", Intrinsic::llround, Make_Double_Double},
+    {"llvm.llround.f64", Intrinsic::llround, Make_X86FP80_X86FP80},
+    {"llvm.llround.f80", Intrinsic::llround, Make_X86FP80_X86FP80},
+    {"llvm.lrint.f32", Intrinsic::lrint, Make_Double_Double},
+    {"llvm.lrint.f64", Intrinsic::lrint, Make_X86FP80_X86FP80},
+    {"llvm.lrint.f80", Intrinsic::lrint, Make_X86FP80_X86FP80},
+    {"llvm.llrint.f32", Intrinsic::llrint, Make_Double_Double},
+    {"llvm.llrint.f64", Intrinsic::llrint, Make_X86FP80_X86FP80},
+    {"llvm.llrint.f80", Intrinsic::llrint, Make_X86FP80_X86FP80},
+};
+
+// Maps each supported math LibFunc (the `f`, plain and `l` suffixed variants)
+// to the name of the LLVM intrinsic that computes the same function on f32,
+// f64 and f80 respectively. maybeHandleKnownCallBase() feeds these names to
+// KnownIntrinsic::widen() and asserts that the lookup succeeds, so every name
+// listed here needs a matching entry in kWidenedIntrinsics.
+// The trailing `//` on each row is empty; presumably it keeps clang-format
+// from re-flowing the table.
+const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
+ {LibFunc_sqrtf, "llvm.sqrt.f32"}, //
+ {LibFunc_sqrt, "llvm.sqrt.f64"}, //
+ {LibFunc_sqrtl, "llvm.sqrt.f80"}, //
+ {LibFunc_sinf, "llvm.sin.f32"}, //
+ {LibFunc_sin, "llvm.sin.f64"}, //
+ {LibFunc_sinl, "llvm.sin.f80"}, //
+ {LibFunc_cosf, "llvm.cos.f32"}, //
+ {LibFunc_cos, "llvm.cos.f64"}, //
+ {LibFunc_cosl, "llvm.cos.f80"}, //
+ {LibFunc_powf, "llvm.pow.f32"}, //
+ {LibFunc_pow, "llvm.pow.f64"}, //
+ {LibFunc_powl, "llvm.pow.f80"}, //
+ {LibFunc_expf, "llvm.exp.f32"}, //
+ {LibFunc_exp, "llvm.exp.f64"}, //
+ {LibFunc_expl, "llvm.exp.f80"}, //
+ {LibFunc_exp2f, "llvm.exp2.f32"}, //
+ {LibFunc_exp2, "llvm.exp2.f64"}, //
+ {LibFunc_exp2l, "llvm.exp2.f80"}, //
+ {LibFunc_logf, "llvm.log.f32"}, //
+ {LibFunc_log, "llvm.log.f64"}, //
+ {LibFunc_logl, "llvm.log.f80"}, //
+ {LibFunc_log10f, "llvm.log10.f32"}, //
+ {LibFunc_log10, "llvm.log10.f64"}, //
+ {LibFunc_log10l, "llvm.log10.f80"}, //
+ {LibFunc_log2f, "llvm.log2.f32"}, //
+ {LibFunc_log2, "llvm.log2.f64"}, //
+ {LibFunc_log2l, "llvm.log2.f80"}, //
+ {LibFunc_fabsf, "llvm.fabs.f32"}, //
+ {LibFunc_fabs, "llvm.fabs.f64"}, //
+ {LibFunc_fabsl, "llvm.fabs.f80"}, //
+ {LibFunc_copysignf, "llvm.copysign.f32"}, //
+ {LibFunc_copysign, "llvm.copysign.f64"}, //
+ {LibFunc_copysignl, "llvm.copysign.f80"}, //
+ {LibFunc_floorf, "llvm.floor.f32"}, //
+ {LibFunc_floor, "llvm.floor.f64"}, //
+ {LibFunc_floorl, "llvm.floor.f80"}, //
+ {LibFunc_fmaxf, "llvm.maxnum.f32"}, //
+ {LibFunc_fmax, "llvm.maxnum.f64"}, //
+ {LibFunc_fmaxl, "llvm.maxnum.f80"}, //
+ {LibFunc_fminf, "llvm.minnum.f32"}, //
+ {LibFunc_fmin, "llvm.minnum.f64"}, //
+ {LibFunc_fminl, "llvm.minnum.f80"}, //
+ {LibFunc_ceilf, "llvm.ceil.f32"}, //
+ {LibFunc_ceil, "llvm.ceil.f64"}, //
+ {LibFunc_ceill, "llvm.ceil.f80"}, //
+ {LibFunc_truncf, "llvm.trunc.f32"}, //
+ {LibFunc_trunc, "llvm.trunc.f64"}, //
+ {LibFunc_truncl, "llvm.trunc.f80"}, //
+ {LibFunc_rintf, "llvm.rint.f32"}, //
+ {LibFunc_rint, "llvm.rint.f64"}, //
+ {LibFunc_rintl, "llvm.rint.f80"}, //
+ {LibFunc_nearbyintf, "llvm.nearbyint.f32"}, //
+ {LibFunc_nearbyint, "llvm.nearbyint.f64"}, //
+ {LibFunc_nearbyintl, "llvm.nearbyint.f80"}, //
+ {LibFunc_roundf, "llvm.round.f32"}, //
+ {LibFunc_round, "llvm.round.f64"}, //
+ {LibFunc_roundl, "llvm.round.f80"}, //
+};
+
+// Linear search of kLibfuncIntrinsics for the intrinsic name registered for
+// LFunc. Yields nullptr when the libfunc has no entry in the table.
+const char *KnownIntrinsic::get(LibFunc LFunc) {
+  for (const LFEntry &Entry : kLibfuncIntrinsics)
+    if (Entry.LFunc == LFunc)
+      return Entry.IntrinsicName;
+  return nullptr;
+}
+
+// Linear search of kWidenedIntrinsics for the entry whose narrow intrinsic
+// name equals Name. Yields a pointer into the table, or nullptr when no entry
+// matches.
+const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
+  for (const auto &Candidate : kWidenedIntrinsics)
+    if (Candidate.NarrowName == Name)
+      return &Candidate;
+  return nullptr;
+}
+
+} // namespace
+
+// Looks up the LLVM intrinsic name that corresponds to the library function
+// Fn. Returns nullptr when TLI does not recognize Fn as a library function,
+// or when the recognized libfunc has no entry in KnownIntrinsic; the latter
+// case additionally prints a FIXME line to errs(). VT is not used.
+static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
+                                           const TargetLibraryInfo &TLI) {
+  LibFunc LFunc;
+  if (!TLI.getLibFunc(Fn, LFunc))
+    return nullptr;
+
+  const char *const IntrinsicName = KnownIntrinsic::get(LFunc);
+  if (IntrinsicName == nullptr)
+    errs() << "FIXME: LibFunc: " << TLI.getName(LFunc) << "\n";
+  return IntrinsicName;
+}
+
+// Try to handle a known function call.
+//
+// If the callee of Call is an intrinsic, or a library function that
+// getIntrinsicFromLibfunc() can map to an intrinsic, emits a second call to
+// that intrinsic operating on the shadow operands and returns its result,
+// extended to ExtendedVT when necessary.
+//   Call       - the original call or invoke.
+//   VT         - type of the value produced by Call (forwarded to the libfunc
+//                lookup).
+//   ExtendedVT - shadow type the returned value must have.
+//   TLI        - used to recognize library functions.
+//   Map        - provides the shadows of the FT arguments.
+//   Builder    - insertion point for the new instructions.
+// Returns nullptr for indirect calls and for callees that are neither
+// intrinsics nor known library functions.
+Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
+    CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map, IRBuilder<> &Builder) {
+  Function *const Fn = Call.getCalledFunction();
+  if (Fn == nullptr)
+    return nullptr;
+
+  Intrinsic::ID WidenedId = Intrinsic::ID();
+  FunctionType *WidenedFnTy = nullptr;
+  if (const auto ID = Fn->getIntrinsicID()) {
+    const auto *const Widened = KnownIntrinsic::widen(Fn->getName());
+    if (Widened) {
+      WidenedId = Widened->ID;
+      WidenedFnTy = Widened->MakeFnTy(*Context);
+    } else {
+      // If we don't know how to widen the intrinsic, we have no choice but to
+      // call the non-wide version on a truncated shadow and extend again
+      // afterwards.
+      WidenedId = ID;
+      WidenedFnTy = Fn->getFunctionType();
+    }
+  } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
+    // We might have a call to a library function that we can replace with a
+    // wider Intrinsic.
+    const auto *Widened = KnownIntrinsic::widen(Name);
+    assert(Widened && "make sure KnownIntrinsic entries are consistent");
+    WidenedId = Widened->ID;
+    WidenedFnTy = Widened->MakeFnTy(*Context);
+  } else {
+    // This is not a known library function or intrinsic.
+    return nullptr;
+  }
+
+  // Check that the widened intrinsic is valid.
+  SmallVector<Intrinsic::IITDescriptor, 8> Table;
+  getIntrinsicInfoTableEntries(WidenedId, Table);
+  SmallVector<Type *, 4> ArgTys;
+  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+  const Intrinsic::MatchIntrinsicTypesResult Res =
+      Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
+  assert(Res == Intrinsic::MatchIntrinsicTypes_Match &&
+         "invalid widened intrinsic");
+  (void)Res;
+
+  // For known intrinsic functions, we create a second call to the same
+  // intrinsic with a different type.
+  SmallVector<Value *, 4> Args;
+  // Only walk the call arguments. The raw operand list of a CallBase also
+  // holds the callee, any operand-bundle inputs and, for an invoke, the two
+  // successor blocks; none of those are parameters of the intrinsic, and
+  // indexing WidenedFnTy with them would run past its parameter list.
+  for (unsigned I = 0, E = Call.arg_size(); I < E; ++I) {
+    Value *Arg = Call.getArgOperand(I);
+    Type *const OrigArgTy = Arg->getType();
+    Type *const IntrinsicArgTy = WidenedFnTy->getParamType(I);
+    if (OrigArgTy == IntrinsicArgTy) {
+      Args.push_back(Arg); // The arg is passed as is.
+      continue;
+    }
+    Type *const ShadowArgTy = Config.getExtendedFPType(Arg->getType());
+    assert(ShadowArgTy &&
+           "don't know how to get the shadow value for a non-FT");
+    Value *Shadow = Map.getShadow(Arg);
+    if (ShadowArgTy == IntrinsicArgTy) {
+      // The shadow is the right type for the intrinsic.
+      assert(Shadow->getType() == ShadowArgTy);
+      Args.push_back(Shadow);
+      continue;
+    }
+    // There is no intrinsic with this level of precision, truncate the shadow.
+    Args.push_back(
+        Builder.CreateCast(Instruction::FPTrunc, Shadow, IntrinsicArgTy));
+  }
+  Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
+  // When the intrinsic could not be widened all the way to the shadow type,
+  // extend its result so that callers always get an ExtendedVT value.
+  return WidenedFnTy->getReturnType() == ExtendedVT
+             ? IntrinsicCall
+             : Builder.CreateCast(Instruction::FPExt, IntrinsicCall,
+                                  ExtendedVT);
+}
+
+// Computes the shadow of the value produced by a CallBase, i.e. a function
+// call, an inline asm sequence, or an invoke. All new instructions are
+// emitted through Builder; the returned value has type ExtendedVT.
+Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
+                                                   Type *ExtendedVT,
+                                                   const TargetLibraryInfo &TLI,
+                                                   const ValueToShadowMap &Map,
+                                                   IRBuilder<> &Builder) {
+  // Inline asm is opaque to us: all we can do is widen its result.
+  if (Call.isInlineAsm())
+    return Builder.CreateFPExt(&Call, ExtendedVT);
+
+  // Intrinsics and library functions (e.g. sin, exp) get dedicated treatment:
+  // their semantics are known, so rather than blindly calling them we can
+  // e.g. compute the sine directly in the shadow domain.
+  Value *const KnownShadow =
+      maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder);
+  if (KnownShadow != nullptr)
+    return KnownShadow;
+
+  // Generic callee. If the shadow return tag matches the callee, the callee
+  // left an extended return value in the shadow return slot and we read it
+  // from there. Otherwise we fall back to extending the plain return value.
+  Value *const RetTag =
+      Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
+  Value *const CalleeAddr = getCalleeAddress(Call, Builder);
+  Value *const HasShadowRet = Builder.CreateICmpEQ(RetTag, CalleeAddr);
+
+  Value *const ShadowRetSlot =
+      Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
+  Value *const ShadowRetVal =
+      Builder.CreateLoad(ExtendedVT, ShadowRetSlot, /*isVolatile=*/false);
+  Value *const ExtendedRet =
+      Builder.CreateCast(Instruction::FPExt, &Call, ExtendedVT);
+  Value *const Shadow =
+      Builder.CreateSelect(HasShadowRet, ShadowRetVal, ExtendedRet);
+  ++NumInstrumentedFTCalls;
+
+  // Note that we do not need to set NsanShadowRetTag to zero as we know that
+  // either the function is not instrumented and it will never set
+  // NsanShadowRetTag; or it is and it will always do so.
+  return Shadow;
+}
+
+// Creates a shadow value for the given FT value. At that point all operands are
+// guaranteed to be available.
+//
+// Dispatches on the kind of Inst and emits, right after it (or in a new block
+// for invokes), the instruction(s) computing its shadow in the extended type.
+// Shadows of operands are read from Map. Instruction kinds without a handler
+// print a FIXME to errs() and hit llvm_unreachable.
+Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
+    Instruction &Inst, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map) {
+  Type *const VT = Inst.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
+
+  if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) {
+    return handleLoad(*Load, VT, ExtendedVT);
+  }
+  if (CallInst *Call = dyn_cast<CallInst>(&Inst)) {
+    // Insert after the call.
+    BasicBlock::iterator It(Inst);
+    IRBuilder<> Builder(Call->getParent(), ++It);
+    Builder.SetCurrentDebugLocation(Call->getDebugLoc());
+    return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
+  }
+  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(&Inst)) {
+    // The Invoke terminates the basic block, create a new basic block in
+    // between the successful invoke and the next block.
+    BasicBlock *InvokeBB = Invoke->getParent();
+    BasicBlock *NextBB = Invoke->getNormalDest();
+    BasicBlock *NewBB =
+        BasicBlock::Create(*Context, "", NextBB->getParent(), NextBB);
+    Inst.replaceSuccessorWith(NextBB, NewBB);
+
+    IRBuilder<> Builder(NewBB);
+    Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
+    Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
+    Builder.CreateBr(NextBB);
+    // Phis in NextBB that listed InvokeBB as a predecessor must now list the
+    // block we just spliced onto that edge.
+    NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
+    return Shadow;
+  }
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BinOp));
+    Builder.SetCurrentDebugLocation(BinOp->getDebugLoc());
+    return Builder.CreateBinOp(BinOp->getOpcode(),
+                               Map.getShadow(BinOp->getOperand(0)),
+                               Map.getShadow(BinOp->getOperand(1)));
+  }
+  if (UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*UnaryOp));
+    Builder.SetCurrentDebugLocation(UnaryOp->getDebugLoc());
+    return Builder.CreateUnOp(UnaryOp->getOpcode(),
+                              Map.getShadow(UnaryOp->getOperand(0)));
+  }
+  if (FPTruncInst *Trunc = dyn_cast<FPTruncInst>(&Inst)) {
+    return handleTrunc(*Trunc, VT, ExtendedVT, Map);
+  }
+  if (FPExtInst *Ext = dyn_cast<FPExtInst>(&Inst)) {
+    return handleExt(*Ext, VT, ExtendedVT, Map);
+  }
+  if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
+    // The isa<> checks above already established the type, so use cast<>
+    // (which asserts) instead of an unchecked dyn_cast<>.
+    CastInst *Cast = cast<CastInst>(&Inst);
+    IRBuilder<> Builder(getNextInstructionOrDie(*Cast));
+    Builder.SetCurrentDebugLocation(Cast->getDebugLoc());
+    // Convert the integer operand straight to the extended type.
+    return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
+                              ExtendedVT);
+  }
+
+  if (SelectInst *S = dyn_cast<SelectInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*S));
+    Builder.SetCurrentDebugLocation(S->getDebugLoc());
+    return Builder.CreateSelect(S->getCondition(),
+                                Map.getShadow(S->getTrueValue()),
+                                Map.getShadow(S->getFalseValue()));
+  }
+
+  if (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    return Builder.CreateExtractElement(
+        Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
+  }
+
+  if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Insert));
+    Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+    return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
+                                       Map.getShadow(Insert->getOperand(1)),
+                                       Insert->getOperand(2));
+  }
+
+  if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Shuffle));
+    Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
+    return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
+                                       Map.getShadow(Shuffle->getOperand(1)),
+                                       Shuffle->getShuffleMask());
+  }
+
+  if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    // FIXME: We could make aggregate object first class citizens. For now we
+    // just extend the extracted value.
+    return Builder.CreateFPExt(Extract, ExtendedVT);
+  }
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BC));
+    Builder.SetCurrentDebugLocation(BC->getDebugLoc());
+    // The bitcast source is not FT data we track; extend the result instead.
+    return Builder.CreateCast(Instruction::FPExt, BC, ExtendedVT);
+  }
+
+  errs() << "FIXME: implement " << Inst.getOpcodeName() << "\n";
+  llvm_unreachable("not implemented");
+}
+
+// Ensures that Root, if it defines an FT value, has a shadow registered in
+// Map. FT operands that are still missing a shadow are handled first, using an
+// explicit depth-first worklist. The walk terminates because phis and
+// arguments are given shadows before this function runs.
+void NumericalStabilitySanitizer::maybeCreateShadowValue(
+    Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
+  // Values without an extended type are not FT values: nothing to shadow.
+  if (Config.getExtendedFPType(Root.getType()) == nullptr)
+    return;
+
+  // Already shadowed, e.g. as a dependency of an earlier root.
+  if (Map.hasShadow(&Root))
+    return;
+
+  assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
+
+  std::vector<Instruction *> Pending;
+  Pending.push_back(&Root);
+  while (!Pending.empty()) {
+    Instruction *const Cur = Pending.back();
+
+    // An instruction can be queued more than once; by the time we get back to
+    // it, a deeper visit may already have produced its shadow (see the
+    // `forward_use_with_two_uses` test).
+    if (Map.hasShadow(Cur)) {
+      Pending.pop_back();
+      continue;
+    }
+
+    // Queue every FT operand that still lacks a shadow.
+    const auto DepthBefore = Pending.size();
+    for (Value *Operand : Cur->operands()) {
+      if (!Config.getExtendedFPType(Operand->getType()))
+        continue; // Not an FT value.
+      if (!Map.hasShadow(Operand))
+        Pending.push_back(cast<Instruction>(Operand));
+    }
+
+    // Something was queued: handle the operands first, then revisit Cur.
+    if (Pending.size() != DepthBefore)
+      continue;
+
+    // Every operand is shadowed, so Cur's own shadow can be built now.
+    Map.setShadow(Cur, createShadowValueWithOperandsAvailable(*Cur, TLI, Map));
+    Pending.pop_back();
+  }
+}
+
+// Mirrors a store of an FT value into shadow memory: asks the runtime for the
+// shadow address of the destination and writes the (optionally checked)
+// shadow of the stored value there. All instructions are emitted before
+// Store. ExtendedVT is not used by this function.
+void NumericalStabilitySanitizer::propagateFTStore(
+    StoreInst &Store, Type *const VT, Type *const ExtendedVT,
+    const ValueToShadowMap &Map) {
+  Value *const Original = Store.getValueOperand();
+
+  IRBuilder<> Builder(&Store);
+  Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+
+  const auto Extents = getMemoryExtentsOrDie(VT);
+  Value *const ShadowPtr = Builder.CreateCall(
+      NsanGetShadowPtrForStore[Extents.ValueType],
+      {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
+
+  Value *ShadowToStore = Map.getShadow(Original);
+
+  // Stores are only checked in optimized functions: unoptimized code spills
+  // to the stack so often that checking every store creates false positives.
+  const bool CheckThisStore =
+      !Store.getFunction()->hasOptNone() && ClCheckStores;
+  if (CheckThisStore) {
+    ShadowToStore =
+        emitCheck(Original, ShadowToStore, Builder,
+                  CheckLoc::makeStore(Store.getPointerOperand()));
+    ++NumInstrumentedFTStores;
+  }
+
+  Builder.CreateAlignedStore(ShadowToStore, ShadowPtr, Align(1),
+                             Store.isVolatile());
+}
+
+// A non-ft store needs to invalidate shadow memory. Exceptions are:
+//   - memory transfers of floating-point data through other pointer types (llvm
+//     optimization passes transform `*(float*)a = *(float*)b` into
+//     `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
+//   - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
+//     ints. Note that this is not really necessary because if the value is
+//     unknown the framework will re-extend it on load anyway. It just felt
+//     easier to debug tests with vectors of FTs.
+//
+// Store is the non-FT store, VT the type of the stored value. Map is not
+// consulted. Shadow updates are emitted right after Store; in the memory
+// transfer case the raw shadow reads are emitted right after the feeding load.
+void NumericalStabilitySanitizer::propagateNonFTStore(
+    StoreInst &Store, Type *const VT, const ValueToShadowMap &Map) {
+  Value *PtrOp = Store.getPointerOperand();
+  IRBuilder<> Builder(getNextInstructionOrDie(Store));
+  Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+  Value *Dst = PtrOp;
+  const DataLayout &DL =
+      Store.getParent()->getParent()->getParent()->getDataLayout();
+  TypeSize SlotSize = DL.getTypeStoreSize(VT);
+  assert(!SlotSize.isScalable() && "unsupported");
+  const auto LoadSizeBytes = SlotSize.getFixedValue();
+  Value *ValueSize = Builder.Insert(Constant::getIntegerValue(
+      IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes)));
+
+  ++NumInstrumentedNonFTStores;
+  Value *StoredValue = Store.getValueOperand();
+  if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
+    // FIXME: Handle the case when the value is from a phi.
+    // This is a memory transfer with memcpy semantics. Copy the type and
+    // value from the source. Note that we cannot use __nsan_copy_values()
+    // here, because that will not work when there is a write to memory in
+    // between the load and the store, e.g. in the case of a swap.
+    Type *ShadowTypeIntTy = Type::getIntNTy(*Context, 8 * LoadSizeBytes);
+    Type *ShadowValueIntTy =
+        Type::getIntNTy(*Context, 8 * kShadowScale * LoadSizeBytes);
+    IRBuilder<> LoadBuilder(getNextInstructionOrDie(*Load));
+    // The shadow reads belong to the load, so give them the load's debug
+    // location (Builder already carries the store's location).
+    LoadBuilder.SetCurrentDebugLocation(Load->getDebugLoc());
+    Value *LoadSrc = Load->getPointerOperand();
+    // Read the shadow type and value at load time. The type has the same size
+    // as the FT value, the value has twice its size.
+    // FIXME: cache them to avoid re-creating them when a load is used by
+    // several stores. Maybe create them like the FT shadows when a load is
+    // encountered.
+    Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
+        ShadowTypeIntTy,
+        LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
+        /*isVolatile=*/false);
+    Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
+        ShadowValueIntTy,
+        LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
+        /*isVolatile=*/false);
+
+    // Write back the shadow type and value at store time.
+    Builder.CreateAlignedStore(
+        RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
+        Align(1),
+        /*isVolatile=*/false);
+    Builder.CreateAlignedStore(RawShadowValue,
+                               Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
+                               Align(1),
+                               /*isVolatile=*/false);
+
+    ++NumInstrumentedNonFTMemcpyStores;
+    return;
+  }
+  if (Constant *C = dyn_cast<Constant>(StoredValue)) {
+    // This might be a fp constant stored as an int. Bitcast and store if it has
+    // appropriate size.
+    Type *BitcastTy = nullptr; // The FT type to bitcast to.
+    if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) {
+      switch (CInt->getType()->getScalarSizeInBits()) {
+      case 32:
+        BitcastTy = Type::getFloatTy(*Context);
+        break;
+      case 64:
+        BitcastTy = Type::getDoubleTy(*Context);
+        break;
+      case 80:
+        BitcastTy = Type::getX86_FP80Ty(*Context);
+        break;
+      default:
+        break;
+      }
+    } else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C)) {
+      const int NumElements =
+          cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
+      switch (CDV->getType()->getScalarSizeInBits()) {
+      case 32:
+        BitcastTy =
+            VectorType::get(Type::getFloatTy(*Context), NumElements, false);
+        break;
+      case 64:
+        BitcastTy =
+            VectorType::get(Type::getDoubleTy(*Context), NumElements, false);
+        break;
+      case 80:
+        BitcastTy =
+            VectorType::get(Type::getX86_FP80Ty(*Context), NumElements, false);
+        break;
+      default:
+        break;
+      }
+    }
+    if (BitcastTy) {
+      const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
+      Value *ShadowPtr = Builder.CreateCall(
+          NsanGetShadowPtrForStore[Extents.ValueType],
+          {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
+      // Bitcast the integer value to the appropriate FT type and extend to 2FT.
+      Type *ExtVT = Config.getExtendedFPType(BitcastTy);
+      Value *Shadow = Builder.CreateCast(
+          Instruction::FPExt, Builder.CreateBitCast(C, BitcastTy), ExtVT);
+      Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
+                                 Store.isVolatile());
+      return;
+    }
+  }
+  // All other stores just reset the shadow value to unknown.
+  Builder.CreateCall(NsanSetValueUnknown, {Dst, ValueSize});
+}
+
+// Emits the instrumentation for instructions that consume shadow values
+// rather than define them. Depending on the kind of Inst this handles stores,
+// floating-point comparisons, calls/invokes (including memory intrinsics),
+// returns and insertvalue. Any other instruction is left untouched.
+// Shadows of FT operands are read from Map.
+void NumericalStabilitySanitizer::propagateShadowValues(
+ Instruction &Inst, const TargetLibraryInfo &TLI,
+ const ValueToShadowMap &Map) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) {
+ Value *StoredValue = Store->getValueOperand();
+ Type *const VT = StoredValue->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ // No extended type means the stored value is not an FT value.
+ if (ExtendedVT == nullptr)
+ return propagateNonFTStore(*Store, VT, Map);
+ return propagateFTStore(*Store, VT, ExtendedVT, Map);
+ }
+
+ if (FCmpInst *FCmp = dyn_cast<FCmpInst>(&Inst)) {
+ emitFCmpCheck(*FCmp, Map);
+ return;
+ }
+
+ if (CallBase *CB = dyn_cast<CallBase>(&Inst)) {
+ maybeAddSuffixForNsanInterface(CB);
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
+ // Memory intrinsics update shadow memory and do not go through the shadow
+ // stack.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
+ instrumentMemIntrinsic(MI);
+ return;
+ }
+ populateShadowStack(*CB, TLI, Map);
+ return;
+ }
+
+ if (ReturnInst *RetInst = dyn_cast<ReturnInst>(&Inst)) {
+ // With return checking disabled, nothing is emitted for returns (the
+ // shadow return slot is not written either).
+ if (!ClCheckRet)
+ return;
+
+ Value *RV = RetInst->getReturnValue();
+ if (RV == nullptr)
+ return; // This is a `ret void`.
+ Type *const VT = RV->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ if (ExtendedVT == nullptr)
+ return; // Not an FT ret.
+ Value *RVShadow = Map.getShadow(RV);
+ IRBuilder<> Builder(&Inst);
+ Builder.SetCurrentDebugLocation(RetInst->getDebugLoc());
+
+ RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
+ ++NumInstrumentedFTRets;
+ // Store tag.
+ // The tag is the address of the enclosing function.
+ Value *FnAddr =
+ Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
+ Builder.CreateStore(FnAddr, NsanShadowRetTag);
+ // Store value.
+ Value *ShadowRetValPtr =
+ Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
+ Builder.CreateStore(RVShadow, ShadowRetValPtr);
+ return;
+ }
+
+ if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
+ // Operand 1 is the value being inserted into the aggregate.
+ Value *V = Insert->getOperand(1);
+ Type *const VT = V->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ if (ExtendedVT == nullptr)
+ return;
+ IRBuilder<> Builder(Insert);
+ Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+ // Only a check is emitted here; its result is not used.
+ emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
+ return;
+ }
+}
+
+// Converts the function-level fast-math string attributes of F into
+// per-instruction fast math flags. Each attribute whose value is "true" is
+// removed from F and the matching flag is then set on every FPMathOperator
+// in Instructions.
+// FIXME: Make this controllable with a flag.
+static void moveFastMathFlags(Function &F,
+                              std::vector<Instruction *> &Instructions) {
+  // Strips Attr from F when it is set to "true" and reports whether it did.
+  const auto TakeAttr = [&F](const char *Attr) {
+    if (F.getFnAttribute(Attr).getValueAsString() == "true") {
+      F.removeFnAttr(Attr);
+      return true;
+    }
+    return false;
+  };
+
+  FastMathFlags FMF;
+  if (TakeAttr("unsafe-fp-math"))
+    FMF.setFast();
+  if (TakeAttr("no-infs-fp-math"))
+    FMF.setNoInfs();
+  if (TakeAttr("no-nans-fp-math"))
+    FMF.setNoNaNs();
+  if (TakeAttr("no-signed-zeros-fp-math"))
+    FMF.setNoSignedZeros();
+
+  for (Instruction *Inst : Instructions) {
+    if (!isa<FPMathOperator>(Inst))
+      continue;
+    Inst->setFastMathFlags(FMF);
+  }
+}
+
+// Instruments F if it carries the sanitize_numericalstability attribute.
+// Returns false without touching F when the attribute is missing, when F is
+// the nsan module constructor, or when the configuration cannot be
+// initialized. Otherwise returns whether any shadow value was created.
+bool NumericalStabilitySanitizer::sanitizeFunction(
+    Function &F, const TargetLibraryInfo &TLI) {
+  if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability))
+    return false;
+
+  // This is required to prevent instrumenting call to __nsan_init from within
+  // the module constructor.
+  if (F.getName() == kNsanModuleCtorName)
+    return false;
+  if (!Config.initialize(&F.getParent()->getContext()))
+    return false;
+  initialize(*F.getParent());
+
+  // The instrumentation maintains:
+  //  - for each IR value `v` of floating-point (or vector floating-point) type
+  //    FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
+  //    double for float and f128 for double).
+  //  - A shadow memory, which stores `s(v)` for any `v` that has been stored,
+  //    along with a shadow memory tag, which stores whether the value in the
+  //    corresponding shadow memory is valid. Note that this might be
+  //    incorrect if a non-instrumented function stores to memory, or if
+  //    memory is stored to through a char pointer.
+  //  - A shadow stack, which holds `s(v)` for any floating-point argument `v`
+  //    of a call to an instrumented function. This allows
+  //    instrumented functions to retrieve the shadow values for their
+  //    arguments.
+  //    Because instrumented functions can be called from non-instrumented
+  //    functions, the stack needs to include a tag so that the instrumented
+  //    function knows whether shadow values are available for their
+  //    parameters (i.e. whether it was called by an instrumented function).
+  //    When shadow arguments are not available, they have to be recreated by
+  //    extending the precision of the non-shadow arguments to the shadow
+  //    value. Non-instrumented functions do not modify (or even know about) the
+  //    shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
+  //    stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
+  //    for the function (we use the address of the function). Both variables
+  //    are thread local.
+  //    Example:
+  //     calls                             shadow stack tag      shadow stack
+  //     =======================================================================
+  //     non_instrumented_1()              0                     0
+  //               |
+  //               v
+  //     instrumented_2(float a)           0                     0
+  //               |
+  //               v
+  //     instrumented_3(float b, double c) &instrumented_3       s(b),s(c)
+  //               |
+  //               v
+  //     instrumented_4(float d)           &instrumented_4       s(d)
+  //               |
+  //               v
+  //     non_instrumented_5(float e)       &non_instrumented_5   s(e)
+  //               |
+  //               v
+  //     instrumented_6(float f)           &non_instrumented_5   s(e)
+  //
+  //    On entry, instrumented_2 checks whether the tag corresponds to its
+  //    function ptr.
+  //    Note that functions reset the tag to 0 after reading shadow parameters.
+  //    This ensures that the function does not erroneously read invalid data if
+  //    called twice in the same stack, once from an instrumented function and
+  //    once from an uninstrumented one. For example, in the following example,
+  //    resetting the tag in (A) ensures that (B) does not reuse the same
+  //    shadow arguments (which would be incorrect).
+  //     instrumented_1(float a)
+  //               |
+  //               v
+  //     instrumented_2(float b)  (A)
+  //               |
+  //               v
+  //     non_instrumented_3()
+  //               |
+  //               v
+  //     instrumented_2(float b)  (B)
+  //
+  //  - A shadow return slot. Any function that returns a floating-point value
+  //    places a shadow return value in __nsan_shadow_ret_val. Again, because
+  //    we might be calling non-instrumented functions, this value is guarded
+  //    by __nsan_shadow_ret_tag marker indicating which instrumented function
+  //    placed the value in __nsan_shadow_ret_val, so that the caller can check
+  //    that this corresponds to the callee. Both variables are thread local.
+  //
+  //    For example, in the following example, the instrumentation in
+  //    `instrumented_1` rejects the shadow return value from `instrumented_3`
+  //    because it is not tagged as expected (`&instrumented_3` instead of
+  //    `non_instrumented_2`):
+  //
+  //     instrumented_1()
+  //               |
+  //               v
+  //     float non_instrumented_2()
+  //               |
+  //               v
+  //     float instrumented_3()
+  //
+  // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
+  // their overload on the shadow type.
+
+  // Collect all instructions before processing, as creating shadow values
+  // creates new instructions inside the function.
+  std::vector<Instruction *> OriginalInstructions;
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      OriginalInstructions.emplace_back(&Inst);
+    }
+  }
+
+  moveFastMathFlags(F, OriginalInstructions);
+  ValueToShadowMap ValueToShadow(&Config);
+
+  // In the first pass, we create shadow values for all FT function arguments
+  // and all phis. This ensures that the DFS of the next pass does not have
+  // any loops.
+  std::vector<PHINode *> OriginalPhis;
+  createShadowArguments(F, TLI, ValueToShadow);
+  for (Instruction *I : OriginalInstructions) {
+    if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+      if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
+        OriginalPhis.push_back(Phi);
+        ValueToShadow.setShadow(Phi, Shadow);
+      }
+    }
+  }
+
+  // Create shadow values for all instructions creating FT values.
+  for (Instruction *I : OriginalInstructions) {
+    maybeCreateShadowValue(*I, TLI, ValueToShadow);
+  }
+
+  // Propagate shadow values across stores, calls and rets.
+  for (Instruction *I : OriginalInstructions) {
+    propagateShadowValues(*I, TLI, ValueToShadow);
+  }
+
+  // The last pass populates shadow phis with shadow values.
+  for (PHINode *Phi : OriginalPhis) {
+    // The shadow registered for a phi in the first pass is itself a phi, so
+    // cast<> (which asserts) is the right tool here.
+    PHINode *ShadowPhi = cast<PHINode>(ValueToShadow.getShadow(Phi));
+    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) {
+      Value *V = Phi->getIncomingValue(I);
+      Value *Shadow = ValueToShadow.getShadow(V);
+      // For some instructions (e.g. invoke), the shadow is created in a
+      // separate block, different from the block where the original value is
+      // created. The invoke handling rewrites the incoming block of the
+      // original phi accordingly, so the incoming block is read from the
+      // original phi here rather than derived from the defining instruction.
+      BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
+      ShadowPhi->addIncoming(Shadow, IncomingBB);
+    }
+  }
+
+  return !ValueToShadow.empty();
+}
+
+// Makes a memory intrinsic update shadow memory as well. A runtime call is
+// inserted before MI: memset-like intrinsics mark the destination range as
+// unknown, memory transfers copy the shadow values from source to
+// destination. Other memory intrinsics are left alone.
+// NOTE(review): the function returns false on every path, even when a call was
+// inserted; the visible caller ignores the result.
+bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> Builder(MI);
+
+  if (auto *Set = dyn_cast<MemSetInst>(MI)) {
+    Builder.SetCurrentDebugLocation(Set->getDebugLoc());
+    Value *const Addr = Set->getArgOperand(0);
+    Value *const Size =
+        Builder.CreateIntCast(Set->getArgOperand(2), IntptrTy, false);
+    Builder.CreateCall(NsanSetValueUnknown, {Addr, Size});
+    return false;
+  }
+
+  if (auto *Transfer = dyn_cast<MemTransferInst>(MI)) {
+    Builder.SetCurrentDebugLocation(Transfer->getDebugLoc());
+    Value *const DstAddr = Transfer->getArgOperand(0);
+    Value *const SrcAddr = Transfer->getArgOperand(1);
+    Value *const Size =
+        Builder.CreateIntCast(Transfer->getArgOperand(2), IntptrTy, false);
+    Builder.CreateCall(NsanCopyValues, {DstAddr, SrcAddr, Size});
+    return false;
+  }
+
+  return false;
+}
+
+// Adjusts direct calls to `__nsan_*` interface functions. At present only
+// `__nsan_dump_shadow_mem` is rewritten: its fourth argument is replaced by a
+// constant describing the shadow types in use. Indirect calls and all other
+// callees are left untouched.
+void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
+  Function *Fn = CI->getCalledFunction();
+  if (Fn == nullptr)
+    return;
+
+  if (!Fn->getName().starts_with("__nsan_"))
+    return;
+
+  if (Fn->getName() == "__nsan_dump_shadow_mem") {
+    assert(CI->arg_size() == 4 &&
+           "invalid prototype for __nsan_dump_shadow_mem");
+    // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
+    // configuration:
+    // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
+    // | shadow_type_id_for_float
+    const uint64_t ShadowValueTypeIds =
+        (static_cast<uint64_t>(
+             Config.byValueType(kLongDouble).getNsanTypeId())
+         << 16) |
+        (static_cast<uint64_t>(Config.byValueType(kDouble).getNsanTypeId())
+         << 8) |
+        static_cast<uint64_t>(Config.byValueType(kFloat).getNsanTypeId());
+    CI->setArgOperand(3, ConstantInt::get(IntptrTy, ShadowValueTypeIds));
+  }
+}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 6988292ac7156..d0b510f1eebef 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -954,6 +954,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::ShadowCallStack:
case Attribute::SanitizeAddress:
case Attribute::SanitizeMemory:
+ case Attribute::SanitizeNumericalStability:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
case Attribute::SanitizeMemTag:
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
new file mode 100644
index 0000000000000..a2c0695b86f1b
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
@@ -0,0 +1,931 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -nsan-truncate-fcmp-eq=false -S %s | FileCheck %s --check-prefixes=CHECK,DQQ
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dlq -nsan-truncate-fcmp-eq=false -S %s | FileCheck %s --check-prefixes=CHECK,DLQ
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests with simple control flow.
+
+@float_const = private unnamed_addr constant float 0.5
+@x86_fp80_const = private unnamed_addr constant x86_fp80 0xK3FC9E69594BEC44DE000
+@double_const = private unnamed_addr constant double 0.5
+
+
+define float @return_param_float(float %a) sanitize_numericalstability {
+; CHECK-LABEL: @return_param_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @return_param_float to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[A]], double [[TMP4]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[A]] to double
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @return_param_float to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP8]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[A]]
+;
+entry:
+ ret float %a
+}
+
+; Note that the shadow fadd should not inherit the `fast` flag from the original fadd.
+define float @param_add_return_float(float %a) sanitize_numericalstability {
+; CHECK-LABEL: @param_add_return_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[B]]
+;
+entry:
+ %b = fadd fast float %a, 1.0
+ ret float %b
+}
+
+define x86_fp80 @param_add_return_x86_fp80(x86_fp80 %a) sanitize_numericalstability {
+; CHECK-LABEL: @param_add_return_x86_fp80(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_x86_fp80 to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[B:%.*]] = fadd x86_fp80 [[A]], 0xK3FC9E69594BEC44DE000
+; CHECK-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xLC0000000000000003FC9CD2B297D889B
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[B]], fp128 [[TMP5]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = fpext x86_fp80 [[B]] to fp128
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_x86_fp80 to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store fp128 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16
+; CHECK-NEXT: ret x86_fp80 [[B]]
+;
+entry:
+ %b = fadd x86_fp80 %a, 0xK3FC9E69594BEC44DE000
+ ret x86_fp80 %b
+}
+
+define double @param_add_return_double(double %a) sanitize_numericalstability {
+; DQQ-LABEL: @param_add_return_double(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_double to i64)
+; DQQ-NEXT: [[TMP2:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1
+; DQQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to fp128
+; DQQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]]
+; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00
+; DQQ-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xL00000000000000003FFF000000000000
+; DQQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_q(double [[B]], fp128 [[TMP5]], i32 1, i64 0)
+; DQQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; DQQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to fp128
+; DQQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]]
+; DQQ-NEXT: store i64 ptrtoint (ptr @param_add_return_double to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DQQ-NEXT: store fp128 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16
+; DQQ-NEXT: ret double [[B]]
+;
+; DLQ-LABEL: @param_add_return_double(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_double to i64)
+; DLQ-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1
+; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to x86_fp80
+; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], x86_fp80 [[TMP2]], x86_fp80 [[TMP3]]
+; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00
+; DLQ-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP4]], 0xK3FFF8000000000000000
+; DLQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_l(double [[B]], x86_fp80 [[TMP5]], i32 1, i64 0)
+; DLQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; DLQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to x86_fp80
+; DLQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], x86_fp80 [[TMP8]], x86_fp80 [[TMP5]]
+; DLQ-NEXT: store i64 ptrtoint (ptr @param_add_return_double to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DLQ-NEXT: store x86_fp80 [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 16
+; DLQ-NEXT: ret double [[B]]
+;
+entry:
+ %b = fadd double %a, 1.0
+ ret double %b
+}
+
+define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitize_numericalstability {
+; CHECK-LABEL: @return_param_add_return_float_vector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @return_param_add_return_float_vector to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[B]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP9]], double [[TMP10]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP8]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[B]] to <2 x double>
+; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP5]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @return_param_add_return_float_vector to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store <2 x double> [[TMP15]], ptr @__nsan_shadow_ret_ptr, align 16
+; CHECK-NEXT: ret <2 x float> [[B]]
+;
+entry:
+ %b = fadd <2 x float> %a, <float 1.0, float 1.0>
+ ret <2 x float> %b
+}
+
+; TODO: Array-typed parameters and return values are not instrumented for now.
+define [2 x float] @return_param_float_array([2 x float] %a) sanitize_numericalstability {
+; CHECK-LABEL: @return_param_float_array(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret [2 x float] [[A:%.*]]
+;
+entry:
+ ret [2 x float] %a
+}
+
+define void @constantload_add_store_float(ptr %dst) sanitize_numericalstability {
+; CHECK-LABEL: @constantload_add_store_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load float, ptr @float_const, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP1]], i32 4, i64 [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP6]], double [[TMP1]]
+; CHECK-NEXT: store double [[TMP7]], ptr [[TMP2]], align 1
+; CHECK-NEXT: store float [[C]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load float, ptr @float_const
+ %c = fadd float %b, 1.0
+ store float %c, ptr %dst, align 1
+ ret void
+}
+
+define void @constantload_add_store_x86_fp80(ptr %dst) sanitize_numericalstability {
+; CHECK-LABEL: @constantload_add_store_x86_fp80(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load x86_fp80, ptr @x86_fp80_const, align 16
+; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[B]] to fp128
+; CHECK-NEXT: [[C:%.*]] = fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xLC0000000000000003FC9CD2B297D889B
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_store(ptr [[DST:%.*]], i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = fpext x86_fp80 [[C]] to fp128
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], fp128 [[TMP6]], fp128 [[TMP1]]
+; CHECK-NEXT: store fp128 [[TMP7]], ptr [[TMP2]], align 1
+; CHECK-NEXT: store x86_fp80 [[C]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load x86_fp80, ptr @x86_fp80_const
+ %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000
+ store x86_fp80 %c, ptr %dst, align 1
+ ret void
+}
+
+define void @constantload_add_store_double(ptr %dst) sanitize_numericalstability {
+; DQQ-LABEL: @constantload_add_store_double(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[B:%.*]] = load double, ptr @double_const, align 8
+; DQQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to fp128
+; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00
+; DQQ-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xL00000000000000003FFF000000000000
+; DQQ-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[DST:%.*]], i64 1)
+; DQQ-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64
+; DQQ-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP3]])
+; DQQ-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1
+; DQQ-NEXT: [[TMP6:%.*]] = fpext double [[C]] to fp128
+; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], fp128 [[TMP6]], fp128 [[TMP1]]
+; DQQ-NEXT: store fp128 [[TMP7]], ptr [[TMP2]], align 1
+; DQQ-NEXT: store double [[C]], ptr [[DST]], align 1
+; DQQ-NEXT: ret void
+;
+; DLQ-LABEL: @constantload_add_store_double(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[B:%.*]] = load double, ptr @double_const, align 8
+; DLQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to x86_fp80
+; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00
+; DLQ-NEXT: [[TMP1:%.*]] = fadd x86_fp80 [[TMP0]], 0xK3FFF8000000000000000
+; DLQ-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[DST:%.*]], i64 1)
+; DLQ-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DST]] to i64
+; DLQ-NEXT: [[TMP4:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP1]], i32 4, i64 [[TMP3]])
+; DLQ-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 1
+; DLQ-NEXT: [[TMP6:%.*]] = fpext double [[C]] to x86_fp80
+; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], x86_fp80 [[TMP6]], x86_fp80 [[TMP1]]
+; DLQ-NEXT: store x86_fp80 [[TMP7]], ptr [[TMP2]], align 1
+; DLQ-NEXT: store double [[C]], ptr [[DST]], align 1
+; DLQ-NEXT: ret void
+;
+entry:
+ %b = load double, ptr @double_const
+ %c = fadd double %b, 1.0
+ store double %c, ptr %dst, align 1
+ ret void
+}
+
+define void @load_add_store_float(ptr %a) sanitize_numericalstability {
+; CHECK-LABEL: @load_add_store_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load float, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[A]], i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 1
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00
+; CHECK-NEXT: [[TMP8:%.*]] = fadd double [[TMP7]], 1.000000e+00
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 1)
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP8]], i32 4, i64 [[TMP10]])
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], double [[TMP13]], double [[TMP8]]
+; CHECK-NEXT: store double [[TMP14]], ptr [[TMP9]], align 1
+; CHECK-NEXT: store float [[C]], ptr [[A]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load float, ptr %a, align 1
+ %c = fadd float %b, 1.0
+ store float %c, ptr %a, align 1
+ ret void
+}
+
+define void @load_add_store_x86_fp80(ptr %a) sanitize_numericalstability {
+; CHECK-LABEL: @load_add_store_x86_fp80(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load x86_fp80, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_load(ptr [[A]], i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = load fp128, ptr [[TMP0]], align 1
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = fpext x86_fp80 [[B]] to fp128
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi fp128 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; CHECK-NEXT: [[C:%.*]] = fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000
+; CHECK-NEXT: [[TMP8:%.*]] = fadd fp128 [[TMP7]], 0xLC0000000000000003FC9CD2B297D889B
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_longdouble_store(ptr [[A]], i64 1)
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP8]], i32 4, i64 [[TMP10]])
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = fpext x86_fp80 [[C]] to fp128
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], fp128 [[TMP13]], fp128 [[TMP8]]
+; CHECK-NEXT: store fp128 [[TMP14]], ptr [[TMP9]], align 1
+; CHECK-NEXT: store x86_fp80 [[C]], ptr [[A]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load x86_fp80, ptr %a, align 1
+ %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000
+ store x86_fp80 %c, ptr %a, align 1
+ ret void
+}
+
+define void @load_add_store_double(ptr %a) sanitize_numericalstability {
+; DQQ-LABEL: @load_add_store_double(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[B:%.*]] = load double, ptr [[A:%.*]], align 1
+; DQQ-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_load(ptr [[A]], i64 1)
+; DQQ-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; DQQ-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; DQQ: 2:
+; DQQ-NEXT: [[TMP3:%.*]] = load fp128, ptr [[TMP0]], align 1
+; DQQ-NEXT: br label [[TMP6:%.*]]
+; DQQ: 4:
+; DQQ-NEXT: [[TMP5:%.*]] = fpext double [[B]] to fp128
+; DQQ-NEXT: br label [[TMP6]]
+; DQQ: 6:
+; DQQ-NEXT: [[TMP7:%.*]] = phi fp128 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00
+; DQQ-NEXT: [[TMP8:%.*]] = fadd fp128 [[TMP7]], 0xL00000000000000003FFF000000000000
+; DQQ-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[A]], i64 1)
+; DQQ-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; DQQ-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP8]], i32 4, i64 [[TMP10]])
+; DQQ-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
+; DQQ-NEXT: [[TMP13:%.*]] = fpext double [[C]] to fp128
+; DQQ-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], fp128 [[TMP13]], fp128 [[TMP8]]
+; DQQ-NEXT: store fp128 [[TMP14]], ptr [[TMP9]], align 1
+; DQQ-NEXT: store double [[C]], ptr [[A]], align 1
+; DQQ-NEXT: ret void
+;
+; DLQ-LABEL: @load_add_store_double(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[B:%.*]] = load double, ptr [[A:%.*]], align 1
+; DLQ-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_load(ptr [[A]], i64 1)
+; DLQ-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; DLQ-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; DLQ: 2:
+; DLQ-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[TMP0]], align 1
+; DLQ-NEXT: br label [[TMP6:%.*]]
+; DLQ: 4:
+; DLQ-NEXT: [[TMP5:%.*]] = fpext double [[B]] to x86_fp80
+; DLQ-NEXT: br label [[TMP6]]
+; DLQ: 6:
+; DLQ-NEXT: [[TMP7:%.*]] = phi x86_fp80 [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00
+; DLQ-NEXT: [[TMP8:%.*]] = fadd x86_fp80 [[TMP7]], 0xK3FFF8000000000000000
+; DLQ-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_double_store(ptr [[A]], i64 1)
+; DLQ-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; DLQ-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP8]], i32 4, i64 [[TMP10]])
+; DLQ-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1
+; DLQ-NEXT: [[TMP13:%.*]] = fpext double [[C]] to x86_fp80
+; DLQ-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], x86_fp80 [[TMP13]], x86_fp80 [[TMP8]]
+; DLQ-NEXT: store x86_fp80 [[TMP14]], ptr [[TMP9]], align 1
+; DLQ-NEXT: store double [[C]], ptr [[A]], align 1
+; DLQ-NEXT: ret void
+;
+entry:
+ %b = load double, ptr %a, align 1
+ %c = fadd double %b, 1.0
+ store double %c, ptr %a, align 1
+ ret void
+}
+
+define void @load_add_store_vector(ptr %a) sanitize_numericalstability {
+; CHECK-LABEL: @load_add_store_vector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load <2 x float>, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[A]], i64 2)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[TMP0]], align 1
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = fpext <2 x float> [[B]] to <2 x double>
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 2)
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP10]], double [[TMP11]], i32 4, i64 [[TMP12]])
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[C]], i64 1
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP8]], i64 1
+; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP14]], double [[TMP15]], i32 4, i64 [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP13]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = fpext <2 x float> [[C]] to <2 x double>
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], <2 x double> [[TMP20]], <2 x double> [[TMP8]]
+; CHECK-NEXT: store <2 x double> [[TMP21]], ptr [[TMP9]], align 1
+; CHECK-NEXT: store <2 x float> [[C]], ptr [[A]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load <2 x float>, ptr %a, align 1
+ %c = fadd <2 x float> %b, <float 1.0, float 1.0>
+ store <2 x float> %c, ptr %a, align 1
+ ret void
+}
+
+declare float @returns_float()
+
+define void @call_fn_returning_float(ptr %dst) sanitize_numericalstability {
+; CHECK-LABEL: @call_fn_returning_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = call float @returns_float()
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @returns_float to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00
+; CHECK-NEXT: [[TMP6:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP5]], i32 4, i64 [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], double [[TMP10]], double [[TMP5]]
+; CHECK-NEXT: store double [[TMP11]], ptr [[TMP6]], align 1
+; CHECK-NEXT: store float [[C]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = call float @returns_float()
+ %c = fadd float %b, 1.0
+ store float %c, ptr %dst, align 1
+ ret void
+}
+
+define float @return_fn_returning_float(ptr %dst) sanitize_numericalstability {
+; CHECK-LABEL: @return_fn_returning_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = call float @returns_float()
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @returns_float to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP4]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @return_fn_returning_float to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP8]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[B]]
+;
+entry:
+ %b = call float @returns_float()
+ ret float %b
+}
+
+declare void @takes_floats(float %a, i8 %b, double %c, x86_fp80 %d)
+
+define void @call_fn_taking_float() sanitize_numericalstability {
+; DQQ-LABEL: @call_fn_taking_float(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: store i64 ptrtoint (ptr @takes_floats to i64), ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1
+; DQQ-NEXT: store fp128 0xL00000000000000004000800000000000, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1
+; DQQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 24), align 1
+; DQQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000)
+; DQQ-NEXT: ret void
+;
+; DLQ-LABEL: @call_fn_taking_float(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: store i64 ptrtoint (ptr @takes_floats to i64), ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1
+; DLQ-NEXT: store x86_fp80 0xK4000C000000000000000, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1
+; DLQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 18), align 1
+; DLQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000)
+; DLQ-NEXT: ret void
+;
+entry:
+ call void @takes_floats(float 1.0, i8 2, double 3.0, x86_fp80 0xK3FC9E69594BEC44DE000)
+ ret void
+}
+
+declare float @llvm.sin.f32(float) readnone
+
+define float @call_sin_intrinsic() sanitize_numericalstability {
+; CHECK-LABEL: @call_sin_intrinsic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[R:%.*]] = call float @llvm.sin.f32(float 1.000000e+00)
+; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00)
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP0]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @call_sin_intrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %r = call float @llvm.sin.f32(float 1.0)
+ ret float %r
+}
+
+declare float @sinf(float)
+
+define float @call_sinf_libfunc() sanitize_numericalstability {
+; CHECK-LABEL: @call_sinf_libfunc(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[R:%.*]] = call float @sinf(float 1.000000e+00) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00)
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP0]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @call_sinf_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %r = call float @sinf(float 1.0)
+ ret float %r
+}
+
+declare double @sin(double)
+
+; FIXME: for an fp128 shadow, nsan calls `llvm.sin.f80` and extends the result to fp128.
+define double @call_sin_libfunc() sanitize_numericalstability {
+; DQQ-LABEL: @call_sin_libfunc(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #[[ATTR4]]
+; DQQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000)
+; DQQ-NEXT: [[TMP1:%.*]] = fpext x86_fp80 [[TMP0]] to fp128
+; DQQ-NEXT: [[TMP2:%.*]] = call i32 @__nsan_internal_check_double_q(double [[R]], fp128 [[TMP1]], i32 1, i64 0)
+; DQQ-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 1
+; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[R]] to fp128
+; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], fp128 [[TMP4]], fp128 [[TMP1]]
+; DQQ-NEXT: store i64 ptrtoint (ptr @call_sin_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DQQ-NEXT: store fp128 [[TMP5]], ptr @__nsan_shadow_ret_ptr, align 16
+; DQQ-NEXT: ret double [[R]]
+;
+; DLQ-LABEL: @call_sin_libfunc(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #[[ATTR4]]
+; DLQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000)
+; DLQ-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_double_l(double [[R]], x86_fp80 [[TMP0]], i32 1, i64 0)
+; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1
+; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[R]] to x86_fp80
+; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP0]]
+; DLQ-NEXT: store i64 ptrtoint (ptr @call_sin_libfunc to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DLQ-NEXT: store x86_fp80 [[TMP4]], ptr @__nsan_shadow_ret_ptr, align 16
+; DLQ-NEXT: ret double [[R]]
+;
+entry:
+ %r = call double @sin(double 1.0)
+ ret double %r
+}
+
+declare double @frexp(double, ptr)
+
+define double @call_frexp_libfunc_nointrinsic(double %0, ptr nocapture %1) sanitize_numericalstability { ; frexp is an ordinary libcall here: its shadow is expected to come from the return-shadow slot when the tag equals @frexp, else from an fpext of the real result
+; DQQ-LABEL: @call_frexp_libfunc_nointrinsic(
+; DQQ-NEXT: [[TMP3:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64)
+; DQQ-NEXT: [[TMP5:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1
+; DQQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to fp128
+; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], fp128 [[TMP5]], fp128 [[TMP6]]
+; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP0]], fp128 [[TMP7]], i32 2, i64 0)
+; DQQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
+; DQQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to fp128
+; DQQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], fp128 [[TMP10]], fp128 [[TMP7]]
+; DQQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], ptr [[TMP1:%.*]])
+; DQQ-NEXT: [[TMP13:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; DQQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (ptr @frexp to i64)
+; DQQ-NEXT: [[TMP15:%.*]] = load fp128, ptr @__nsan_shadow_ret_ptr, align 16
+; DQQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to fp128
+; DQQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], fp128 [[TMP15]], fp128 [[TMP16]]
+; DQQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP12]], fp128 [[TMP17]], i32 1, i64 0)
+; DQQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1
+; DQQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to fp128
+; DQQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], fp128 [[TMP20]], fp128 [[TMP17]]
+; DQQ-NEXT: store i64 ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DQQ-NEXT: store fp128 [[TMP21]], ptr @__nsan_shadow_ret_ptr, align 16
+; DQQ-NEXT: ret double [[TMP12]]
+;
+; DLQ-LABEL: @call_frexp_libfunc_nointrinsic(
+; DLQ-NEXT: [[TMP3:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64)
+; DLQ-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1
+; DLQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80
+; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], x86_fp80 [[TMP5]], x86_fp80 [[TMP6]]
+; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP0]], x86_fp80 [[TMP7]], i32 2, i64 0)
+; DLQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
+; DLQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to x86_fp80
+; DLQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], x86_fp80 [[TMP10]], x86_fp80 [[TMP7]]
+; DLQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], ptr [[TMP1:%.*]])
+; DLQ-NEXT: [[TMP13:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; DLQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (ptr @frexp to i64)
+; DLQ-NEXT: [[TMP15:%.*]] = load x86_fp80, ptr @__nsan_shadow_ret_ptr, align 16
+; DLQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to x86_fp80
+; DLQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], x86_fp80 [[TMP15]], x86_fp80 [[TMP16]]
+; DLQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP12]], x86_fp80 [[TMP17]], i32 1, i64 0)
+; DLQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1
+; DLQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to x86_fp80
+; DLQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], x86_fp80 [[TMP20]], x86_fp80 [[TMP17]]
+; DLQ-NEXT: store i64 ptrtoint (ptr @call_frexp_libfunc_nointrinsic to i64), ptr @__nsan_shadow_ret_tag, align 8
+; DLQ-NEXT: store x86_fp80 [[TMP21]], ptr @__nsan_shadow_ret_ptr, align 16
+; DLQ-NEXT: ret double [[TMP12]]
+;
+ %3 = tail call double @frexp(double %0, ptr %1) ; pointer operand spelled as opaque `ptr`, matching the expected output
+ ret double %3
+}
+
+define float @call_fn_taking_float_by_fn_ptr(ptr nocapture %fn_ptr) sanitize_numericalstability { ; indirect call: the expected args tag and return-tag comparison use the runtime value of %fn_ptr (ptrtoint) instead of a constant
+; CHECK-LABEL: @call_fn_taking_float_by_fn_ptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[FN_PTR:%.*]] to i64
+; CHECK-NEXT: store i64 [[TMP0]], ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: store double 1.000000e+00, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[R:%.*]] = call float [[FN_PTR]](float 1.000000e+00)
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[FN_PTR]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], double [[TMP4]], double [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @call_fn_taking_float_by_fn_ptr to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP10]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %r = call float %fn_ptr(float 1.0) ; call through the pointer argument; the constant's shadow (double 1.0) is expected in the args slot
+ ret float %r
+}
+
+define void @store_float(ptr %dst) sanitize_numericalstability { ; float constant store: the shadow (double 42.0) is expected to be written through __nsan_get_shadow_ptr_for_float_store before the real store
+; CHECK-LABEL: @store_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1)
+; CHECK-NEXT: store double 4.200000e+01, ptr [[TMP0]], align 1
+; CHECK-NEXT: store float 4.200000e+01, ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ store float 42.0, ptr %dst, align 1 ; the expected shadow store is also align 1
+ ret void
+}
+
+define void @store_non_float(ptr %dst) sanitize_numericalstability { ; integer store: a shadow write for the destination is still expected, placed after the real store
+; CHECK-LABEL: @store_non_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST]], i64 1)
+; CHECK-NEXT: store double 0x36F5000000000000, ptr [[TMP0]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ store i32 42, ptr %dst, align 1 ; expected shadow 0x36F5000000000000 equals the bits of i32 42 read as a float (42 * 2^-149) and widened to double
+ ret void
+}
+
+define i1 @inline_asm(double %0) sanitize_numericalstability { ; inline asm consuming a double: only the argument-shadow prologue is expected, with no check call around the asm and no shadow for its i32 result
+; DQQ-LABEL: @inline_asm(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @inline_asm to i64)
+; DQQ-NEXT: [[TMP3:%.*]] = load fp128, ptr @__nsan_shadow_args_ptr, align 1
+; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to fp128
+; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], fp128 [[TMP3]], fp128 [[TMP4]]
+; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]])
+; DQQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
+; DQQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0
+; DQQ-NEXT: ret i1 [[TMP8]]
+;
+; DLQ-LABEL: @inline_asm(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @inline_asm to i64)
+; DLQ-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr @__nsan_shadow_args_ptr, align 1
+; DLQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80
+; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP4]]
+; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]])
+; DLQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
+; DLQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0
+; DLQ-NEXT: ret i1 [[TMP8]]
+;
+entry:
+ %1 = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double %0) ; the double is passed in an "x" constraint operand; the result is an integer
+ %2 = trunc i32 %1 to i8 ; integer-only tail, expected to pass through unchanged
+ %3 = icmp slt i8 %2, 0
+ ret i1 %3
+}
+
+define void @vector_extract(<2 x float> %0) sanitize_numericalstability { ; extractelement on <2 x float>: a matching extractelement on the <2 x double> shadow vector is expected
+; CHECK-LABEL: @vector_extract(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_extract to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = extractelement <2 x float> %0, i32 1 ; the same lane index (1) is expected on the shadow extract
+ ret void
+}
+
+define void @vector_insert(<2 x float> %0) sanitize_numericalstability { ; insertelement on <2 x float>: a matching insertelement on the <2 x double> shadow vector is expected
+; CHECK-LABEL: @vector_insert(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_insert to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP0]], float 1.000000e+00, i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP5]], double 1.000000e+00, i32 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = insertelement <2 x float> %0, float 1.0, i32 1 ; the inserted constant is expected as double 1.0 on the shadow side
+ ret void
+}
+
+
+define void @vector_shuffle(<2 x float> %0) sanitize_numericalstability { ; shufflevector on <2 x float>: a shadow shufflevector on <2 x double> with the same mask is expected
+; CHECK-LABEL: @vector_shuffle(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @vector_shuffle to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = shufflevector <2 x float> %0, <2 x float> <float 1.0, float 1.0>, <2 x i32> <i32 1, i32 3> ; the constant second operand is expected widened to <2 x double> in the shadow shuffle
+ ret void
+}
+
+define void @aggregate_extract({i32, {float, i1}} %0) sanitize_numericalstability { ; extractvalue of a float nested in a struct argument: no args-tag load is expected, the shadow is just an fpext of the extracted value
+; CHECK-LABEL: @aggregate_extract(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, { float, i1 } } [[TMP0:%.*]], 1, 0
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = extractvalue {i32, {float, i1}} %0, 1, 0 ; indices 1, 0 select the float inside the inner struct
+ ret void
+}
+
+define void @aggregate_insert({i32, {float, i1}} %0, float %1) sanitize_numericalstability { ; insertvalue of a float argument: the float's shadow is expected to be checked before the insertvalue, and no shadow is expected for the aggregate
+; CHECK-LABEL: @aggregate_insert(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], ptrtoint (ptr @aggregate_insert to i64)
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[TMP1:%.*]] to double
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], double [[TMP4]], double [[TMP5]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP1]], double [[TMP6]], i32 5, i64 0)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float [[TMP1]], 1, 0
+; CHECK-NEXT: ret void
+;
+entry:
+ %2 = insertvalue {i32, {float, i1}} %0, float %1, 1, 0 ; the check call is expected with i32 5, presumably the check kind for this site (confirm against the runtime)
+ ret void
+}
+
+define void @aggregate_insert_avoid_const_check({i32, {float, i1}} %0) sanitize_numericalstability { ; inserting a float constant: no check call is expected, the output contains only the original insertvalue
+; CHECK-LABEL: @aggregate_insert_avoid_const_check(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float 1.000000e+00, 1, 0
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = insertvalue {i32, {float, i1}} %0, float 1.0, 1, 0 ; constant operand, in contrast to the float argument used in @aggregate_insert
+ ret void
+}
+
+
+declare float @fabsf(float)
+
+define float @sub_fabs(float %a, float %b) sanitize_numericalstability { ; fsub feeding the fabsf libcall: the call's shadow is expected to be computed with @llvm.fabs.f64 on the shadow operand, not read from the return-shadow slot
+; CHECK-LABEL: @sub_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @sub_fabs to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr getelementptr inbounds ([16384 x i8], ptr @__nsan_shadow_args_ptr, i64 0, i64 8), align 1
+; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[B:%.*]] to double
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[S:%.*]] = fsub float [[A]], [[B]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub double [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__nsan_internal_check_float_d(float [[S]], double [[TMP8]], i32 2, i64 0)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = fpext float [[S]] to double
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], double [[TMP11]], double [[TMP8]]
+; CHECK-NEXT: [[R:%.*]] = call float @fabsf(float [[S]]) #[[ATTR4]]
+; CHECK-NEXT: [[TMP13:%.*]] = call double @llvm.fabs.f64(double [[TMP8]])
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP13]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], double [[TMP16]], double [[TMP13]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @sub_fabs to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP17]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %s = fsub float %a, %b ; the shadow of %b is expected at byte offset 8 of the args shadow buffer
+ %r = call float @fabsf(float %s) ; the expected fabs shadow takes the unchecked fsub shadow (TMP8) as its operand
+ ret float %r
+}
+
+; Note that the `unsafe-fp-math` from the function attributes should be moved to
+; individual instructions, with the shadow instructions NOT getting the attribute.
+define float @param_add_return_float_unsafe_fp_math(float %a) #0 { ; attribute group #0 at the end of the file sets "unsafe-fp-math"="true" together with the no-nans, no-infs and no-signed-zeros flags
+; CHECK-LABEL: @param_add_return_float_unsafe_fp_math(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @param_add_return_float_unsafe_fp_math to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP9]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[B]]
+;
+entry:
+ %b = fadd float %a, 1.0 ; written without flags; the output is expected to carry "fast" on this fadd while the double shadow fadd stays flag-free
+ ret float %b
+}
+
+
+define void @truncate(<2 x double> %0) sanitize_numericalstability { ; vector fptrunc: the shadow is expected to be truncated from the double shadow type (fp128 or x86_fp80, per mapping) to the float shadow type (double)
+; DQQ-LABEL: @truncate(
+; DQQ-NEXT: entry:
+; DQQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @truncate to i64)
+; DQQ-NEXT: [[TMP3:%.*]] = load <2 x fp128>, ptr @__nsan_shadow_args_ptr, align 1
+; DQQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x fp128>
+; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x fp128> [[TMP3]], <2 x fp128> [[TMP4]]
+; DQQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float>
+; DQQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x fp128> [[TMP5]] to <2 x double>
+; DQQ-NEXT: ret void
+;
+; DLQ-LABEL: @truncate(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @truncate to i64)
+; DLQ-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr @__nsan_shadow_args_ptr, align 1
+; DLQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x x86_fp80>
+; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x x86_fp80> [[TMP3]], <2 x x86_fp80> [[TMP4]]
+; DLQ-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float>
+; DLQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x x86_fp80> [[TMP5]] to <2 x double>
+; DLQ-NEXT: ret void
+;
+entry:
+ %1 = fptrunc <2 x double> %0 to <2 x float> ; result is unused, so no check call is expected
+ ret void
+}
+
+define void @unaryop(float %a) sanitize_numericalstability { ; unary fneg: a matching fneg on the double shadow is expected
+; CHECK-LABEL: @unaryop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (ptr @unaryop to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr @__nsan_shadow_args_ptr, align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[C:%.*]] = fneg float [[A]]
+; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %c = fneg float %a ; result is unused, so no check call is expected
+ ret void
+}
+
+
+attributes #0 = { nounwind readonly uwtable sanitize_numericalstability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" }
+
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll
new file mode 100644
index 0000000000000..cb4e1ee13dbcd
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=nsan -nsan-shadow-type-mapping=dqq -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests with more involved control flow to check lazy construction of the
+; shadow values.
+
+define float @forward_use() sanitize_numericalstability { ; block "loop" uses %b before the textual definition of %b in "block1"; the shadow of %b is expected to be usable the same way
+; CHECK-LABEL: @forward_use(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BLOCK1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP2:%.*]], 2.000000e+00
+; CHECK-NEXT: br label [[BLOCK1]]
+; CHECK: block1:
+; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[B]] = fadd float [[A]], 1.000000e+00
+; CHECK-NEXT: [[TMP2]] = fadd double [[TMP1]], 1.000000e+00
+; CHECK-NEXT: br label [[LOOP]]
+;
+
+entry:
+ br label %block1
+
+loop:
+ %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available.
+ br label %block1
+
+block1:
+ %a = phi float [ %d, %loop], [ 1.0, %entry ] ; a shadow phi is expected right after this phi, with the constant widened to double
+ %b = fadd float %a, 1.0 ; definition of %b, textually after its use in the loop block
+ br label %loop
+}
+
+define float @forward_use_with_load(ptr %p) sanitize_numericalstability { ; forward reference to %b combined with a float load, whose shadow lookup is expected to split block1 into several blocks
+; CHECK-LABEL: @forward_use_with_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BLOCK1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP10:%.*]], 2.000000e+00
+; CHECK-NEXT: br label [[BLOCK1]]
+; CHECK: block1:
+; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[L:%.*]] = load float, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP2]], null
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP4:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP2]], align 1
+; CHECK-NEXT: br label [[TMP8:%.*]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[L]] to double
+; CHECK-NEXT: br label [[TMP8]]
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[TMP5]], [[TMP4]] ], [ [[TMP7]], [[TMP6]] ]
+; CHECK-NEXT: [[B]] = fadd float [[L]], 1.000000e+00
+; CHECK-NEXT: [[TMP10]] = fadd double [[TMP9]], 1.000000e+00
+; CHECK-NEXT: br label [[LOOP]]
+;
+
+entry:
+ br label %block1
+
+loop:
+ %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available.
+ br label %block1
+
+block1:
+ %a = phi float [ %d, %loop], [ 1.0, %entry ]
+ %l = load float, ptr %p ; the load creates a new block
+ %b = fadd float %l, 1.0 ; this requires shadow(%l).
+ br label %loop
+}
+
+define float @forward_use_with_two_uses() sanitize_numericalstability { ; forward reference to %b, where %b itself depends on a value (%t1) that has two uses inside block1
+; CHECK-LABEL: @forward_use_with_two_uses(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BLOCK1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP4:%.*]], 2.000000e+00
+; CHECK-NEXT: br label [[BLOCK1]]
+; CHECK: block1:
+; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[T1:%.*]] = fadd float [[A]], 1.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP1]], 1.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd float [[T1]], 3.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP2]], 3.000000e+00
+; CHECK-NEXT: [[B]] = fadd float [[T1]], [[T2]]
+; CHECK-NEXT: [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+
+entry:
+ br label %block1
+
+loop:
+ %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available.
+ br label %block1
+
+block1:
+ %a = phi float [ %d, %loop], [ 1.0, %entry ]
+ %t1 = fadd float %a, 1.0 ; a single shadow (TMP2) is expected for %t1 and is reused by both later shadow fadds
+ %t2 = fadd float %t1, 3.0 ; this requires shadow(%t1)
+ %b = fadd float %t1, %t2 ; this requires shadow(%t2) and shadow(%t1).
+ br label %loop
+}
+
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll
new file mode 100644
index 0000000000000..450d49aa85a1e
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests for invoke instructions that require special handling of the phis.
+
+declare float @may_throw()
+
+declare void @personalityFn()
+
+define float @invoke1() sanitize_numericalstability personality ptr @personalityFn { ; invoke returning a float: the return-shadow load is expected in a new block (label 0) inserted on the normal edge ahead of %continue
+; CHECK-LABEL: @invoke1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw()
+; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]]
+; CHECK: 0:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]]
+; CHECK-NEXT: br label [[CONTINUE:%.*]]
+; CHECK: continue:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: land:
+; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[CONTINUE]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[CONTINUE]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @invoke1 to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP10]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+
+entry:
+ %c = invoke float @may_throw() to label %continue unwind label %land
+
+continue:
+ br label %exit
+
+land:
+ %res = landingpad { ptr, i32 } cleanup
+ %lv = uitofp i32 1 to float
+ br label %exit
+
+exit:
+ %r = phi float [ %lv, %land], [ %c, %continue ]
+ ret float %r
+}
+
+define float @invoke2() sanitize_numericalstability personality ptr @personalityFn { ; like @invoke1, but the invoke result is used by an fadd in %continue, so the shadow loaded in the inserted block feeds a shadow fadd
+; CHECK-LABEL: @invoke2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw()
+; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]]
+; CHECK: 0:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]]
+; CHECK-NEXT: br label [[CONTINUE:%.*]]
+; CHECK: continue:
+; CHECK-NEXT: [[CV:%.*]] = fadd float [[C]], 2.000000e+00
+; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP5]], 2.000000e+00
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: land:
+; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[CV]], [[CONTINUE]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP6]], [[CONTINUE]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP7]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], double [[TMP10]], double [[TMP7]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @invoke2 to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP11]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+
+entry:
+ %c = invoke float @may_throw() to label %continue unwind label %land
+
+continue:
+ %cv = fadd float %c, 2.0
+ br label %exit
+
+land:
+ %res = landingpad { ptr, i32 } cleanup
+ %lv = uitofp i32 1 to float
+ br label %exit
+
+exit:
+ %r = phi float [ %lv, %land], [ %cv, %continue ]
+ ret float %r
+}
+
+define float @invoke3() sanitize_numericalstability personality ptr @personalityFn { ; the invoke's normal destination is %exit itself, which holds a phi: the inserted shadow block (label 0) is expected to replace %entry as the phi predecessor
+; CHECK-LABEL: @invoke3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw()
+; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]]
+; CHECK: land:
+; CHECK-NEXT: [[RES:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: 0:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (ptr @may_throw to i64)
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[TMP0]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[TMP0]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]]
+; CHECK-NEXT: store i64 ptrtoint (ptr @invoke3 to i64), ptr @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP10]], ptr @__nsan_shadow_ret_ptr, align 8
+; CHECK-NEXT: ret float [[R]]
+;
+
+entry:
+ %c = invoke float @may_throw() to label %exit unwind label %land
+
+land:
+ %res = landingpad { ptr, i32 } cleanup
+ %lv = uitofp i32 1 to float
+ br label %exit
+
+exit:
+ %r = phi float [ %lv, %land], [ %c, %entry ]
+ ret float %r
+}
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
new file mode 100644
index 0000000000000..fc55f4f5c5334
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=nsan -nsan-shadow-type-mapping=dqq -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests with memory manipulation (memcpy, llvm.memcpy, ...).
+
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+
+define void @call_memcpy_intrinsic(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { ; memcpy intrinsic: a __nsan_copy_values call with the same dst/src/size is expected right before it
+; CHECK-LABEL: @call_memcpy_intrinsic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @__nsan_copy_values(ptr [[A:%.*]], ptr [[B:%.*]], i64 16)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 16, i1 false)
+ ret void
+}
+
+declare dso_local ptr @memcpy(ptr, ptr, i64) local_unnamed_addr
+
+define void @call_memcpy(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { ; plain call to @memcpy: the call is expected to be kept (with an added attribute group) and no shadow-copy call inserted
+; CHECK-LABEL: @call_memcpy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call ptr @memcpy(ptr nonnull align 8 dereferenceable(16) [[A:%.*]], ptr nonnull align 8 dereferenceable(16) [[B:%.*]], i64 16) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call ptr @memcpy(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 16)
+ ret void
+}
+
+
+define void @transfer_float(ptr %dst, ptr %src) sanitize_numericalstability { ; float load then store: the double shadow is read on load (fpext of the value when the shadow pointer is null), then checked and written on store
+; CHECK-LABEL: @transfer_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T:%.*]] = load float, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[SRC]], i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 1
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[T]] to double
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[DST:%.*]], i64 1)
+; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__nsan_internal_check_float_d(float [[T]], double [[TMP7]], i32 4, i64 [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = fpext float [[T]] to double
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], double [[TMP12]], double [[TMP7]]
+; CHECK-NEXT: store double [[TMP13]], ptr [[TMP8]], align 1
+; CHECK-NEXT: store float [[T]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %t = load float, ptr %src
+ store float %t, ptr %dst, align 1
+ ret void
+}
+
+define void @transfer_non_float(ptr %dst, ptr %src) sanitize_numericalstability { ; i32 load then store: the raw shadow type (i32) and raw shadow (i64) read at %src are expected to be written back at %dst
+; CHECK-LABEL: @transfer_non_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[SRC:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[SRC]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[SRC]])
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 1
+; CHECK-NEXT: store i32 [[T]], ptr [[DST:%.*]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[DST]])
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[DST]])
+; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %t = load i32, ptr %src
+ store i32 %t, ptr %dst, align 1
+ ret void
+}
+
+define void @transfer_array(ptr %a) sanitize_numericalstability { ; [2 x float] aggregate load/store: expected to go through the raw shadow helpers (i64 shadow type, i128 shadow), not the float load/store helpers
+; CHECK-LABEL: @transfer_array(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load [2 x float], ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[A]])
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[A]])
+; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP2]], align 1
+; CHECK-NEXT: store [2 x float] [[B]], ptr [[A]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[A]])
+; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[A]])
+; CHECK-NEXT: store i128 [[TMP3]], ptr [[TMP5]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load [2 x float], ptr %a, align 1
+ store [2 x float] %b, ptr %a, align 1
+ ret void
+}
+
+define void @swap_untyped1(ptr nonnull align 8 %p, ptr nonnull align 8 %q) sanitize_numericalstability { ; swaps two i64 values (%q loaded first): the raw shadow type/shadow read with each value must be written at the location that value is stored to
+; CHECK-LABEL: @swap_untyped1(
+; CHECK-NEXT: [[QV:%.*]] = load i64, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]])
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]])
+; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[PV:%.*]] = load i64, ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]])
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]])
+; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 1
+; CHECK-NEXT: store i64 [[PV]], ptr [[Q]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]])
+; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]])
+; CHECK-NEXT: store i128 [[TMP8]], ptr [[TMP10]], align 1
+; CHECK-NEXT: store i64 [[QV]], ptr [[P]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]])
+; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]])
+; CHECK-NEXT: store i128 [[TMP4]], ptr [[TMP12]], align 1
+; CHECK-NEXT: ret void
+;
+ %qv = load i64, ptr %q
+ %pv = load i64, ptr %p
+ store i64 %pv, ptr %q, align 8
+ store i64 %qv, ptr %p, align 8
+ ret void
+}
+
+; Same as swap_untyped1, but the two loads are in the opposite order (the stores are unchanged).
+define void @swap_untyped2(ptr nonnull align 8 %p, ptr nonnull align 8 %q) sanitize_numericalstability { ; swaps two i64 values (%p loaded first): the raw shadow type/shadow read with each value must be written at the location that value is stored to
+; CHECK-LABEL: @swap_untyped2(
+; CHECK-NEXT: [[PV:%.*]] = load i64, ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]])
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]])
+; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[QV:%.*]] = load i64, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]])
+; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]])
+; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 1
+; CHECK-NEXT: store i64 [[PV]], ptr [[Q]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[Q]])
+; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[Q]])
+; CHECK-NEXT: store i128 [[TMP4]], ptr [[TMP10]], align 1
+; CHECK-NEXT: store i64 [[QV]], ptr [[P]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__nsan_internal_get_raw_shadow_type_ptr(ptr [[P]])
+; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = call ptr @__nsan_internal_get_raw_shadow_ptr(ptr [[P]])
+; CHECK-NEXT: store i128 [[TMP8]], ptr [[TMP12]], align 1
+; CHECK-NEXT: ret void
+;
+ %pv = load i64, ptr %p
+ %qv = load i64, ptr %q
+ store i64 %pv, ptr %q, align 8
+ store i64 %qv, ptr %p, align 8
+ ret void
+}
+
+define void @swap_ft1(ptr nonnull align 8 %p, ptr nonnull align 8 %q) sanitize_numericalstability { ; swaps two floats (%q loaded first): each store must be checked against, and write, the shadow obtained when the stored value was loaded
+; CHECK-LABEL: @swap_ft1(
+; CHECK-NEXT: [[QV:%.*]] = load float, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 1
+; CHECK-NEXT: br label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ]
+; CHECK-NEXT: [[PV:%.*]] = load float, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 1
+; CHECK-NEXT: br label [[TMP15:%.*]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: br label [[TMP15]]
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP16]], i32 4, i64 [[TMP18]])
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], double [[TMP21]], double [[TMP16]]
+; CHECK-NEXT: store double [[TMP22]], ptr [[TMP17]], align 1
+; CHECK-NEXT: store float [[PV]], ptr [[Q]], align 8
+; CHECK-NEXT: [[TMP23:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 1)
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP8]], i32 4, i64 [[TMP24]])
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP25]], 1
+; CHECK-NEXT: [[TMP27:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP26]], double [[TMP27]], double [[TMP8]]
+; CHECK-NEXT: store double [[TMP28]], ptr [[TMP23]], align 1
+; CHECK-NEXT: store float [[QV]], ptr [[P]], align 8
+; CHECK-NEXT: ret void
+;
+ %qv = load float, ptr %q
+ %pv = load float, ptr %p
+ store float %pv, ptr %q, align 8
+ store float %qv, ptr %p, align 8
+ ret void
+}
+
+; Same as swap_ft1, but the two loads are in the opposite order (the stores are unchanged).
+define void @swap_ft2(ptr nonnull align 8 %p, ptr nonnull align 8 %q) sanitize_numericalstability { ; swaps two floats (%p loaded first): each store must be checked against, and write, the shadow obtained when the stored value was loaded
+; CHECK-LABEL: @swap_ft2(
+; CHECK-NEXT: [[PV:%.*]] = load float, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 1
+; CHECK-NEXT: br label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ]
+; CHECK-NEXT: [[QV:%.*]] = load float, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 1)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 1
+; CHECK-NEXT: br label [[TMP15:%.*]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: br label [[TMP15]]
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP8]], i32 4, i64 [[TMP18]])
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], double [[TMP21]], double [[TMP8]]
+; CHECK-NEXT: store double [[TMP22]], ptr [[TMP17]], align 1
+; CHECK-NEXT: store float [[PV]], ptr [[Q]], align 8
+; CHECK-NEXT: [[TMP23:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 1)
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP16]], i32 4, i64 [[TMP24]])
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP25]], 1
+; CHECK-NEXT: [[TMP27:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP26]], double [[TMP27]], double [[TMP16]]
+; CHECK-NEXT: store double [[TMP28]], ptr [[TMP23]], align 1
+; CHECK-NEXT: store float [[QV]], ptr [[P]], align 8
+; CHECK-NEXT: ret void
+;
+ %pv = load float, ptr %p
+ %qv = load float, ptr %q
+ store float %pv, ptr %q, align 8
+ store float %qv, ptr %p, align 8
+ ret void
+}
+
+define void @swap_vectorft1(ptr nonnull align 16 %p, ptr nonnull align 16 %q) sanitize_numericalstability { ; swaps two <2 x float> vectors (%q loaded first): each store is checked per element (results or'ed together) against the <2 x double> shadow obtained when the stored value was loaded
+; CHECK-LABEL: @swap_vectorft1(
+; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[TMP1]], align 1
+; CHECK-NEXT: br label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ]
+; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, ptr [[TMP9]], align 1
+; CHECK-NEXT: br label [[TMP15:%.*]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP15]]
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[PV]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP16]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP21:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP18]], double [[TMP19]], i32 4, i64 [[TMP20]])
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[PV]], i64 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[TMP16]], i64 1
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP22]], double [[TMP23]], i32 4, i64 [[TMP24]])
+; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP21]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP26]], 1
+; CHECK-NEXT: [[TMP28:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], <2 x double> [[TMP28]], <2 x double> [[TMP16]]
+; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP17]], align 1
+; CHECK-NEXT: store <2 x float> [[PV]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[QV]], i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP31]], double [[TMP32]], i32 4, i64 [[TMP33]])
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[QV]], i64 1
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP8]], i64 1
+; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP35]], double [[TMP36]], i32 4, i64 [[TMP37]])
+; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP34]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 1
+; CHECK-NEXT: [[TMP41:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP40]], <2 x double> [[TMP41]], <2 x double> [[TMP8]]
+; CHECK-NEXT: store <2 x double> [[TMP42]], ptr [[TMP30]], align 1
+; CHECK-NEXT: store <2 x float> [[QV]], ptr [[P]], align 16
+; CHECK-NEXT: ret void
+;
+ %qv = load <2 x float>, ptr %q
+ %pv = load <2 x float>, ptr %p
+ store <2 x float> %pv, ptr %q, align 16
+ store <2 x float> %qv, ptr %p, align 16
+ ret void
+}
+
+; Same as swap_vectorft1, but the two loads are in the opposite order (the stores are unchanged).
+define void @swap_vectorft2(ptr nonnull align 16 %p, ptr nonnull align 16 %q) sanitize_numericalstability { ; swaps two <2 x float> vectors (%p loaded first): each store is checked per element (results or'ed together) against the <2 x double> shadow obtained when the stored value was loaded
+; CHECK-LABEL: @swap_vectorft2(
+; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[TMP1]], align 1
+; CHECK-NEXT: br label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x double> [ [[TMP4]], [[TMP3]] ], [ [[TMP6]], [[TMP5]] ]
+; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, ptr [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_load(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP9]], null
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP11:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, ptr [[TMP9]], align 1
+; CHECK-NEXT: br label [[TMP15:%.*]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP15]]
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[TMP11]] ], [ [[TMP14]], [[TMP13]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[Q]], i64 2)
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[PV]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP21:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP18]], double [[TMP19]], i32 4, i64 [[TMP20]])
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[PV]], i64 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[TMP8]], i64 1
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[Q]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP22]], double [[TMP23]], i32 4, i64 [[TMP24]])
+; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP21]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP26]], 1
+; CHECK-NEXT: [[TMP28:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP27]], <2 x double> [[TMP28]], <2 x double> [[TMP8]]
+; CHECK-NEXT: store <2 x double> [[TMP29]], ptr [[TMP17]], align 1
+; CHECK-NEXT: store <2 x float> [[PV]], ptr [[Q]], align 16
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[P]], i64 2)
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[QV]], i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP16]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP31]], double [[TMP32]], i32 4, i64 [[TMP33]])
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[QV]], i64 1
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[TMP16]], i64 1
+; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP35]], double [[TMP36]], i32 4, i64 [[TMP37]])
+; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP34]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 1
+; CHECK-NEXT: [[TMP41:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP40]], <2 x double> [[TMP41]], <2 x double> [[TMP16]]
+; CHECK-NEXT: store <2 x double> [[TMP42]], ptr [[TMP30]], align 1
+; CHECK-NEXT: store <2 x float> [[QV]], ptr [[P]], align 16
+; CHECK-NEXT: ret void
+;
+ %pv = load <2 x float>, ptr %p
+ %qv = load <2 x float>, ptr %q
+ store <2 x float> %pv, ptr %q, align 16
+ store <2 x float> %qv, ptr %p, align 16
+ ret void
+}
+
More information about the llvm-commits
mailing list