[compiler-rt] [llvm] [TySan] Add option to outline instrumentation (PR #120582)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 07:57:34 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (gbMattN)
<details>
<summary>Changes</summary>
Added a command line option to use function calls rather than inline checks for TySan instrumentation.
---
Full diff: https://github.com/llvm/llvm-project/pull/120582.diff
5 Files Affected:
- (modified) compiler-rt/lib/tysan/tysan.cpp (+107)
- (modified) compiler-rt/lib/tysan/tysan_platform.h (+15-1)
- (modified) llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp (+98-30)
- (added) llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll (+41)
- (added) llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll (+24)
``````````diff
diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp
index 39d78e7c95e0cd..91458bee4fda29 100644
--- a/compiler-rt/lib/tysan/tysan.cpp
+++ b/compiler-rt/lib/tysan/tysan.cpp
@@ -22,6 +22,7 @@
#include "tysan/tysan.h"
+#include <stdint.h>
#include <string.h>
using namespace __sanitizer;
@@ -207,6 +208,64 @@ static void reportError(void *Addr, int Size, tysan_type_descriptor *TD,
}
}
+ALWAYS_INLINE
+static void SetShadowType(tysan_type_descriptor *td,
+ tysan_type_descriptor **shadowData,
+ uint64_t AccessSize) {
+ *shadowData = td;
+ uint64_t shadowDataInt = (uint64_t)shadowData;
+
+ for (uint64_t i = 1; i < AccessSize; ++i) {
+ int64_t dataOffset = i << PtrShift();
+ int64_t *badShadowData = (int64_t *)(shadowDataInt + dataOffset);
+ int64_t badTD = int64_t(i) * -1;
+ *badShadowData = badTD;
+ }
+}
+
+ALWAYS_INLINE
+static bool GetNotAllBadTD(uint64_t ShadowDataInt, uint64_t AccessSize) {
+ bool notAllBadTD = false;
+ for (uint64_t i = 1; i < AccessSize; ++i) {
+ int64_t **unkShadowData = (int64_t **)(ShadowDataInt + (i << PtrShift()));
+ int64_t *ILdTD = *unkShadowData;
+ notAllBadTD = notAllBadTD || (ILdTD != nullptr);
+ }
+ return notAllBadTD;
+}
+
+ALWAYS_INLINE
+static bool GetNotAllUnkTD(uint64_t ShadowDataInt, uint64_t AccessSize) {
+ bool notAllBadTD = false;
+ for (uint64_t i = 1; i < AccessSize; ++i) {
+ int64_t *badShadowData = (int64_t *)(ShadowDataInt + (i << PtrShift()));
+ int64_t ILdTD = *badShadowData;
+ notAllBadTD = notAllBadTD || (ILdTD >= 0);
+ }
+ return notAllBadTD;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_instrument_mem_inst(char *dest, char *src, uint64_t size,
+ bool needsMemMove) {
+ tysan_type_descriptor **destShadowDataPtr = shadow_for(dest);
+
+ if (!src) {
+ internal_memset((char *)destShadowDataPtr, 0, size << PtrShift());
+ return;
+ }
+
+ uint64_t srcInt = (uint64_t)src;
+ uint64_t srcShadowInt = ((srcInt & AppMask()) << PtrShift()) + ShadowAddr();
+ uint64_t *srcShadow = (uint64_t *)srcShadowInt;
+
+ if (needsMemMove) {
+ internal_memmove((char *)destShadowDataPtr, srcShadow, size << PtrShift());
+ } else {
+ internal_memcpy((char *)destShadowDataPtr, srcShadow, size << PtrShift());
+ }
+}
+
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
__tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) {
GET_CALLER_PC_BP_SP;
@@ -253,6 +312,54 @@ __tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) {
}
}
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_instrument_with_shadow_update(void *ptr, tysan_type_descriptor *td,
+ bool sanitizeFunction,
+ uint64_t accessSize, int flags) {
+ tysan_type_descriptor **shadowData = shadow_for(ptr);
+ tysan_type_descriptor *loadedTD = *shadowData;
+ bool shadowIsNull = loadedTD == nullptr;
+
+ // TODO, sanitizeFunction is known at compile time, so maybe this is split
+ // into two different functions
+ if (sanitizeFunction) {
+
+ if (td != loadedTD) {
+
+ // We now know that the types did not match (we're on the slow path). If
+ // the type is unknown, then set it.
+ if (shadowIsNull) {
+ // We're about to set the type. Make sure that all bytes in the value
+ // are also of unknown type.
+ bool isAllUnknownTD = GetNotAllUnkTD((uint64_t)shadowData, accessSize);
+ if (isAllUnknownTD)
+ __tysan_check(ptr, accessSize, td, flags);
+ SetShadowType(td, shadowData, accessSize);
+ } else {
+ __tysan_check(ptr, accessSize, td, flags);
+ }
+ } else {
+ // We appear to have the right type. Make sure that all other bytes in
+ // the type are still marked as interior bytes. If not, call the runtime.
+ bool isNotAllBadTD = GetNotAllBadTD((uint64_t)shadowData, accessSize);
+ if (isNotAllBadTD) {
+ __tysan_check(ptr, accessSize, td, flags);
+ }
+ }
+ } else if (shadowIsNull) {
+ SetShadowType(td, shadowData, accessSize);
+ }
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_set_shadow_type(void *ptr, tysan_type_descriptor *td,
+ uint64_t accessSize) {
+ // In the mode where writes always set the type, for a write (which does
+ // not also read), we just set the type.
+ tysan_type_descriptor **shadow = shadow_for(ptr);
+ SetShadowType(td, shadow, accessSize);
+}
+
Flags __tysan::flags_data;
SANITIZER_INTERFACE_ATTRIBUTE uptr __tysan_shadow_memory_address;
diff --git a/compiler-rt/lib/tysan/tysan_platform.h b/compiler-rt/lib/tysan/tysan_platform.h
index f01392885d9398..19f77f0cace6be 100644
--- a/compiler-rt/lib/tysan/tysan_platform.h
+++ b/compiler-rt/lib/tysan/tysan_platform.h
@@ -21,24 +21,28 @@ struct Mapping {
static const uptr kShadowAddr = 0x010000000000ull;
static const uptr kAppAddr = 0x550000000000ull;
static const uptr kAppMemMsk = ~0x780000000000ull;
+ static const uptr kPtrShift = 3;
};
#elif defined(__aarch64__)
struct Mapping39 {
static const uptr kShadowAddr = 0x0800000000ull;
static const uptr kAppAddr = 0x5500000000ull;
static const uptr kAppMemMsk = ~0x7800000000ull;
+ static const uptr kPtrShift = 3;
};
struct Mapping42 {
static const uptr kShadowAddr = 0x10000000000ull;
static const uptr kAppAddr = 0x2aa00000000ull;
static const uptr kAppMemMsk = ~0x3c000000000ull;
+ static const uptr kPtrShift = 3;
};
struct Mapping48 {
static const uptr kShadowAddr = 0x0002000000000ull;
static const uptr kAppAddr = 0x0aaaa00000000ull;
static const uptr kAppMemMsk = ~0x0fff800000000ull;
+ static const uptr kPtrShift = 3;
};
#define TYSAN_RUNTIME_VMA 1
#else
@@ -49,7 +53,12 @@ struct Mapping48 {
extern int vmaSize;
#endif
-enum MappingType { MAPPING_SHADOW_ADDR, MAPPING_APP_ADDR, MAPPING_APP_MASK };
+enum MappingType {
+ MAPPING_SHADOW_ADDR,
+ MAPPING_APP_ADDR,
+ MAPPING_APP_MASK,
+ MAPPING_PTR_SHIFT
+};
template <typename Mapping, int Type> uptr MappingImpl(void) {
switch (Type) {
@@ -59,6 +68,8 @@ template <typename Mapping, int Type> uptr MappingImpl(void) {
return Mapping::kAppAddr;
case MAPPING_APP_MASK:
return Mapping::kAppMemMsk;
+ case MAPPING_PTR_SHIFT:
+ return Mapping::kPtrShift;
}
}
@@ -88,6 +99,9 @@ uptr AppAddr() { return MappingArchImpl<MAPPING_APP_ADDR>(); }
ALWAYS_INLINE
uptr AppMask() { return MappingArchImpl<MAPPING_APP_MASK>(); }
+ALWAYS_INLINE
+uptr PtrShift() { return MappingArchImpl<MAPPING_PTR_SHIFT>(); }
+
} // namespace __tysan
#endif
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 19610958e47b72..d03adb2bcc2dc3 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -62,6 +62,12 @@ static cl::opt<bool>
cl::desc("Writes always set the type"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClOutlineInstrumentation(
+ "tysan-outline-instrumentation",
+ cl::desc("Uses function calls for all TySan instrumentation, reducing "
+ "ELF size"),
+ cl::Hidden, cl::init(false));
+
STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
namespace {
@@ -109,12 +115,16 @@ struct TypeSanitizer {
Regex AnonNameRegex;
Type *IntptrTy;
uint64_t PtrShift;
- IntegerType *OrdTy;
+ IntegerType *OrdTy, *U64Ty;
/// Callbacks to run-time library are computed in initializeCallbacks.
FunctionCallee TysanCheck;
FunctionCallee TysanCtorFunction;
+ FunctionCallee TysanIntrumentMemInst;
+ FunctionCallee TysanInstrumentWithShadowUpdate;
+ FunctionCallee TysanSetShadowType;
+
/// Callback to set types for gloabls.
Function *TysanGlobalsSetTypeFunction;
};
@@ -134,6 +144,8 @@ TypeSanitizer::TypeSanitizer(Module &M)
void TypeSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
OrdTy = IRB.getInt32Ty();
+ U64Ty = IRB.getInt64Ty();
+ Type *BoolType = IRB.getInt1Ty();
AttributeList Attr;
Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
@@ -148,6 +160,30 @@ void TypeSanitizer::initializeCallbacks(Module &M) {
TysanCtorFunction =
M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
+
+ TysanIntrumentMemInst = M.getOrInsertFunction(
+ "__tysan_instrument_mem_inst", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer of data to be written to
+ IRB.getPtrTy(), // Pointer of data to write
+ U64Ty, // Size of the data in bytes
+ BoolType // Do we need to call memmove
+ );
+
+ TysanInstrumentWithShadowUpdate = M.getOrInsertFunction(
+ "__tysan_instrument_with_shadow_update", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer to data to be read
+ IRB.getPtrTy(), // Pointer to type descriptor
+ BoolType, // Do we need to type check this
+ U64Ty, // Size of data we access in bytes
+ OrdTy // Flags
+ );
+
+ TysanSetShadowType = M.getOrInsertFunction(
+ "__tysan_set_shadow_type", Attr, IRB.getVoidTy(),
+ IRB.getPtrTy(), // Pointer of data to be written to
+ IRB.getPtrTy(), // Pointer to the new type descriptor
+ U64Ty // Size of data we access in bytes
+ );
}
void TypeSanitizer::instrumentGlobals(Module &M) {
@@ -593,6 +629,29 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());
+ if (ClOutlineInstrumentation) {
+ if (!ForceSetType && (!ClWritesAlwaysSetType || IsRead)) {
+ // We need to check the type here. If the type is unknown, then the read
+ // sets the type. If the type is known, then it is checked. If the type
+ // doesn't match, then we call the runtime (which may yet determine that
+ // the mismatch is okay).
+
+ Constant *Flags =
+ ConstantInt::get(OrdTy, (int)IsRead | (((int)IsWrite) << 1));
+
+ IRB.CreateCall(TysanInstrumentWithShadowUpdate,
+ {Ptr, TD,
+ SanitizeFunction ? IRB.getTrue() : IRB.getFalse(),
+ IRB.getInt64(AccessSize), Flags});
+ } else if (ForceSetType || IsWrite) {
+ // In the mode where writes always set the type, for a write (which does
+ // not also read), we just set the type.
+ IRB.CreateCall(TysanSetShadowType, {Ptr, TD, IRB.getInt64(AccessSize)});
+ }
+
+ return true;
+ }
+
Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
ShadowBase, AppMemMask);
Type *Int8PtrPtrTy = PointerType::get(IRB.getPtrTy(), 0);
@@ -840,37 +899,46 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
}
}
- if (!ShadowBase)
- ShadowBase = getShadowBase(*F);
- if (!AppMemMask)
- AppMemMask = getAppMemMask(*F);
+ if (ClOutlineInstrumentation) {
+ if (!Src) {
+ Src = ConstantPointerNull::get(IRB.getPtrTy());
+ }
+ IRB.CreateCall(
+ TysanIntrumentMemInst,
+ {Dest, Src, Size, NeedsMemMove ? IRB.getTrue() : IRB.getFalse()});
+ } else {
+ if (!ShadowBase)
+ ShadowBase = getShadowBase(*F);
+ if (!AppMemMask)
+ AppMemMask = getAppMemMask(*F);
+
+ Value *ShadowDataInt = IRB.CreateAdd(
+ IRB.CreateShl(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
+ PtrShift),
+ ShadowBase);
+ Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
+
+ if (!Src) {
+ IRB.CreateMemSet(ShadowData, IRB.getInt8(0),
+ IRB.CreateShl(Size, PtrShift), Align(1ull << PtrShift));
+ return true;
+ }
- Value *ShadowDataInt = IRB.CreateAdd(
- IRB.CreateShl(
- IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
- PtrShift),
- ShadowBase);
- Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
-
- if (!Src) {
- IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
- Align(1ull << PtrShift));
- return true;
- }
+ Value *SrcShadowDataInt = IRB.CreateAdd(
+ IRB.CreateShl(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
+ PtrShift),
+ ShadowBase);
+ Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
- Value *SrcShadowDataInt = IRB.CreateAdd(
- IRB.CreateShl(
- IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
- PtrShift),
- ShadowBase);
- Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
-
- if (NeedsMemMove) {
- IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
- Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
- } else {
- IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
- Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ if (NeedsMemMove) {
+ IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+ Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ } else {
+ IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+ Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+ }
}
return true;
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
new file mode 100644
index 00000000000000..a690f2a197ca4c
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; Test basic type sanitizer instrumentation.
+;
+; RUN: opt -passes='tysan-module,tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test_load(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr %a, ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1)
+; CHECK-NEXT: %tmp1 = load i32, ptr %a, align 4, !tbaa !0
+; CHECK-NEXT: ret i32 %tmp1
+entry:
+ %tmp1 = load i32, ptr %a, align 4, !tbaa !3
+ ret i32 %tmp1
+}
+
+define void @test_store(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_instrument_with_shadow_update(ptr %a, ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2)
+; CHECK-NEXT: store i32 42, ptr %a, align 4, !tbaa !4
+; CHECK-NEXT: ret void
+
+entry:
+ store i32 42, ptr %a, align 4, !tbaa !6
+ ret void
+}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!3 = !{!2, !2, i64 0}
+!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4}
+!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
+!6 = !{!5, !2, i64 12}
diff --git a/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
new file mode 100644
index 00000000000000..f8450d123ea9f3
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+; RUN: opt -passes='tysan-module,tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+ at global1 = global i32 0, align 4
+ at global2 = global i32 0, align 4
+
+
+; CHECK-LABEL: define internal void @__tysan_set_globals_types(
+; CHECK-NEXT: %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT: %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT: call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+!llvm.tysan.globals = !{!13, !14}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!13 = !{ptr @global1, !2}
+!14 = !{ptr @global1, !2}
``````````
</details>
https://github.com/llvm/llvm-project/pull/120582
More information about the llvm-commits
mailing list