[compiler-rt] [llvm] [TySan] Add option to outline instrumentation (PR #120582)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 19 07:57:34 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (gbMattN)

<details>
<summary>Changes</summary>

Added a command line option to use function calls rather than inline checks for TySan instrumentation.

---
Full diff: https://github.com/llvm/llvm-project/pull/120582.diff


5 Files Affected:

- (modified) compiler-rt/lib/tysan/tysan.cpp (+107) 
- (modified) compiler-rt/lib/tysan/tysan_platform.h (+15-1) 
- (modified) llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp (+98-30) 
- (added) llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll (+41) 
- (added) llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll (+24) 


``````````diff
diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp
index 39d78e7c95e0cd..91458bee4fda29 100644
--- a/compiler-rt/lib/tysan/tysan.cpp
+++ b/compiler-rt/lib/tysan/tysan.cpp
@@ -22,6 +22,7 @@
 
 #include "tysan/tysan.h"
 
+#include <stdint.h>
 #include <string.h>
 
 using namespace __sanitizer;
@@ -207,6 +208,64 @@ static void reportError(void *Addr, int Size, tysan_type_descriptor *TD,
   }
 }
 
+ALWAYS_INLINE
+static void SetShadowType(tysan_type_descriptor *td,
+                          tysan_type_descriptor **shadowData,
+                          uint64_t AccessSize) {
+  *shadowData = td;
+  uint64_t shadowDataInt = (uint64_t)shadowData;
+
+  for (uint64_t i = 1; i < AccessSize; ++i) {
+    int64_t dataOffset = i << PtrShift();
+    int64_t *badShadowData = (int64_t *)(shadowDataInt + dataOffset);
+    int64_t badTD = int64_t(i) * -1;
+    *badShadowData = badTD;
+  }
+}
+
+ALWAYS_INLINE
+static bool GetNotAllBadTD(uint64_t ShadowDataInt, uint64_t AccessSize) {
+  bool notAllBadTD = false;
+  for (uint64_t i = 1; i < AccessSize; ++i) {
+    int64_t **unkShadowData = (int64_t **)(ShadowDataInt + (i << PtrShift()));
+    int64_t *ILdTD = *unkShadowData;
+    notAllBadTD = notAllBadTD || (ILdTD != nullptr);
+  }
+  return notAllBadTD;
+}
+
+ALWAYS_INLINE
+static bool GetNotAllUnkTD(uint64_t ShadowDataInt, uint64_t AccessSize) {
+  bool notAllBadTD = false;
+  for (uint64_t i = 1; i < AccessSize; ++i) {
+    int64_t *badShadowData = (int64_t *)(ShadowDataInt + (i << PtrShift()));
+    int64_t ILdTD = *badShadowData;
+    notAllBadTD = notAllBadTD || (ILdTD >= 0);
+  }
+  return notAllBadTD;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_instrument_mem_inst(char *dest, char *src, uint64_t size,
+                            bool needsMemMove) {
+  tysan_type_descriptor **destShadowDataPtr = shadow_for(dest);
+
+  if (!src) {
+    internal_memset((char *)destShadowDataPtr, 0, size << PtrShift());
+    return;
+  }
+
+  uint64_t srcInt = (uint64_t)src;
+  uint64_t srcShadowInt = ((srcInt & AppMask()) << PtrShift()) + ShadowAddr();
+  uint64_t *srcShadow = (uint64_t *)srcShadowInt;
+
+  if (needsMemMove) {
+    internal_memmove((char *)destShadowDataPtr, srcShadow, size << PtrShift());
+  } else {
+    internal_memcpy((char *)destShadowDataPtr, srcShadow, size << PtrShift());
+  }
+}
+
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
 __tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) {
   GET_CALLER_PC_BP_SP;
@@ -253,6 +312,54 @@ __tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) {
   }
 }
 
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_instrument_with_shadow_update(void *ptr, tysan_type_descriptor *td,
+                                      bool sanitizeFunction,
+                                      uint64_t accessSize, int flags) {
+  tysan_type_descriptor **shadowData = shadow_for(ptr);
+  tysan_type_descriptor *loadedTD = *shadowData;
+  bool shadowIsNull = loadedTD == nullptr;
+
+  // TODO, sanitizeFunction is known at compile time, so maybe this is split
+  // into two different functions
+  if (sanitizeFunction) {
+
+    if (td != loadedTD) {
+
+      // We now know that the types did not match (we're on the slow path). If
+      // the type is unknown, then set it.
+      if (shadowIsNull) {
+        // We're about to set the type. Make sure that all bytes in the value
+        // are also of unknown type.
+        bool isAllUnknownTD = GetNotAllUnkTD((uint64_t)shadowData, accessSize);
+        if (isAllUnknownTD)
+          __tysan_check(ptr, accessSize, td, flags);
+        SetShadowType(td, shadowData, accessSize);
+      } else {
+        __tysan_check(ptr, accessSize, td, flags);
+      }
+    } else {
+      // We appear to have the right type. Make sure that all other bytes in
+      // the type are still marked as interior bytes. If not, call the runtime.
+      bool isNotAllBadTD = GetNotAllBadTD((uint64_t)shadowData, accessSize);
+      if (isNotAllBadTD) {
+        __tysan_check(ptr, accessSize, td, flags);
+      }
+    }
+  } else if (shadowIsNull) {
+    SetShadowType(td, shadowData, accessSize);
+  }
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__tysan_set_shadow_type(void *ptr, tysan_type_descriptor *td,
+                        uint64_t accessSize) {
+  // In the mode where writes always set the type, for a write (which does
+  // not also read), we just set the type.
+  tysan_type_descriptor **shadow = shadow_for(ptr);
+  SetShadowType(td, shadow, accessSize);
+}
+
 Flags __tysan::flags_data;
 
 SANITIZER_INTERFACE_ATTRIBUTE uptr __tysan_shadow_memory_address;
diff --git a/compiler-rt/lib/tysan/tysan_platform.h b/compiler-rt/lib/tysan/tysan_platform.h
index f01392885d9398..19f77f0cace6be 100644
--- a/compiler-rt/lib/tysan/tysan_platform.h
+++ b/compiler-rt/lib/tysan/tysan_platform.h
@@ -21,24 +21,28 @@ struct Mapping {
   static const uptr kShadowAddr = 0x010000000000ull;
   static const uptr kAppAddr = 0x550000000000ull;
   static const uptr kAppMemMsk = ~0x780000000000ull;
+  static const uptr kPtrShift = 3;
 };
 #elif defined(__aarch64__)
 struct Mapping39 {
   static const uptr kShadowAddr = 0x0800000000ull;
   static const uptr kAppAddr = 0x5500000000ull;
   static const uptr kAppMemMsk = ~0x7800000000ull;
+  static const uptr kPtrShift = 3;
 };
 
 struct Mapping42 {
   static const uptr kShadowAddr = 0x10000000000ull;
   static const uptr kAppAddr = 0x2aa00000000ull;
   static const uptr kAppMemMsk = ~0x3c000000000ull;
+  static const uptr kPtrShift = 3;
 };
 
 struct Mapping48 {
   static const uptr kShadowAddr = 0x0002000000000ull;
   static const uptr kAppAddr = 0x0aaaa00000000ull;
   static const uptr kAppMemMsk = ~0x0fff800000000ull;
+  static const uptr kPtrShift = 3;
 };
 #define TYSAN_RUNTIME_VMA 1
 #else
@@ -49,7 +53,12 @@ struct Mapping48 {
 extern int vmaSize;
 #endif
 
-enum MappingType { MAPPING_SHADOW_ADDR, MAPPING_APP_ADDR, MAPPING_APP_MASK };
+enum MappingType {
+  MAPPING_SHADOW_ADDR,
+  MAPPING_APP_ADDR,
+  MAPPING_APP_MASK,
+  MAPPING_PTR_SHIFT
+};
 
 template <typename Mapping, int Type> uptr MappingImpl(void) {
   switch (Type) {
@@ -59,6 +68,8 @@ template <typename Mapping, int Type> uptr MappingImpl(void) {
     return Mapping::kAppAddr;
   case MAPPING_APP_MASK:
     return Mapping::kAppMemMsk;
+  case MAPPING_PTR_SHIFT:
+    return Mapping::kPtrShift;
   }
 }
 
@@ -88,6 +99,9 @@ uptr AppAddr() { return MappingArchImpl<MAPPING_APP_ADDR>(); }
 ALWAYS_INLINE
 uptr AppMask() { return MappingArchImpl<MAPPING_APP_MASK>(); }
 
+ALWAYS_INLINE
+uptr PtrShift() { return MappingArchImpl<MAPPING_PTR_SHIFT>(); }
+
 } // namespace __tysan
 
 #endif
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 19610958e47b72..d03adb2bcc2dc3 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -62,6 +62,12 @@ static cl::opt<bool>
                           cl::desc("Writes always set the type"), cl::Hidden,
                           cl::init(false));
 
+static cl::opt<bool> ClOutlineInstrumentation(
+    "tysan-outline-instrumentation",
+    cl::desc("Uses function calls for all TySan instrumentation, reducing "
+             "ELF size"),
+    cl::Hidden, cl::init(false));
+
 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
 
 namespace {
@@ -109,12 +115,16 @@ struct TypeSanitizer {
   Regex AnonNameRegex;
   Type *IntptrTy;
   uint64_t PtrShift;
-  IntegerType *OrdTy;
+  IntegerType *OrdTy, *U64Ty;
 
   /// Callbacks to run-time library are computed in initializeCallbacks.
   FunctionCallee TysanCheck;
   FunctionCallee TysanCtorFunction;
 
+  FunctionCallee TysanIntrumentMemInst;
+  FunctionCallee TysanInstrumentWithShadowUpdate;
+  FunctionCallee TysanSetShadowType;
+
   /// Callback to set types for gloabls.
   Function *TysanGlobalsSetTypeFunction;
 };
@@ -134,6 +144,8 @@ TypeSanitizer::TypeSanitizer(Module &M)
 void TypeSanitizer::initializeCallbacks(Module &M) {
   IRBuilder<> IRB(M.getContext());
   OrdTy = IRB.getInt32Ty();
+  U64Ty = IRB.getInt64Ty();
+  Type *BoolType = IRB.getInt1Ty();
 
   AttributeList Attr;
   Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
@@ -148,6 +160,30 @@ void TypeSanitizer::initializeCallbacks(Module &M) {
 
   TysanCtorFunction =
       M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
+
+  TysanIntrumentMemInst = M.getOrInsertFunction(
+      "__tysan_instrument_mem_inst", Attr, IRB.getVoidTy(),
+      IRB.getPtrTy(), // Pointer of data to be written to
+      IRB.getPtrTy(), // Pointer of data to write
+      U64Ty,          // Size of the data in bytes
+      BoolType        // Do we need to call memmove
+  );
+
+  TysanInstrumentWithShadowUpdate = M.getOrInsertFunction(
+      "__tysan_instrument_with_shadow_update", Attr, IRB.getVoidTy(),
+      IRB.getPtrTy(), // Pointer to data to be read
+      IRB.getPtrTy(), // Pointer to type descriptor
+      BoolType,       // Do we need to type check this
+      U64Ty,          // Size of data we access in bytes
+      OrdTy           // Flags
+  );
+
+  TysanSetShadowType = M.getOrInsertFunction(
+      "__tysan_set_shadow_type", Attr, IRB.getVoidTy(),
+      IRB.getPtrTy(), // Pointer of data to be written to
+      IRB.getPtrTy(), // Pointer to the new type descriptor
+      U64Ty           // Size of data we access in bytes
+  );
 }
 
 void TypeSanitizer::instrumentGlobals(Module &M) {
@@ -593,6 +629,29 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
 
   Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());
 
+  if (ClOutlineInstrumentation) {
+    if (!ForceSetType && (!ClWritesAlwaysSetType || IsRead)) {
+      // We need to check the type here. If the type is unknown, then the read
+      // sets the type. If the type is known, then it is checked. If the type
+      // doesn't match, then we call the runtime (which may yet determine that
+      // the mismatch is okay).
+
+      Constant *Flags =
+          ConstantInt::get(OrdTy, (int)IsRead | (((int)IsWrite) << 1));
+
+      IRB.CreateCall(TysanInstrumentWithShadowUpdate,
+                     {Ptr, TD,
+                      SanitizeFunction ? IRB.getTrue() : IRB.getFalse(),
+                      IRB.getInt64(AccessSize), Flags});
+    } else if (ForceSetType || IsWrite) {
+      // In the mode where writes always set the type, for a write (which does
+      // not also read), we just set the type.
+      IRB.CreateCall(TysanSetShadowType, {Ptr, TD, IRB.getInt64(AccessSize)});
+    }
+
+    return true;
+  }
+
   Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
                                                 ShadowBase, AppMemMask);
   Type *Int8PtrPtrTy = PointerType::get(IRB.getPtrTy(), 0);
@@ -840,37 +899,46 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
     }
   }
 
-  if (!ShadowBase)
-    ShadowBase = getShadowBase(*F);
-  if (!AppMemMask)
-    AppMemMask = getAppMemMask(*F);
+  if (ClOutlineInstrumentation) {
+    if (!Src) {
+      Src = ConstantPointerNull::get(IRB.getPtrTy());
+    }
+    IRB.CreateCall(
+        TysanIntrumentMemInst,
+        {Dest, Src, Size, NeedsMemMove ? IRB.getTrue() : IRB.getFalse()});
+  } else {
+    if (!ShadowBase)
+      ShadowBase = getShadowBase(*F);
+    if (!AppMemMask)
+      AppMemMask = getAppMemMask(*F);
+
+    Value *ShadowDataInt = IRB.CreateAdd(
+        IRB.CreateShl(
+            IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
+            PtrShift),
+        ShadowBase);
+    Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
+
+    if (!Src) {
+      IRB.CreateMemSet(ShadowData, IRB.getInt8(0),
+                       IRB.CreateShl(Size, PtrShift), Align(1ull << PtrShift));
+      return true;
+    }
 
-  Value *ShadowDataInt = IRB.CreateAdd(
-      IRB.CreateShl(
-          IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
-          PtrShift),
-      ShadowBase);
-  Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
-
-  if (!Src) {
-    IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
-                     Align(1ull << PtrShift));
-    return true;
-  }
+    Value *SrcShadowDataInt = IRB.CreateAdd(
+        IRB.CreateShl(
+            IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
+            PtrShift),
+        ShadowBase);
+    Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
 
-  Value *SrcShadowDataInt = IRB.CreateAdd(
-      IRB.CreateShl(
-          IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
-          PtrShift),
-      ShadowBase);
-  Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
-
-  if (NeedsMemMove) {
-    IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
-                      Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
-  } else {
-    IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
-                     Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+    if (NeedsMemMove) {
+      IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+                        Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+    } else {
+      IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
+                       Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
+    }
   }
 
   return true;
diff --git a/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
new file mode 100644
index 00000000000000..a690f2a197ca4c
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/basic_outlined.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; Test basic type sanitizer instrumentation.
+;
+; RUN: opt -passes='tysan-module,tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test_load(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT:    %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT:    call void @__tysan_instrument_with_shadow_update(ptr %a, ptr @__tysan_v1_int_o_0, i1 true, i64 4, i32 1)
+; CHECK-NEXT:    %tmp1 = load i32, ptr %a, align 4, !tbaa !0
+; CHECK-NEXT:    ret i32 %tmp1
+entry:
+  %tmp1 = load i32, ptr %a, align 4, !tbaa !3
+  ret i32 %tmp1
+}
+
+define void @test_store(ptr %a) sanitize_type {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT:    %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT:    call void @__tysan_instrument_with_shadow_update(ptr %a, ptr @__tysan_v1___ZTS1v_o_12, i1 true, i64 4, i32 2)
+; CHECK-NEXT:    store i32 42, ptr %a, align 4, !tbaa !4
+; CHECK-NEXT:    ret void
+
+entry:
+  store i32 42, ptr %a, align 4, !tbaa !6
+  ret void
+}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!3 = !{!2, !2, i64 0}
+!4 = !{!"_ZTS1x", !2, i64 0, !2, i64 4}
+!5 = !{!"_ZTS1v", !2, i64 8, !2, i64 12, !4, i64 16}
+!6 = !{!5, !2, i64 12}
diff --git a/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
new file mode 100644
index 00000000000000..f8450d123ea9f3
--- /dev/null
+++ b/llvm/test/Instrumentation/TypeSanitizer/globals_outlined.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs
+; RUN: opt -passes='tysan-module,tysan' -tysan-outline-instrumentation -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@global1 = global i32 0, align 4
+@global2 = global i32 0, align 4
+
+
+; CHECK-LABEL: define internal void @__tysan_set_globals_types(
+; CHECK-NEXT:     %app.mem.mask = load i64, ptr @__tysan_app_memory_mask, align 8
+; CHECK-NEXT:     %shadow.base = load i64, ptr @__tysan_shadow_memory_address, align 8
+; CHECK-NEXT:     call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT:     call void @__tysan_set_shadow_type(ptr @global1, ptr @__tysan_v1_int, i64 4)
+; CHECK-NEXT:     ret void
+; CHECK-NEXT:   }
+
+!llvm.tysan.globals = !{!13, !14}
+
+!0 = !{!"Simple C++ TBAA"}
+!1 = !{!"omnipotent char", !0, i64 0}
+!2 = !{!"int", !1, i64 0}
+!13 = !{ptr @global1, !2}
+!14 = !{ptr @global1, !2}

``````````

</details>


https://github.com/llvm/llvm-project/pull/120582


More information about the llvm-commits mailing list