[compiler-rt] ddf5725 - [nsan] Emit calls to optimized functions (#98900)

Wed Jul 24 01:20:40 PDT 2024

Author: Dmitry Chestnykh
Date: 2024-07-24T11:20:36+03:00
New Revision: ddf5725ef180692b60962ae56e352a7af6fc5919

URL: https://github.com/llvm/llvm-project/commit/ddf5725ef180692b60962ae56e352a7af6fc5919
DIFF: https://github.com/llvm/llvm-project/commit/ddf5725ef180692b60962ae56e352a7af6fc5919.diff

LOG: [nsan] Emit calls to optimized functions (#98900)

As previously noted in nsan.cpp we can implement
optimized variants of `__nsan_copy_values` and
`__nsan_set_value_unknown` if a memory operation
size is known.
Now the instrumentation creates calls to optimized functions if there is
4, 8 or 16-byte memory operation like
`memset(X, value, 4/8/16)` or `memcpy(dst, src, 4/8/16)`
nsan.cpp provides definitions of the optimized functions.

Added: 
    

Modified: 
    compiler-rt/lib/nsan/nsan.cpp
    llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
    llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/nsan/nsan.cpp b/compiler-rt/lib/nsan/nsan.cpp
index 718242c2ecdf8..568a1b3a65575 100644

--- a/compiler-rt/lib/nsan/nsan.cpp
+++ b/compiler-rt/lib/nsan/nsan.cpp
@@ -54,8 +54,6 @@ using namespace __nsan;
 constexpr int kMaxVectorWidth = 8;
 
 // When copying application memory, we also copy its shadow and shadow type.
-// FIXME: We could provide fixed-size versions that would nicely
-// vectorize for known sizes.
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
 __nsan_copy_values(const u8 *daddr, const u8 *saddr, uptr size) {
   internal_memmove((void *)GetShadowTypeAddrFor(daddr),
@@ -64,13 +62,33 @@ __nsan_copy_values(const u8 *daddr, const u8 *saddr, uptr size) {
                    size * kShadowScale);
 }
 
-// FIXME: We could provide fixed-size versions that would nicely
-// vectorize for known sizes.
+#define NSAN_COPY_VALUES_N(N)                                                  \
+  extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_copy_##N(               \
+      const u8 *daddr, const u8 *saddr) {                                      \
+    __builtin_memmove((void *)GetShadowTypeAddrFor(daddr),                     \
+                      GetShadowTypeAddrFor(saddr), N);                         \
+    __builtin_memmove((void *)GetShadowAddrFor(daddr),                         \
+                      GetShadowAddrFor(saddr), N * kShadowScale);              \
+  }
+
+NSAN_COPY_VALUES_N(4)
+NSAN_COPY_VALUES_N(8)
+NSAN_COPY_VALUES_N(16)
+
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
 __nsan_set_value_unknown(const u8 *addr, uptr size) {
   internal_memset((void *)GetShadowTypeAddrFor(addr), 0, size);
 }
 
+#define NSAN_SET_VALUE_UNKNOWN_N(N)                                            \
+  extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_set_value_unknown_##N(  \
+      const u8 *daddr) {                                                       \
+    __builtin_memset((void *)GetShadowTypeAddrFor(daddr), 0, N);               \
+  }
+
+NSAN_SET_VALUE_UNKNOWN_N(4)
+NSAN_SET_VALUE_UNKNOWN_N(8)
+NSAN_SET_VALUE_UNKNOWN_N(16)
 
 const char *FTInfo<float>::kCppTypeName = "float";
 const char *FTInfo<double>::kCppTypeName = "double";

diff  --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index 99b1c779f3167..0b54f571f5508 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -493,6 +493,60 @@ class ValueToShadowMap {
   DenseMap<Value *, Value *> Map;
 };
 
+class NsanMemOpFn {
+public:
+  NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized, StringRef Fallback,
+              size_t NumArgs);
+  FunctionCallee getFunctionFor(uint64_t MemOpSize) const;
+  FunctionCallee getFallback() const;
+
+private:
+  SmallVector<FunctionCallee> Funcs;
+  size_t NumSizedFuncs;
+};
+
+NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
+                         StringRef Fallback, size_t NumArgs) {
+  LLVMContext &Ctx = M.getContext();
+  AttributeList Attr;
+  Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
+  Type *PtrTy = PointerType::getUnqual(Ctx);
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  IntegerType *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
+  FunctionType *SizedFnTy = nullptr;
+
+  NumSizedFuncs = Sized.size();
+
+  // First entry is fallback function
+  if (NumArgs == 3) {
+    Funcs.push_back(
+        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, PtrTy, IntptrTy));
+    SizedFnTy = FunctionType::get(VoidTy, {PtrTy, PtrTy}, false);
+  } else if (NumArgs == 2) {
+    Funcs.push_back(
+        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, IntptrTy));
+    SizedFnTy = FunctionType::get(VoidTy, {PtrTy}, false);
+  } else {
+    assert(!"Unexpected value of sized functions arguments");
+  }
+
+  for (size_t i = 0; i < NumSizedFuncs; ++i)
+    Funcs.push_back(M.getOrInsertFunction(Sized[i], SizedFnTy, Attr));
+}
+
+FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const {
+  // Now `getFunctionFor` operates on `Funcs` of size 4 (at least) and the
+  // following code assumes that the number of functions in `Func` is sufficient
+  assert(NumSizedFuncs >= 3 && "Unexpected number of sized functions");
+
+  size_t Idx =
+      MemOpSize == 4 ? 1 : (MemOpSize == 8 ? 2 : (MemOpSize == 16 ? 3 : 0));
+
+  return Funcs[Idx];
+}
+
+FunctionCallee NsanMemOpFn::getFallback() const { return Funcs[0]; }
+
 /// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
 /// API function declarations into the module if they don't exist already.
 /// Instantiating ensures the __nsan_init function is in the list of global
@@ -550,12 +604,16 @@ class NumericalStabilitySanitizer {
   LLVMContext &Context;
   MappingConfig Config;
   IntegerType *IntptrTy = nullptr;
+
+  // TODO: Use std::array instead?
   FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
   FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
   FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
   FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};
-  FunctionCallee NsanCopyValues;
-  FunctionCallee NsanSetValueUnknown;
+
+  NsanMemOpFn NsanCopyFns;
+  NsanMemOpFn NsanSetUnknownFns;
+
   FunctionCallee NsanGetRawShadowTypePtr;
   FunctionCallee NsanGetRawShadowPtr;
   GlobalValue *NsanShadowRetTag = nullptr;
@@ -598,7 +656,14 @@ static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
 }
 
 NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
-    : DL(M.getDataLayout()), Context(M.getContext()), Config(Context) {
+    : DL(M.getDataLayout()), Context(M.getContext()), Config(Context),
+      NsanCopyFns(M, {"__nsan_copy_4", "__nsan_copy_8", "__nsan_copy_16"},
+                  "__nsan_copy_values", /*NumArgs=*/3),
+      NsanSetUnknownFns(M,
+                        {"__nsan_set_value_unknown_4",
+                         "__nsan_set_value_unknown_8",
+                         "__nsan_set_value_unknown_16"},
+                        "__nsan_set_value_unknown", /*NumArgs=*/2) {
   IntptrTy = DL.getIntPtrType(Context);
   Type *PtrTy = PointerType::getUnqual(Context);
   Type *Int32Ty = Type::getInt32Ty(Context);
@@ -634,11 +699,6 @@ NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
         Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
   }
 
-  NsanCopyValues = M.getOrInsertFunction("__nsan_copy_values", Attr, VoidTy,
-                                         PtrTy, PtrTy, IntptrTy);
-  NsanSetValueUnknown = M.getOrInsertFunction("__nsan_set_value_unknown", Attr,
-                                              VoidTy, PtrTy, IntptrTy);
-
   // TODO: Add attributes nofree, nosync, readnone, readonly,
   NsanGetRawShadowTypePtr = M.getOrInsertFunction(
       "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
@@ -1880,7 +1940,7 @@ void NumericalStabilitySanitizer::propagateNonFTStore(
     }
   }
   // All other stores just reset the shadow value to unknown.
-  Builder.CreateCall(NsanSetValueUnknown, {Dst, ValueSize});
+  Builder.CreateCall(NsanSetUnknownFns.getFallback(), {Dst, ValueSize});
 }
 
 void NumericalStabilitySanitizer::propagateShadowValues(
@@ -2123,21 +2183,45 @@ bool NumericalStabilitySanitizer::sanitizeFunction(
   return !ValueToShadow.empty();
 }
 
+static uint64_t GetMemOpSize(Value *V) {
+  uint64_t OpSize = 0;
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    auto *CInt = dyn_cast<ConstantInt>(C);
+    if (CInt && CInt->getValue().getBitWidth() <= 64)
+      OpSize = CInt->getValue().getZExtValue();
+  }
+
+  return OpSize;
+}
+
 // Instrument the memory intrinsics so that they properly modify the shadow
 // memory.
 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
   IRBuilder<> Builder(MI);
   if (auto *M = dyn_cast<MemSetInst>(MI)) {
-    Builder.CreateCall(
-        NsanSetValueUnknown,
-        {/*Address=*/M->getArgOperand(0),
-         /*Size=*/Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    FunctionCallee SetUnknownFn =
+        NsanSetUnknownFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
+    if (SetUnknownFn.getFunctionType()->getNumParams() == 1)
+      Builder.CreateCall(SetUnknownFn, {/*Address=*/M->getArgOperand(0)});
+    else
+      Builder.CreateCall(SetUnknownFn,
+                         {/*Address=*/M->getArgOperand(0),
+                          /*Size=*/Builder.CreateIntCast(M->getArgOperand(2),
+                                                         IntptrTy, false)});
+
   } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
-    Builder.CreateCall(
-        NsanCopyValues,
-        {/*Destination=*/M->getArgOperand(0),
-         /*Source=*/M->getArgOperand(1),
-         /*Size=*/Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    FunctionCallee CopyFn =
+        NsanCopyFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
+
+    if (CopyFn.getFunctionType()->getNumParams() == 2)
+      Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
+                                  /*Source=*/M->getArgOperand(1)});
+    else
+      Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
+                                  /*Source=*/M->getArgOperand(1),
+                                  /*Size=*/
+                                  Builder.CreateIntCast(M->getArgOperand(2),
+                                                        IntptrTy, false)});
   }
   return false;
 }

diff  --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
index 3fe78d8b19b0a..31f32d7bd8df5 100644
--- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll
@@ -7,15 +7,24 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
 
-define void @call_memcpy_intrinsic(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numerical_stability {
-; CHECK-LABEL: @call_memcpy_intrinsic(
+define void @call_memcpy_intrinsics(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numerical_stability {
+; CHECK-LABEL: @call_memcpy_intrinsics(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @__nsan_copy_values(ptr [[A:%.*]], ptr [[B:%.*]], i64 16)
+; CHECK-NEXT:    call void @__nsan_copy_4(ptr [[A:%.*]], ptr [[B:%.*]])
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 4, i1 false)
+; CHECK-NEXT:    call void @__nsan_copy_8(ptr [[A:%.*]], ptr [[B:%.*]])
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 8, i1 false)
+; CHECK-NEXT:    call void @__nsan_copy_16(ptr [[A:%.*]], ptr [[B:%.*]])
 ; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 16, i1 false)
+; CHECK-NEXT:    call void @__nsan_copy_values(ptr [[A:%.*]], ptr [[B:%.*]], i64 15)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], ptr nonnull align 8 dereferenceable(16) [[B]], i64 15, i1 false)
 ; CHECK-NEXT:    ret void
 ;
 entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 8, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(ptr nonnull align 8 dereferenceable(16) %a, ptr nonnull align 8 dereferenceable(16) %b, i64 15, i1 false)
   ret void
 }
 
@@ -32,6 +41,26 @@ entry:
   ret void
 }
 
+define void @call_memset_intrinsics(i8* nonnull align 8 dereferenceable(16) %a) sanitize_numerical_stability {
+; CHECK-LABEL: @call_memset_intrinsics(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @__nsan_set_value_unknown_4(ptr [[A:%.*]])
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    call void @__nsan_set_value_unknown_8(ptr [[A:%.*]])
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @__nsan_set_value_unknown_16(ptr [[A:%.*]])
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    call void @__nsan_set_value_unknown(ptr [[A:%.*]], i64 15)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) [[A]], i8 0, i64 15, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) %a, i8 0, i64 4, i1 false)
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) %a, i8 0, i64 8, i1 false)
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) %a, i8 0, i64 16, i1 false)
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(16) %a, i8 0, i64 15, i1 false)
+  ret void
+}
 
 define void @transfer_float(float* %dst, float* %src) sanitize_numerical_stability {
 ; CHECK-LABEL: @transfer_float(