[llvm] 5bb06c7 - [DFSan] Add callback that allows to track which function tainted data reaches.
Andrew Browne via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 11 23:10:38 PST 2022
Author: Andrew Browne
Date: 2022-12-11T23:10:17-08:00
New Revision: 5bb06c7cce6bdcffb3ced29fa733c0dbb1b63c41
URL: https://github.com/llvm/llvm-project/commit/5bb06c7cce6bdcffb3ced29fa733c0dbb1b63c41
DIFF: https://github.com/llvm/llvm-project/commit/5bb06c7cce6bdcffb3ced29fa733c0dbb1b63c41.diff
LOG: [DFSan] Add callback that allows to track which function tainted data reaches.
Authored-by: Christopher Liebchen <liebchen at google.com>
Co-authored-by: Andrew Browne <browneee at google.com>
Reviewed By: browneee
Differential Revision: https://reviews.llvm.org/D139543
Added:
compiler-rt/test/dfsan/reaches_function.c
llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll
Modified:
compiler-rt/include/sanitizer/dfsan_interface.h
compiler-rt/lib/dfsan/dfsan.cpp
compiler-rt/lib/dfsan/done_abilist.txt
compiler-rt/test/dfsan/Inputs/flags_abilist.txt
llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
Removed:
################################################################################
diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h
index 8e581a67572d3..519bfffa9a20b 100644
--- a/compiler-rt/include/sanitizer/dfsan_interface.h
+++ b/compiler-rt/include/sanitizer/dfsan_interface.h
@@ -31,6 +31,14 @@ typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
dfsan_origin origin);
+/// Signature of the callback argument to dfsan_set_reaches_function_callback().
+/// The file, line, and function describe where the tainted data was observed.
+typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
+ dfsan_origin origin,
+ const char *file,
+ unsigned int line,
+ const char *function);
+
/// Computes the union of \c l1 and \c l2, resulting in a union label.
dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
@@ -91,6 +99,18 @@ void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
/// This function returns all label bits seen in signal handler conditions.
dfsan_label dfsan_get_labels_in_signal_conditional();
+/// Sets a callback to be invoked when tainted data reaches a function.
+/// This could occur at function entry, or at a load instruction.
+/// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
+void dfsan_set_reaches_function_callback(
+ dfsan_reaches_function_callback_t callback);
+
+/// Making callbacks that handle signals well is tricky, so when
+/// -dfsan-reaches-function-callbacks=true, functions reached in signal
+/// handlers will add the labels they see into a global (bitwise-or together).
+/// This function returns all label bits seen during signal handlers.
+dfsan_label dfsan_get_labels_in_signal_reaches_function();
+
/// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called it non-zero. The hooks should be defined by the user.
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 0a6f319a9313c..faf5a6619c26c 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -718,6 +718,67 @@ dfsan_get_labels_in_signal_conditional() {
return __dfsan::labels_in_signal_conditional;
}
+namespace __dfsan {
+
+typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
+ dfsan_origin origin,
+ const char *file,
+ unsigned int line,
+ const char *function);
+static dfsan_reaches_function_callback_t reaches_function_callback = nullptr;
+static dfsan_label labels_in_signal_reaches_function = 0;
+
+static void ReachesFunctionCallback(dfsan_label label, dfsan_origin origin,
+ const char *file, unsigned int line,
+ const char *function) {
+ if (label == 0) {
+ return;
+ }
+ if (reaches_function_callback == nullptr) {
+ return;
+ }
+
+ // This initial ReachesFunctionCallback handler needs to be in here in dfsan
+ // runtime (rather than being an entirely user implemented hook) so that it
+ // has access to dfsan thread information.
+ DFsanThread *t = GetCurrentThread();
+ // A callback that does useful work (like recording the flow) will likely
+ // take too long to execute in a signal handler.
+ if (t && t->InSignalHandler()) {
+ // Record set of labels used in signal handler for completeness.
+ labels_in_signal_reaches_function |= label;
+ return;
+ }
+
+ reaches_function_callback(label, origin, file, line, function);
+}
+
+} // namespace __dfsan
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__dfsan_reaches_function_callback_origin(dfsan_label label, dfsan_origin origin,
+ const char *file, unsigned int line,
+ const char *function) {
+ __dfsan::ReachesFunctionCallback(label, origin, file, line, function);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__dfsan_reaches_function_callback(dfsan_label label, const char *file,
+ unsigned int line, const char *function) {
+ __dfsan::ReachesFunctionCallback(label, 0, file, line, function);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+dfsan_set_reaches_function_callback(
+ __dfsan::dfsan_reaches_function_callback_t callback) {
+ __dfsan::reaches_function_callback = callback;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
+dfsan_get_labels_in_signal_reaches_function() {
+ return __dfsan::labels_in_signal_reaches_function;
+}
+
class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:
Decorator() : SanitizerCommonDecorator() {}
@@ -1031,6 +1092,7 @@ extern "C" void dfsan_flush() {
}
}
__dfsan::labels_in_signal_conditional = 0;
+ __dfsan::labels_in_signal_reaches_function = 0;
}
// TODO: CheckMemoryLayoutSanity is based on msan.
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index e8fcd83d13bf8..ff8a37fbf426f 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -50,6 +50,12 @@ fun:dfsan_set_conditional_callback=uninstrumented
fun:dfsan_set_conditional_callback=discard
fun:dfsan_get_labels_in_signal_conditional=uninstrumented
fun:dfsan_get_labels_in_signal_conditional=discard
+fun:dfsan_set_reaches_function_callback=uninstrumented
+fun:dfsan_set_reaches_function_callback=discard
+fun:dfsan_get_labels_in_signal_reaches_function=uninstrumented
+fun:dfsan_get_labels_in_signal_reaches_function=discard
+fun:dfsan_reaches_function_callback=uninstrumented
+fun:dfsan_reaches_function_callback=discard
###############################################################################
# glibc
diff --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
index 6245a419792b2..f0dff9b372b59 100644
--- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
+++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
@@ -13,3 +13,9 @@ fun:my_dfsan_conditional_callback=discard
fun:dfsan_set_conditional_callback=uninstrumented
fun:dfsan_set_conditional_callback=discard
+
+fun:my_dfsan_reaches_function_callback=uninstrumented
+fun:my_dfsan_reaches_function_callback=discard
+
+fun:dfsan_set_reaches_function_callback=uninstrumented
+fun:dfsan_set_reaches_function_callback=discard
diff --git a/compiler-rt/test/dfsan/reaches_function.c b/compiler-rt/test/dfsan/reaches_function.c
new file mode 100644
index 0000000000000..46a2b7b383815
--- /dev/null
+++ b/compiler-rt/test/dfsan/reaches_function.c
@@ -0,0 +1,67 @@
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 %s %t-callbacks.o -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGIN_TRACKING -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -gmlt -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-reaches-function-callbacks=1 -mllvm -dfsan-track-origins=2 %s %t-callbacks.o -o %t
+// RUN: %run %t 2>&1 | FileCheck --check-prefix=CHECK-ORIGIN-TRACKING %s
+
+// REQUIRES: x86_64-target-arch
+
+// Tests that callbacks are inserted for reached functions when
+// -dfsan-reaches-function-callbacks is specified.
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <sanitizer/dfsan_interface.h>
+
+#ifdef CALLBACKS
+// Compile this code without DFSan to avoid recursive instrumentation.
+
+void my_dfsan_reaches_function_callback(dfsan_label label, dfsan_origin origin,
+ const char *file, unsigned int line,
+ const char *function) {
+#ifdef ORIGIN_TRACKING
+ dfsan_print_origin_id_trace(origin);
+#else
+ printf("%s:%d %s\n", file, line, function);
+#endif
+}
+
+#else
+
+__attribute__((noinline)) uint64_t add(uint64_t *a, uint64_t *b) {
+
+ return *a + *b;
+ // CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] add.dfsan
+ // CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint value was stored to memory at
+ // CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 3]]:{{.*}}
+ // CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at
+ // CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}}
+}
+
+extern void my_dfsan_reaches_function_callback(dfsan_label label,
+ dfsan_origin origin,
+ const char *file,
+ unsigned int line,
+ const char *function);
+
+int main(int argc, char *argv[]) {
+
+ dfsan_set_reaches_function_callback(my_dfsan_reaches_function_callback);
+
+ uint64_t a = 0;
+ uint64_t b = 0;
+
+ dfsan_set_label(8, &a, sizeof(a));
+ uint64_t c = add(&a, &b);
+ // CHECK: {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 1]] main
+ // CHECK-ORIGIN-TRACKING: Origin value: 0x10000002, Taint value was stored to memory at
+ // CHECK-ORIGIN-TRACKING: #0 {{.*}} in add.dfsan {{.*}}compiler-rt/test/dfsan/reaches_function.c:{{.*}}
+ // CHECK-ORIGIN-TRACKING: Origin value: 0x1, Taint value was created at
+ // CHECK-ORIGIN-TRACKING: #0 {{.*}} in main {{.*}}compiler-rt/test/dfsan/reaches_function.c:[[# @LINE - 6]]:{{.*}}
+ return c;
+}
+
+#endif // #ifdef CALLBACKS
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index fe2a5028eb2a3..b62f15014bf62 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -223,6 +223,14 @@ static cl::opt<bool> ClConditionalCallbacks(
cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
cl::init(false));
+// Experimental feature that inserts callbacks for data reaching a function,
+// either via function arguments or loads.
+// This must be true for dfsan_set_reaches_function_callback() to have effect.
+static cl::opt<bool> ClReachesFunctionCallbacks(
+ "dfsan-reaches-function-callbacks",
+ cl::desc("Insert calls to callback functions on data reaching a function."),
+ cl::Hidden, cl::init(false));
+
// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
"dfsan-track-select-control-flow",
@@ -446,6 +454,8 @@ class DataFlowSanitizer {
FunctionType *DFSanVarargWrapperFnTy;
FunctionType *DFSanConditionalCallbackFnTy;
FunctionType *DFSanConditionalCallbackOriginFnTy;
+ FunctionType *DFSanReachesFunctionCallbackFnTy;
+ FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
FunctionType *DFSanCmpCallbackFnTy;
FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy;
@@ -467,6 +477,8 @@ class DataFlowSanitizer {
FunctionCallee DFSanMemTransferCallbackFn;
FunctionCallee DFSanConditionalCallbackFn;
FunctionCallee DFSanConditionalCallbackOriginFn;
+ FunctionCallee DFSanReachesFunctionCallbackFn;
+ FunctionCallee DFSanReachesFunctionCallbackOriginFn;
FunctionCallee DFSanCmpCallbackFn;
FunctionCallee DFSanChainOriginFn;
FunctionCallee DFSanChainOriginIfTaintedFn;
@@ -673,6 +685,11 @@ struct DFSanFunction {
// branch instruction using the given conditional expression.
void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
+ // If ClReachesFunctionCallbacks is enabled, insert a callback for each
+ // argument and load instruction.
+ void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
+ Value *Data);
+
bool isLookupTableConstant(Value *P);
private:
@@ -1025,6 +1042,45 @@ void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
}
}
+void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
+ Instruction &I,
+ Value *Data) {
+ if (!ClReachesFunctionCallbacks) {
+ return;
+ }
+ const DebugLoc &dbgloc = I.getDebugLoc();
+ Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
+ ConstantInt *CILine;
+ llvm::Value *FilePathPtr;
+
+ if (dbgloc.get() == nullptr) {
+ CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0, false));
+ FilePathPtr = IRB.CreateGlobalStringPtr(
+ I.getFunction()->getParent()->getSourceFileName());
+ } else {
+ CILine = llvm::ConstantInt::get(I.getContext(),
+ llvm::APInt(32, dbgloc.getLine(), false));
+ FilePathPtr =
+ IRB.CreateGlobalStringPtr(dbgloc->getFilename());
+ }
+
+ llvm::Value *FunctionNamePtr =
+ IRB.CreateGlobalStringPtr(I.getFunction()->getName());
+
+ CallInst *CB;
+ std::vector<Value *> args;
+
+ if (DFS.shouldTrackOrigins()) {
+ Value *DataOrigin = getOrigin(Data);
+ args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };
+ CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
+ } else {
+ args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };
+ CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
+ }
+ CB->setDebugLoc(dbgloc);
+}
+
Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
if (!OrigTy->isSized())
return PrimitiveShadowTy;
@@ -1097,6 +1153,16 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
DFSanConditionalCallbackOriginFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
/*isVarArg=*/false);
+ Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
+ OriginTy, Int8Ptr};
+ DFSanReachesFunctionCallbackFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
+ /*isVarArg=*/false);
+ Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
+ PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
+ DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
+ /*isVarArg=*/false);
DFSanCmpCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
/*isVarArg=*/false);
@@ -1324,6 +1390,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
+ DFSanRuntimeFunctions.insert(
+ DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
+ DFSanRuntimeFunctions.insert(
+ DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
@@ -1357,6 +1427,11 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
DFSanConditionalCallbackOriginFn =
Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
DFSanConditionalCallbackOriginFnTy);
+ DFSanReachesFunctionCallbackFn = Mod->getOrInsertFunction(
+ "__dfsan_reaches_function_callback", DFSanReachesFunctionCallbackFnTy);
+ DFSanReachesFunctionCallbackOriginFn =
+ Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
+ DFSanReachesFunctionCallbackOriginFnTy);
}
void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
@@ -1585,6 +1660,31 @@ bool DataFlowSanitizer::runImpl(
DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
FnsWithForceZeroLabel.count(F), GetTLI(*F));
+ if (ClReachesFunctionCallbacks) {
+ // Add callback for arguments reaching this function.
+ for (auto &FArg : F->args()) {
+ Instruction *Next = &F->getEntryBlock().front();
+ Value *FArgShadow = DFSF.getShadow(&FArg);
+ if (isZeroShadow(FArgShadow))
+ continue;
+ if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
+ Next = FArgShadowInst->getNextNode();
+ }
+ if (shouldTrackOrigins()) {
+ if (Instruction *Origin =
+ dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
+ // Ensure IRB insertion point is after loads for shadow and origin.
+ Instruction *OriginNext = Origin->getNextNode();
+ if (Next->comesBefore(OriginNext)) {
+ Next = OriginNext;
+ }
+ }
+ }
+ IRBuilder<> IRB(Next);
+ DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
+ }
+ }
+
// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
@@ -2267,6 +2367,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (LI.isAtomic())
LI.setOrdering(addAcquireOrdering(LI.getOrdering()));
+ Instruction *AfterLi = LI.getNextNode();
Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
std::vector<Value *> Shadows;
std::vector<Value *> Origins;
@@ -2304,6 +2405,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
}
+
+ IRBuilder<> IRB(AfterLi);
+ DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
}
Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
@@ -3303,6 +3407,8 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
DFSF.SkipInsts.insert(LI);
DFSF.setOrigin(&CB, LI);
}
+
+ DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
}
}
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll b/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll
new file mode 100644
index 0000000000000..654696833a671
--- /dev/null
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/reaches_function.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -passes=dfsan -dfsan-reaches-function-callbacks=1 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare i32 @f()
+
+define void @load(i32) {
+ ; CHECK-LABEL: define void @load.dfsan
+ ; CHECK: call{{.*}}@__dfsan_reaches_function_callback
+ %i = alloca i32
+ store i32 %0, ptr %i
+ ret void
+}
+
+define void @store(ptr) {
+ ; CHECK-LABEL: define void @store.dfsan
+ ; CHECK: call{{.*}}@__dfsan_reaches_function_callback
+ %load = load i32, ptr %0
+ ret void
+}
+
+define void @call() {
+ ; CHECK-LABEL: define void @call.dfsan
+ ; CHECK: call{{.*}}@__dfsan_reaches_function_callback
+ %ret = call i32 @f()
+ ret void
+}
+
+; CHECK-LABEL: @__dfsan_reaches_function_callback(i8, ptr, i32, ptr)
More information about the llvm-commits
mailing list