[llvm] 4e17358 - [DFSan] Add option for conditional callbacks.
Andrew Browne via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 5 15:08:23 PST 2022
Author: Andrew Browne
Date: 2022-01-05T15:07:09-08:00
New Revision: 4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b
URL: https://github.com/llvm/llvm-project/commit/4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b
DIFF: https://github.com/llvm/llvm-project/commit/4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b.diff
LOG: [DFSan] Add option for conditional callbacks.
This allows DFSan to find tainted values used to control program behavior.
Reviewed By: morehouse
Differential Revision: https://reviews.llvm.org/D116207
Added:
compiler-rt/test/dfsan/conditional_callbacks.c
compiler-rt/test/dfsan/conditional_callbacks_sig.c
Modified:
compiler-rt/include/sanitizer/dfsan_interface.h
compiler-rt/lib/dfsan/dfsan.cpp
compiler-rt/lib/dfsan/done_abilist.txt
compiler-rt/test/dfsan/Inputs/flags_abilist.txt
llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
Removed:
################################################################################
diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h
index bc0652c99a149..8e581a67572d3 100644
--- a/compiler-rt/include/sanitizer/dfsan_interface.h
+++ b/compiler-rt/include/sanitizer/dfsan_interface.h
@@ -27,6 +27,10 @@ typedef uint32_t dfsan_origin;
/// Signature of the callback argument to dfsan_set_write_callback().
typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
+/// Signature of the callback argument to dfsan_set_conditional_callback().
+/// \c label is the (non-zero) label of the value used as the condition.
+/// \c origin is the origin reported for that value; it is 0 when the
+/// condition is reported without origin information.
+typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
+ dfsan_origin origin);
+
/// Computes the union of \c l1 and \c l2, resulting in a union label.
dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
@@ -74,6 +78,19 @@ void dfsan_flush(void);
/// callback executes. Pass in NULL to remove any callback.
void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
+/// Sets a callback to be invoked on any conditional expressions which have a
+/// taint label set. This can be used to find where tainted data influences
+/// the behavior of the program. Pass in NULL to remove any callback.
+/// The callback is not invoked for conditions evaluated while a signal
+/// handler is running; see dfsan_get_labels_in_signal_conditional().
+/// These callbacks will only be added when -dfsan-conditional-callbacks=true.
+void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
+
+/// Conditional expressions can also be evaluated inside signal handlers.
+/// Making callbacks that handle signals well is tricky, so when
+/// -dfsan-conditional-callbacks=true and a conditional callback is set,
+/// conditional expressions used in signal handlers do not invoke the callback.
+/// Instead the labels they see are added into a global (bitwise-or together).
+/// This function returns all label bits seen in signal handler conditions
+/// since the last call to dfsan_flush().
+dfsan_label dfsan_get_labels_in_signal_conditional(void);
+
/// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called it non-zero. The hooks should be defined by the user.
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index ee7221c7b9a84..c8a3bdca06f8d 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -600,6 +600,60 @@ dfsan_has_label(dfsan_label label, dfsan_label elem) {
return (label & elem) == elem;
}
+namespace __dfsan {
+
+// Type of the user hook installed through dfsan_set_conditional_callback().
+using dfsan_conditional_callback_t = void (*)(dfsan_label label,
+                                              dfsan_origin origin);
+// Currently installed user hook; nullptr means no hook is installed.
+static dfsan_conditional_callback_t conditional_callback = nullptr;
+// Bitwise-or of the labels of all conditions reported from signal handlers.
+static dfsan_label labels_in_signal_conditional = 0;
+
+// Shared implementation of the __dfsan_conditional_callback* entry points.
+// It lives in the dfsan runtime, rather than being left entirely to the user
+// hook, because it needs the runtime's per-thread state to detect signal
+// handlers.
+static void ConditionalCallback(dfsan_label label, dfsan_origin origin) {
+  // Instrumented programs reach this on every conditional, so the cheap
+  // rejections come first: untainted conditions and the no-hook case.
+  const bool tainted = label != 0;
+  if (!tainted || conditional_callback == nullptr)
+    return;
+
+  DFsanThread *thread = GetCurrentThread();
+  const bool in_signal_handler =
+      thread != nullptr && thread->InSignalHandler();
+  if (!in_signal_handler) {
+    conditional_callback(label, origin);
+    return;
+  }
+
+  // A hook that does real work (such as recording the flow) is likely too
+  // heavy to run in signal context. Only remember which labels were seen, so
+  // the information is not lost.
+  labels_in_signal_conditional |= label;
+}
+
+} // namespace __dfsan
+
+// Runtime entry point that receives both the label and the origin of a
+// condition. Forwards them unchanged to __dfsan::ConditionalCallback.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) {
+ __dfsan::ConditionalCallback(label, origin);
+}
+
+// Runtime entry point that receives only the label of a condition. No origin
+// is available here, so 0 is passed to __dfsan::ConditionalCallback as the
+// origin.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback(
+ dfsan_label label) {
+ __dfsan::ConditionalCallback(label, 0);
+}
+
+// Installs the user hook that __dfsan::ConditionalCallback forwards to,
+// replacing any previous one. With a null callback, ConditionalCallback
+// returns without doing anything.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback(
+ __dfsan::dfsan_conditional_callback_t callback) {
+ __dfsan::conditional_callback = callback;
+}
+
+// Returns the bitwise-or of all labels that ConditionalCallback recorded for
+// conditions evaluated inside signal handlers.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
+dfsan_get_labels_in_signal_conditional() {
+ return __dfsan::labels_in_signal_conditional;
+}
+
class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:
Decorator() : SanitizerCommonDecorator() {}
@@ -898,6 +952,7 @@ extern "C" void dfsan_flush() {
Die();
}
}
+ __dfsan::labels_in_signal_conditional = 0;
}
// TODO: CheckMemoryLayoutSanity is based on msan.
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index fc2dd02ccf5f6..e8fcd83d13bf8 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -46,6 +46,10 @@ fun:dfsan_get_init_origin=uninstrumented
fun:dfsan_get_init_origin=discard
fun:dfsan_get_track_origins=uninstrumented
fun:dfsan_get_track_origins=discard
+fun:dfsan_set_conditional_callback=uninstrumented
+fun:dfsan_set_conditional_callback=discard
+fun:dfsan_get_labels_in_signal_conditional=uninstrumented
+fun:dfsan_get_labels_in_signal_conditional=discard
###############################################################################
# glibc
diff --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
index ce827dd7a642d..6245a419792b2 100644
--- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
+++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
@@ -7,3 +7,9 @@ fun:main=discard
fun:dfsan_set_label=uninstrumented
fun:dfsan_set_label=discard
+
+fun:my_dfsan_conditional_callback=uninstrumented
+fun:my_dfsan_conditional_callback=discard
+
+fun:dfsan_set_conditional_callback=uninstrumented
+fun:dfsan_set_conditional_callback=discard
diff --git a/compiler-rt/test/dfsan/conditional_callbacks.c b/compiler-rt/test/dfsan/conditional_callbacks.c
new file mode 100644
index 0000000000000..53d9f288e8429
--- /dev/null
+++ b/compiler-rt/test/dfsan/conditional_callbacks.c
@@ -0,0 +1,110 @@
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t
+// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s
+//
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGINS -c %s -o %t-callbacks-orig.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks -mllvm -dfsan-track-origins=1 -DORIGINS %s %t-callbacks-orig.o -o %t-orig
+// RUN: %run %t-orig FooBarBaz 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+
+// Tests that callbacks are inserted for conditionals when
+// -dfsan-conditional-callbacks is specified.
+
+#include <assert.h>
+#include <sanitizer/dfsan_interface.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef CALLBACKS
+// Compile this code without DFSan to avoid recursive instrumentation.
+
+extern dfsan_label LabelI;
+extern dfsan_label LabelJ;
+extern dfsan_label LabelIJ;
+
+// Hook installed by main(). Verifies the label (and origin) of each reported
+// condition against the order in which main() evaluates its tainted
+// conditions, then prints the line the test output is matched against.
+void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) {
+  assert(Label != 0);
+#ifdef ORIGINS
+  assert(Origin != 0);
+#else
+  assert(Origin == 0);
+#endif
+
+  // Index of this invocation: 0 for the first call, 1 for the second, ...
+  static int Count = 0;
+  const int CallIndex = Count++;
+
+  // Only the first three invocations have a known expected label.
+  if (CallIndex == 0) {
+    assert(Label == LabelI);
+  } else if (CallIndex == 1) {
+    assert(Label == LabelJ);
+  } else if (CallIndex == 2) {
+    assert(Label == LabelIJ);
+  }
+
+  fprintf(stderr, "Label %u used as condition\n", Label);
+}
+
+#else
+// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the
+// callbacks.
+
+dfsan_label LabelI;
+dfsan_label LabelJ;
+dfsan_label LabelIJ;
+
+extern void my_dfsan_conditional_callback(dfsan_label Label,
+ dfsan_origin Origin);
+
+// Taints two ints with distinct labels, installs the conditional callback and
+// then uses the tainted values as the condition of an if, a switch and a
+// conditional expression. Each use is expected to produce one callback line.
+// Expects exactly one command line argument.
+int main(int Argc, char *Argv[]) {
+ assert(Argc == 2);
+
+ // Install the hook before any tainted condition is evaluated.
+ dfsan_set_conditional_callback(my_dfsan_conditional_callback);
+
+ int result = 0;
+ // Make these not look like constants, otherwise the branch we're expecting
+ // may be optimized out.
+ int DataI = (Argv[0][0] != 0) ? 1 : 0;
+ int DataJ = (Argv[1][0] != 0) ? 2 : 0;
+ // The label globals are also read by the callback, which is compiled
+ // separately.
+ LabelI = 1;
+ dfsan_set_label(LabelI, &DataI, sizeof(DataI));
+ LabelJ = 2;
+ dfsan_set_label(LabelJ, &DataJ, sizeof(DataJ));
+ LabelIJ = dfsan_union(LabelI, LabelJ);
+
+ assert(dfsan_get_label(DataI) == LabelI);
+
+ // CHECK: Label 1 used as condition
+ if (DataI) {
+ result = 42;
+ }
+
+ assert(dfsan_get_label(DataJ) == LabelJ);
+
+ // CHECK: Label 2 used as condition
+ switch (DataJ) {
+ case 1:
+ result += 10000;
+ break;
+ case 2:
+ result += 4200;
+ break;
+ default:
+ break;
+ }
+
+ // A value computed from both DataI and DataJ carries the union label.
+ int tainted_cond = ((DataI * DataJ) != 1);
+ assert(dfsan_get_label(tainted_cond) == LabelIJ);
+
+ // CHECK: Label 3 used as condition
+ result = tainted_cond ? result + 420000 : 9;
+
+ // 42 + 4200 + 420000: every tainted condition above took the expected path.
+ assert(result == 424242);
+ return 0;
+}
+
+#endif // #ifdef CALLBACKS
diff --git a/compiler-rt/test/dfsan/conditional_callbacks_sig.c b/compiler-rt/test/dfsan/conditional_callbacks_sig.c
new file mode 100644
index 0000000000000..174f2fe442a75
--- /dev/null
+++ b/compiler-rt/test/dfsan/conditional_callbacks_sig.c
@@ -0,0 +1,98 @@
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t
+// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+
+#include <assert.h>
+#include <sanitizer/dfsan_interface.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef CALLBACKS
+// Compile this code without DFSan to avoid recursive instrumentation.
+
+// Hook installed by main(). Origins are not tracked in this test, so the
+// origin must always be 0. The first two invocations are expected to report
+// labels 1 and 4, in that order.
+void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) {
+  assert(Label != 0);
+  assert(Origin == 0);
+
+  // Index of this invocation: 0 for the first call, 1 for the second, ...
+  static int Count = 0;
+  const int CallIndex = Count++;
+
+  if (CallIndex == 0) {
+    assert(Label == 1);
+  } else if (CallIndex == 1) {
+    assert(Label == 4);
+  }
+
+  fprintf(stderr, "Label %u used as condition\n", Label);
+}
+
+#else
+// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the
+// callbacks.
+
+extern void my_dfsan_conditional_callback(dfsan_label Label,
+ dfsan_origin Origin);
+
+volatile int x = 0;
+volatile int y = 1;
+volatile int z = 0;
+
+// Signal handler installed by main() for SIGHUP. By the time it runs, main()
+// has labelled y and z but not x. It branches on the labelled z and, if the
+// branch is taken, copies y into x.
+void SignalHandler(int signo) {
+ assert(dfsan_get_label(x) == 0);
+ assert(dfsan_get_label(y) != 0);
+ assert(dfsan_get_label(z) != 0);
+ // Running the conditional callback from a signal handler is risky,
+ // because the code must be written with signal handler context in mind.
+ // Instead dfsan_get_labels_in_signal_conditional() will indicate labels
+ // used in conditions inside signal handlers.
+ // CHECK-NOT: Label 8 used as condition
+ if (z != 0) {
+ x = y;
+ }
+}
+
+// Checks that a tainted condition inside a signal handler does not reach the
+// user callback but is reported by dfsan_get_labels_in_signal_conditional(),
+// while tainted conditions outside the handler still reach the callback.
+int main(int Argc, char *Argv[]) {
+ assert(Argc >= 1);
+ // Derived from Argv so that it does not look like a constant.
+ int unknown = (Argv[0][0] != 0) ? 1 : 0;
+ dfsan_set_label(1, &unknown, sizeof(unknown));
+
+ dfsan_set_conditional_callback(my_dfsan_conditional_callback);
+
+ // CHECK: Label 1 used as condition
+ if (unknown) {
+ z = 42;
+ }
+
+ // Nothing has run in a signal handler yet.
+ assert(dfsan_get_labels_in_signal_conditional() == 0);
+ dfsan_set_label(4, (void *)&y, sizeof(y));
+ dfsan_set_label(8, (void *)&z, sizeof(z));
+
+ struct sigaction sa = {};
+ sa.sa_handler = SignalHandler;
+ int r = sigaction(SIGHUP, &sa, NULL);
+ assert(dfsan_get_label(r) == 0);
+
+ // Send SIGHUP to this process, then restore the default disposition.
+ kill(getpid(), SIGHUP);
+ signal(SIGHUP, SIG_DFL);
+
+ // The handler branched on z (label 8); that label must have been recorded.
+ assert(dfsan_get_labels_in_signal_conditional() == 8);
+ // The handler copied y into x.
+ assert(x == 1);
+ // CHECK: Label 4 used as condition
+ if (x != 0) {
+ z = 123;
+ }
+ // Flush should clear the conditional signals seen.
+ dfsan_flush();
+ assert(dfsan_get_labels_in_signal_conditional() == 0);
+ return 0;
+}
+
+#endif // #ifdef CALLBACKS
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 1e5688828d309..7c7d3ee13de77 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -208,6 +208,14 @@ static cl::opt<bool> ClEventCallbacks(
cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
cl::Hidden, cl::init(false));
+// Experimental feature that inserts callbacks for conditionals, including:
+// conditional branch, switch, select.
+// The inserted call is to __dfsan_conditional_callback, or to
+// __dfsan_conditional_callback_origin when origins are tracked.
+// Off by default.
+// This must be true for dfsan_set_conditional_callback() to have effect.
+static cl::opt<bool> ClConditionalCallbacks(
+ "dfsan-conditional-callbacks",
+ cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
+ cl::init(false));
+
// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
"dfsan-track-select-control-flow",
@@ -428,6 +436,8 @@ class DataFlowSanitizer {
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
FunctionType *DFSanVarargWrapperFnTy;
+ FunctionType *DFSanConditionalCallbackFnTy;
+ FunctionType *DFSanConditionalCallbackOriginFnTy;
FunctionType *DFSanCmpCallbackFnTy;
FunctionType *DFSanLoadStoreCallbackFnTy;
FunctionType *DFSanMemTransferCallbackFnTy;
@@ -444,6 +454,8 @@ class DataFlowSanitizer {
FunctionCallee DFSanLoadCallbackFn;
FunctionCallee DFSanStoreCallbackFn;
FunctionCallee DFSanMemTransferCallbackFn;
+ FunctionCallee DFSanConditionalCallbackFn;
+ FunctionCallee DFSanConditionalCallbackOriginFn;
FunctionCallee DFSanCmpCallbackFn;
FunctionCallee DFSanChainOriginFn;
FunctionCallee DFSanChainOriginIfTaintedFn;
@@ -642,6 +654,10 @@ struct DFSanFunction {
Align getShadowAlign(Align InstAlignment);
+ // If ClConditionalCallbacks is enabled, insert a callback immediately before
+ // the given instruction (a conditional branch, switch or select), passing the
+ // shadow of the given conditional expression and, when origins are tracked,
+ // its origin.
+ void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
+
private:
/// Collapses the shadow with aggregate type into a single primitive shadow
/// value.
@@ -748,6 +764,8 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {
void visitSelectInst(SelectInst &I);
void visitMemSetInst(MemSetInst &I);
void visitMemTransferInst(MemTransferInst &I);
+ void visitBranchInst(BranchInst &BR);
+ void visitSwitchInst(SwitchInst &SW);
private:
void visitCASOrRMW(Align InstAlignment, Instruction &I);
@@ -971,6 +989,22 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
return PrimitiveShadow;
}
+// Emits a call to the conditional callback runtime function immediately before
+// \p I. The call receives the shadow of \p Condition and, when origins are
+// tracked, its origin as well. Does nothing unless ClConditionalCallbacks is
+// set.
+void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
+                                                     Value *Condition) {
+  if (!ClConditionalCallbacks) {
+    return;
+  }
+  // An IRBuilder positioned on I inserts new instructions before I.
+  IRBuilder<> IRB(&I);
+  Value *CondShadow = getShadow(Condition);
+  CallInst *CI;
+  if (DFS.shouldTrackOrigins()) {
+    Value *CondOrigin = getOrigin(Condition);
+    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
+                        {CondShadow, CondOrigin});
+  } else {
+    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
+  }
+  // The shadow argument is a small integer (assumes PrimitiveShadowTy is
+  // narrower than a native int - confirm). Mark it zeroext so the value is
+  // extended the way the C-compiled runtime callee expects; without this the
+  // upper bits of the label argument are unspecified.
+  CI->addParamAttr(0, Attribute::ZExt);
+}
+
Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
if (!OrigTy->isSized())
return PrimitiveShadowTy;
@@ -1032,6 +1066,13 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
DFSanVarargWrapperFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ DFSanConditionalCallbackFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
+ /*isVarArg=*/false);
+ Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
+ DFSanConditionalCallbackOriginFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
+ /*isVarArg=*/false);
DFSanCmpCallbackFnTy =
FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
/*isVarArg=*/false);
@@ -1270,6 +1311,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
+ DFSanRuntimeFunctions.insert(
+ DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
+ DFSanRuntimeFunctions.insert(
+ DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
@@ -1292,6 +1337,12 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
"__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
DFSanCmpCallbackFn =
Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
+
+ DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
+ "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy);
+ DFSanConditionalCallbackOriginFn =
+ Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
+ DFSanConditionalCallbackOriginFnTy);
}
void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
@@ -2593,6 +2644,8 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
Value *FalseOrigin =
ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
+ DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
+
if (isa<VectorType>(I.getCondition()->getType())) {
ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
FalseShadow, &I);
@@ -2683,6 +2736,17 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
}
}
+// Hands the condition of a conditional branch to the conditional callback
+// machinery. Unconditional branches have no condition and are left untouched.
+void DFSanVisitor::visitBranchInst(BranchInst &BR) {
+  if (BR.isConditional()) {
+    Value *BranchCond = BR.getCondition();
+    DFSF.addConditionalCallbacksIfEnabled(BR, BranchCond);
+  }
+}
+
+// Hands the value being switched on to the conditional callback machinery.
+void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
+  Value *SwitchedOn = SW.getCondition();
+  DFSF.addConditionalCallbacksIfEnabled(SW, SwitchedOn);
+}
+
static bool isAMustTailRetVal(Value *RetVal) {
// Tail call may have a bitcast between return.
if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
More information about the llvm-commits
mailing list