[llvm] 4e17358 - [DFSan] Add option for conditional callbacks.

Andrew Browne via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 5 15:08:23 PST 2022


Author: Andrew Browne
Date: 2022-01-05T15:07:09-08:00
New Revision: 4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b

URL: https://github.com/llvm/llvm-project/commit/4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b
DIFF: https://github.com/llvm/llvm-project/commit/4e173585f6cf1c55b4ed450bce1a9ae11ecbbb2b.diff

LOG: [DFSan] Add option for conditional callbacks.

This allows DFSan to find tainted values used to control program behavior.

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D116207

Added: 
    compiler-rt/test/dfsan/conditional_callbacks.c
    compiler-rt/test/dfsan/conditional_callbacks_sig.c

Modified: 
    compiler-rt/include/sanitizer/dfsan_interface.h
    compiler-rt/lib/dfsan/dfsan.cpp
    compiler-rt/lib/dfsan/done_abilist.txt
    compiler-rt/test/dfsan/Inputs/flags_abilist.txt
    llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h
index bc0652c99a149..8e581a67572d3 100644
--- a/compiler-rt/include/sanitizer/dfsan_interface.h
+++ b/compiler-rt/include/sanitizer/dfsan_interface.h
@@ -27,6 +27,10 @@ typedef uint32_t dfsan_origin;
 /// Signature of the callback argument to dfsan_set_write_callback().
 typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
 
+/// Signature of the callback argument to dfsan_set_conditional_callback().
+typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
+                                             dfsan_origin origin);
+
 /// Computes the union of \c l1 and \c l2, resulting in a union label.
 dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
 
@@ -74,6 +78,19 @@ void dfsan_flush(void);
 /// callback executes.  Pass in NULL to remove any callback.
 void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
 
+/// Sets a callback to be invoked on any conditional expressions which have a
+/// taint label set. This can be used to find where tainted data influences
+/// the behavior of the program.
+/// These callbacks will only be added when -dfsan-conditional-callbacks=true.
+void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
+
+/// Conditional expressions can also be evaluated inside signal handlers.
+/// Writing callbacks that behave well in signal context is tricky, so when
+/// -dfsan-conditional-callbacks=true, conditional expressions used in signal
+/// handlers skip the callback and bitwise-or their labels into a global.
+/// This function returns all label bits seen in signal handler conditions.
+dfsan_label dfsan_get_labels_in_signal_conditional();
+
 /// Interceptor hooks.
 /// Whenever a dfsan's custom function is called the corresponding
 /// hook is called it non-zero. The hooks should be defined by the user.

diff  --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index ee7221c7b9a84..c8a3bdca06f8d 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -600,6 +600,60 @@ dfsan_has_label(dfsan_label label, dfsan_label elem) {
   return (label & elem) == elem;
 }
 
+namespace __dfsan {
+
+typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
+                                             dfsan_origin origin);
+static dfsan_conditional_callback_t conditional_callback = nullptr;
+static dfsan_label labels_in_signal_conditional = 0;
+
+static void ConditionalCallback(dfsan_label label, dfsan_origin origin) {
+  // Programs have many branches. For efficiency the conditional sink callback
+  // handler needs to ignore as many as possible as early as possible.
+  if (label == 0) {
+    return;
+  }
+  if (conditional_callback == nullptr) {
+    return;
+  }
+
+  // This initial ConditionalCallback handler needs to live in the dfsan
+  // runtime (rather than being an entirely user implemented hook) so that it
+  // has access to dfsan thread information.
+  DFsanThread *t = GetCurrentThread();
+  // A callback that does useful work (like recording the flow) will likely
+  // take too long to execute in a signal handler.
+  if (t && t->InSignalHandler()) {
+    // Record set of labels used in signal handler for completeness.
+    labels_in_signal_conditional |= label;
+    return;
+  }
+
+  conditional_callback(label, origin);
+}
+
+}  // namespace __dfsan
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) {
+  __dfsan::ConditionalCallback(label, origin);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback(
+    dfsan_label label) {
+  __dfsan::ConditionalCallback(label, 0);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback(
+    __dfsan::dfsan_conditional_callback_t callback) {
+  __dfsan::conditional_callback = callback;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
+dfsan_get_labels_in_signal_conditional() {
+  return __dfsan::labels_in_signal_conditional;
+}
+
 class Decorator : public __sanitizer::SanitizerCommonDecorator {
  public:
   Decorator() : SanitizerCommonDecorator() {}
@@ -898,6 +952,7 @@ extern "C" void dfsan_flush() {
       Die();
     }
   }
+  __dfsan::labels_in_signal_conditional = 0;
 }
 
 // TODO: CheckMemoryLayoutSanity is based on msan.

diff  --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index fc2dd02ccf5f6..e8fcd83d13bf8 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -46,6 +46,10 @@ fun:dfsan_get_init_origin=uninstrumented
 fun:dfsan_get_init_origin=discard
 fun:dfsan_get_track_origins=uninstrumented
 fun:dfsan_get_track_origins=discard
+fun:dfsan_set_conditional_callback=uninstrumented
+fun:dfsan_set_conditional_callback=discard
+fun:dfsan_get_labels_in_signal_conditional=uninstrumented
+fun:dfsan_get_labels_in_signal_conditional=discard
 
 ###############################################################################
 # glibc

diff  --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
index ce827dd7a642d..6245a419792b2 100644
--- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
+++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt
@@ -7,3 +7,9 @@ fun:main=discard
 
 fun:dfsan_set_label=uninstrumented
 fun:dfsan_set_label=discard
+
+fun:my_dfsan_conditional_callback=uninstrumented
+fun:my_dfsan_conditional_callback=discard
+
+fun:dfsan_set_conditional_callback=uninstrumented
+fun:dfsan_set_conditional_callback=discard

diff  --git a/compiler-rt/test/dfsan/conditional_callbacks.c b/compiler-rt/test/dfsan/conditional_callbacks.c
new file mode 100644
index 0000000000000..53d9f288e8429
--- /dev/null
+++ b/compiler-rt/test/dfsan/conditional_callbacks.c
@@ -0,0 +1,110 @@
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t
+// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s
+//
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -DORIGINS -c %s -o %t-callbacks-orig.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks -mllvm -dfsan-track-origins=1 -DORIGINS %s %t-callbacks-orig.o -o %t-orig
+// RUN: %run %t-orig FooBarBaz 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+
+// Tests that callbacks are inserted for conditionals when
+// -dfsan-conditional-callbacks is specified.
+
+#include <assert.h>
+#include <sanitizer/dfsan_interface.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef CALLBACKS
+// Compile this code without DFSan to avoid recursive instrumentation.
+
+extern dfsan_label LabelI;
+extern dfsan_label LabelJ;
+extern dfsan_label LabelIJ;
+
+void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) {
+  assert(Label != 0);
+#ifdef ORIGINS
+  assert(Origin != 0);
+#else
+  assert(Origin == 0);
+#endif
+
+  static int Count = 0;
+  switch (Count++) {
+  case 0:
+    assert(Label == LabelI);
+    break;
+  case 1:
+    assert(Label == LabelJ);
+    break;
+  case 2:
+    assert(Label == LabelIJ);
+    break;
+  default:
+    break;
+  }
+
+  fprintf(stderr, "Label %u used as condition\n", Label);
+}
+
+#else
+// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the
+// callbacks.
+
+dfsan_label LabelI;
+dfsan_label LabelJ;
+dfsan_label LabelIJ;
+
+extern void my_dfsan_conditional_callback(dfsan_label Label,
+                                          dfsan_origin Origin);
+
+int main(int Argc, char *Argv[]) {
+  assert(Argc == 2);
+
+  dfsan_set_conditional_callback(my_dfsan_conditional_callback);
+
+  int result = 0;
+  // Make these not look like constants, otherwise the branch we're expecting
+  // may be optimized out.
+  int DataI = (Argv[0][0] != 0) ? 1 : 0;
+  int DataJ = (Argv[1][0] != 0) ? 2 : 0;
+  LabelI = 1;
+  dfsan_set_label(LabelI, &DataI, sizeof(DataI));
+  LabelJ = 2;
+  dfsan_set_label(LabelJ, &DataJ, sizeof(DataJ));
+  LabelIJ = dfsan_union(LabelI, LabelJ);
+
+  assert(dfsan_get_label(DataI) == LabelI);
+
+  // CHECK: Label 1 used as condition
+  if (DataI) {
+    result = 42;
+  }
+
+  assert(dfsan_get_label(DataJ) == LabelJ);
+
+  // CHECK: Label 2 used as condition
+  switch (DataJ) {
+  case 1:
+    result += 10000;
+    break;
+  case 2:
+    result += 4200;
+    break;
+  default:
+    break;
+  }
+
+  int tainted_cond = ((DataI * DataJ) != 1);
+  assert(dfsan_get_label(tainted_cond) == LabelIJ);
+
+  // CHECK: Label 3 used as condition
+  result = tainted_cond ? result + 420000 : 9;
+
+  assert(result == 424242);
+  return 0;
+}
+
+#endif // #ifdef CALLBACKS

diff  --git a/compiler-rt/test/dfsan/conditional_callbacks_sig.c b/compiler-rt/test/dfsan/conditional_callbacks_sig.c
new file mode 100644
index 0000000000000..174f2fe442a75
--- /dev/null
+++ b/compiler-rt/test/dfsan/conditional_callbacks_sig.c
@@ -0,0 +1,98 @@
+// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o
+// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t
+// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+
+#include <assert.h>
+#include <sanitizer/dfsan_interface.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef CALLBACKS
+// Compile this code without DFSan to avoid recursive instrumentation.
+
+void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) {
+  assert(Label != 0);
+  assert(Origin == 0);
+
+  static int Count = 0;
+  switch (Count++) {
+  case 0:
+    assert(Label == 1);
+    break;
+  case 1:
+    assert(Label == 4);
+    break;
+  default:
+    break;
+  }
+
+  fprintf(stderr, "Label %u used as condition\n", Label);
+}
+
+#else
+// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the
+// callbacks.
+
+extern void my_dfsan_conditional_callback(dfsan_label Label,
+                                          dfsan_origin Origin);
+
+volatile int x = 0;
+volatile int y = 1;
+volatile int z = 0;
+
+void SignalHandler(int signo) {
+  assert(dfsan_get_label(x) == 0);
+  assert(dfsan_get_label(y) != 0);
+  assert(dfsan_get_label(z) != 0);
+  // Running the conditional callback from a signal handler is risky,
+  // because the code must be written with signal handler context in mind.
+  // Instead dfsan_get_labels_in_signal_conditional() will indicate labels
+  // used in conditions inside signal handlers.
+  // CHECK-NOT: Label 8 used as condition
+  if (z != 0) {
+    x = y;
+  }
+}
+
+int main(int Argc, char *Argv[]) {
+  assert(Argc >= 1);
+  int unknown = (Argv[0][0] != 0) ? 1 : 0;
+  dfsan_set_label(1, &unknown, sizeof(unknown));
+
+  dfsan_set_conditional_callback(my_dfsan_conditional_callback);
+
+  // CHECK: Label 1 used as condition
+  if (unknown) {
+    z = 42;
+  }
+
+  assert(dfsan_get_labels_in_signal_conditional() == 0);
+  dfsan_set_label(4, (void *)&y, sizeof(y));
+  dfsan_set_label(8, (void *)&z, sizeof(z));
+
+  struct sigaction sa = {};
+  sa.sa_handler = SignalHandler;
+  int r = sigaction(SIGHUP, &sa, NULL);
+  assert(dfsan_get_label(r) == 0);
+
+  kill(getpid(), SIGHUP);
+  signal(SIGHUP, SIG_DFL);
+
+  assert(dfsan_get_labels_in_signal_conditional() == 8);
+  assert(x == 1);
+  // CHECK: Label 4 used as condition
+  if (x != 0) {
+    z = 123;
+  }
+  // Flush should clear the conditional signals seen.
+  dfsan_flush();
+  assert(dfsan_get_labels_in_signal_conditional() == 0);
+  return 0;
+}
+
+#endif // #ifdef CALLBACKS

diff  --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 1e5688828d309..7c7d3ee13de77 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -208,6 +208,14 @@ static cl::opt<bool> ClEventCallbacks(
     cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
     cl::Hidden, cl::init(false));
 
+// Experimental feature that inserts callbacks for conditionals, including:
+// conditional branch, switch, select.
+// This must be true for dfsan_set_conditional_callback() to have effect.
+static cl::opt<bool> ClConditionalCallbacks(
+    "dfsan-conditional-callbacks",
+    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
+    cl::init(false));
+
 // Controls whether the pass tracks the control flow of select instructions.
 static cl::opt<bool> ClTrackSelectControlFlow(
     "dfsan-track-select-control-flow",
@@ -428,6 +436,8 @@ class DataFlowSanitizer {
   FunctionType *DFSanSetLabelFnTy;
   FunctionType *DFSanNonzeroLabelFnTy;
   FunctionType *DFSanVarargWrapperFnTy;
+  FunctionType *DFSanConditionalCallbackFnTy;
+  FunctionType *DFSanConditionalCallbackOriginFnTy;
   FunctionType *DFSanCmpCallbackFnTy;
   FunctionType *DFSanLoadStoreCallbackFnTy;
   FunctionType *DFSanMemTransferCallbackFnTy;
@@ -444,6 +454,8 @@ class DataFlowSanitizer {
   FunctionCallee DFSanLoadCallbackFn;
   FunctionCallee DFSanStoreCallbackFn;
   FunctionCallee DFSanMemTransferCallbackFn;
+  FunctionCallee DFSanConditionalCallbackFn;
+  FunctionCallee DFSanConditionalCallbackOriginFn;
   FunctionCallee DFSanCmpCallbackFn;
   FunctionCallee DFSanChainOriginFn;
   FunctionCallee DFSanChainOriginIfTaintedFn;
@@ -642,6 +654,10 @@ struct DFSanFunction {
 
   Align getShadowAlign(Align InstAlignment);
 
+  // If ClConditionalCallbacks is enabled, insert a callback before the given
+  // branch, switch, or select instruction for its conditional expression.
+  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
+
 private:
   /// Collapses the shadow with aggregate type into a single primitive shadow
   /// value.
@@ -748,6 +764,8 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {
   void visitSelectInst(SelectInst &I);
   void visitMemSetInst(MemSetInst &I);
   void visitMemTransferInst(MemTransferInst &I);
+  void visitBranchInst(BranchInst &BR);
+  void visitSwitchInst(SwitchInst &SW);
 
 private:
   void visitCASOrRMW(Align InstAlignment, Instruction &I);
@@ -971,6 +989,22 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
   return PrimitiveShadow;
 }
 
+void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
+                                                     Value *Condition) {
+  if (!ClConditionalCallbacks) {
+    return;
+  }
+  IRBuilder<> IRB(&I);
+  Value *CondShadow = getShadow(Condition);
+  if (DFS.shouldTrackOrigins()) {
+    Value *CondOrigin = getOrigin(Condition);
+    IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
+                   {CondShadow, CondOrigin});
+  } else {
+    IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
+  }
+}
+
 Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
   if (!OrigTy->isSized())
     return PrimitiveShadowTy;
@@ -1032,6 +1066,13 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
       FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
   DFSanVarargWrapperFnTy = FunctionType::get(
       Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+  DFSanConditionalCallbackFnTy =
+      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
+                        /*isVarArg=*/false);
+  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
+  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
+      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
+      /*isVarArg=*/false);
   DFSanCmpCallbackFnTy =
       FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                         /*isVarArg=*/false);
@@ -1270,6 +1311,10 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
       DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
   DFSanRuntimeFunctions.insert(
       DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
+  DFSanRuntimeFunctions.insert(
+      DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
+  DFSanRuntimeFunctions.insert(
+      DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
   DFSanRuntimeFunctions.insert(
       DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
   DFSanRuntimeFunctions.insert(
@@ -1292,6 +1337,12 @@ void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
       "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
   DFSanCmpCallbackFn =
       Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
+
+  DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
+      "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy);
+  DFSanConditionalCallbackOriginFn =
+      Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
+                               DFSanConditionalCallbackOriginFnTy);
 }
 
 void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
@@ -2593,6 +2644,8 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
   Value *FalseOrigin =
       ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;
 
+  DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());
+
   if (isa<VectorType>(I.getCondition()->getType())) {
     ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                                FalseShadow, &I);
@@ -2683,6 +2736,17 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
   }
 }
 
+void DFSanVisitor::visitBranchInst(BranchInst &BR) {
+  if (!BR.isConditional())
+    return;
+
+  DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
+}
+
+void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
+  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
+}
+
 static bool isAMustTailRetVal(Value *RetVal) {
   // Tail call may have a bitcast between return.
   if (auto *I = dyn_cast<BitCastInst>(RetVal)) {


        


More information about the llvm-commits mailing list