[compiler-rt] 2d9c6e1 - [dfsan] Add origin chain utils
Jianzhou Zhao via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 11 11:10:40 PST 2021
Author: Jianzhou Zhao
Date: 2021-02-11T19:10:11Z
New Revision: 2d9c6e10e92e983fb3ad497f29bea697a94c935a
URL: https://github.com/llvm/llvm-project/commit/2d9c6e10e92e983fb3ad497f29bea697a94c935a
DIFF: https://github.com/llvm/llvm-project/commit/2d9c6e10e92e983fb3ad497f29bea697a94c935a.diff
LOG: [dfsan] Add origin chain utils
This is a part of https://reviews.llvm.org/D95835.
The design is based on MSan origin chains.
An 4-byte origin is a hash of an origin chain. An origin chain is a
pair of a stack hash id and a hash to its previous origin chain. 0 means
no previous origin chains exist. We limit the length of a chain to be
16. With origin_history_size = 0, the limit is removed.
The change does not have any test cases yet. The following change
will be adding test cases when the APIs are used.
Reviewed-by: morehouse
Differential Revision: https://reviews.llvm.org/D96160
Added:
compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp
compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h
compiler-rt/lib/dfsan/dfsan_flags.h
compiler-rt/lib/dfsan/dfsan_origin.h
Modified:
compiler-rt/lib/dfsan/CMakeLists.txt
compiler-rt/lib/dfsan/dfsan.cpp
compiler-rt/lib/dfsan/dfsan.h
compiler-rt/lib/dfsan/dfsan_flags.inc
Removed:
################################################################################
diff --git a/compiler-rt/lib/dfsan/CMakeLists.txt b/compiler-rt/lib/dfsan/CMakeLists.txt
index b4e589bed504..fa6823c9c00c 100644
--- a/compiler-rt/lib/dfsan/CMakeLists.txt
+++ b/compiler-rt/lib/dfsan/CMakeLists.txt
@@ -3,6 +3,7 @@ include_directories(..)
# Runtime library sources and build flags.
set(DFSAN_RTL_SOURCES
dfsan.cpp
+ dfsan_chained_origin_depot.cpp
dfsan_custom.cpp
dfsan_interceptors.cpp
dfsan_thread.cpp
@@ -10,7 +11,9 @@ set(DFSAN_RTL_SOURCES
set(DFSAN_RTL_HEADERS
dfsan.h
+ dfsan_chained_origin_depot.h
dfsan_flags.inc
+ dfsan_flags.h
dfsan_platform.h
dfsan_thread.h
)
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 31c176c9ef2a..4f02c49ebe63 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -20,6 +20,9 @@
#include "dfsan/dfsan.h"
+#include "dfsan/dfsan_chained_origin_depot.h"
+#include "dfsan/dfsan_flags.h"
+#include "dfsan/dfsan_origin.h"
#include "dfsan/dfsan_thread.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
@@ -282,6 +285,46 @@ dfsan_label dfsan_create_label(const char *desc, void *userdata) {
return label;
}
+// For platforms which support slow unwinder only, we need to restrict the store
+// context size to 1, basically only storing the current pc, because the slow
+// unwinder which is based on libunwind is not async signal safe and causes
+// random freezes in forking applications as well as in signal handlers.
+// DFSan supports only Linux. So we do not restrict the store context size.
+#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
+ BufferedStackTrace stack; \
+ stack.Unwind(pc, bp, nullptr, true, flags().store_context_size);
+
+#define PRINT_CALLER_STACK_TRACE \
+ { \
+ GET_CALLER_PC_BP_SP; \
+ (void)sp; \
+ GET_STORE_STACK_TRACE_PC_BP(pc, bp) \
+ stack.Print(); \
+ }
+
+static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) {
+ // StackDepot is not async signal safe. Do not create new chains in a signal
+ // handler.
+ DFsanThread *t = GetCurrentThread();
+ if (t && t->InSignalHandler())
+ return id;
+
+ // As an optimization the origin of an application byte is updated only when
+ // its shadow is non-zero. Because we are only interested in the origins of
+ // taint labels, it does not matter what origin a zero label has. This reduces
+ // memory write cost. MSan does similar optimization. The following invariant
+ // may not hold because of some bugs. We check the invariant to help debug.
+ if (!from_init && id == 0 && flags().check_origin_invariant) {
+ Printf(" DFSan found invalid origin invariant\n");
+ PRINT_CALLER_STACK_TRACE
+ }
+
+ Origin o = Origin::FromRawId(id);
+ stack->tag = StackTrace::TAG_UNKNOWN;
+ Origin chained = Origin::CreateChainedOrigin(o, stack);
+ return chained.raw_id();
+}
+
static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,
uptr size) {
dfsan_label *labelp = (dfsan_label *)shadow_addr;
diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h
index e03716f662e5..62eda73e966f 100644
--- a/compiler-rt/lib/dfsan/dfsan.h
+++ b/compiler-rt/lib/dfsan/dfsan.h
@@ -15,6 +15,8 @@
#define DFSAN_H
#include "sanitizer_common/sanitizer_internal_defs.h"
+
+#include "dfsan_flags.h"
#include "dfsan_platform.h"
using __sanitizer::uptr;
@@ -58,19 +60,6 @@ inline const dfsan_label *shadow_for(const void *ptr) {
return shadow_for(const_cast<void *>(ptr));
}
-struct Flags {
-#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
-#include "dfsan_flags.inc"
-#undef DFSAN_FLAG
-
- void SetDefaults();
-};
-
-extern Flags flags_data;
-inline Flags &flags() {
- return flags_data;
-}
-
} // namespace __dfsan
#endif // DFSAN_H
diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp
new file mode 100644
index 000000000000..9ec598bf2ce9
--- /dev/null
+++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp
@@ -0,0 +1,22 @@
+//===-- dfsan_chained_origin_depot.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of DataFlowSanitizer.
+//
+// A storage for chained origins.
+//===----------------------------------------------------------------------===//
+
+#include "dfsan_chained_origin_depot.h"
+
+namespace __dfsan {
+
+static ChainedOriginDepot chainedOriginDepot;
+
+ChainedOriginDepot* GetChainedOriginDepot() { return &chainedOriginDepot; }
+
+} // namespace __dfsan
diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h
new file mode 100644
index 000000000000..d715ef707f41
--- /dev/null
+++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h
@@ -0,0 +1,26 @@
+//===-- dfsan_chained_origin_depot.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of DataFlowSanitizer.
+//
+// A storage for chained origins.
+//===----------------------------------------------------------------------===//
+
+#ifndef DFSAN_CHAINED_ORIGIN_DEPOT_H
+#define DFSAN_CHAINED_ORIGIN_DEPOT_H
+
+#include "sanitizer_common/sanitizer_chained_origin_depot.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __dfsan {
+
+ChainedOriginDepot* GetChainedOriginDepot();
+
+} // namespace __dfsan
+
+#endif // DFSAN_CHAINED_ORIGIN_DEPOT_H
diff --git a/compiler-rt/lib/dfsan/dfsan_flags.h b/compiler-rt/lib/dfsan/dfsan_flags.h
new file mode 100644
index 000000000000..ec7edf6112a9
--- /dev/null
+++ b/compiler-rt/lib/dfsan/dfsan_flags.h
@@ -0,0 +1,32 @@
+//===-- dfsan_flags.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of DataFlowSanitizer.
+//
+// DFSan flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef DFSAN_FLAGS_H
+#define DFSAN_FLAGS_H
+
+namespace __dfsan {
+
+struct Flags {
+#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "dfsan_flags.inc"
+#undef DFSAN_FLAG
+
+ void SetDefaults();
+};
+
+extern Flags flags_data;
+inline Flags &flags() { return flags_data; }
+
+} // namespace __dfsan
+
+#endif // DFSAN_FLAGS_H
diff --git a/compiler-rt/lib/dfsan/dfsan_flags.inc b/compiler-rt/lib/dfsan/dfsan_flags.inc
index cdd0035c9b2d..4ac4ef736e45 100644
--- a/compiler-rt/lib/dfsan/dfsan_flags.inc
+++ b/compiler-rt/lib/dfsan/dfsan_flags.inc
@@ -29,3 +29,14 @@ DFSAN_FLAG(
DFSAN_FLAG(const char *, dump_labels_at_exit, "", "The path of the file where "
"to dump the labels when the "
"program terminates.")
+DFSAN_FLAG(
+ int, origin_history_size, Origin::kMaxDepth,
+ "The limit of origin chain length. Non-positive values mean unlimited.")
+DFSAN_FLAG(
+ int, origin_history_per_stack_limit, 20000,
+ "The limit of origin node's references count. "
+ "Non-positive values mean unlimited.")
+DFSAN_FLAG(int, store_context_size, 20,
+ "The depth limit of origin tracking stack traces.")
+DFSAN_FLAG(bool, check_origin_invariant, false,
+ "Whether to check if the origin invariant holds.")
diff --git a/compiler-rt/lib/dfsan/dfsan_origin.h b/compiler-rt/lib/dfsan/dfsan_origin.h
new file mode 100644
index 000000000000..89fd7f99599f
--- /dev/null
+++ b/compiler-rt/lib/dfsan/dfsan_origin.h
@@ -0,0 +1,127 @@
+//===-- dfsan_origin.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of DataFlowSanitizer.
+//
+// Origin id utils.
+//===----------------------------------------------------------------------===//
+
+#ifndef DFSAN_ORIGIN_H
+#define DFSAN_ORIGIN_H
+
+#include "dfsan_chained_origin_depot.h"
+#include "dfsan_flags.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+namespace __dfsan {
+
+// Origin handling.
+//
+// Origin is a 32-bit identifier that is attached to any taint value in the
+// program and describes how this memory came to be tainted.
+//
+// Chained origin id is like:
+// zzzz xxxx xxxx xxxx
+//
+// Chained origin id describes an event of storing a taint value to
+// memory. The xxx part is a value of ChainedOriginDepot, which is a mapping of
+// (stack_id, prev_id) -> id, where
+// * stack_id describes the event.
+// StackDepot keeps a mapping between those and corresponding stack traces.
+// * prev_id is another origin id that describes the earlier part of the
+// taint value history. 0 prev_id indicates the start of a chain.
+// Following a chain of prev_id provides the full recorded history of a taint
+// value.
+//
+// This, effectively, defines a forest where nodes are points in value history
+// marked with origin ids, and edges are events that are marked with stack_id.
+//
+// The "zzzz" bits of chained origin id are used to store the length of the
+// origin chain.
+
+class Origin {
+ public:
+ static bool isValidId(u32 id) { return id != 0; }
+
+ u32 raw_id() const { return raw_id_; }
+
+ bool isChainedOrigin() const { return Origin::isValidId(raw_id_); }
+
+ u32 getChainedId() const {
+ CHECK(Origin::isValidId(raw_id_));
+ return raw_id_ & kChainedIdMask;
+ }
+
+ // Returns the next origin in the chain and the current stack trace.
+ //
+ // It scans a partition of StackDepot linearly, and is used only by origin
+ // tracking report.
+ Origin getNextChainedOrigin(StackTrace *stack) const {
+ CHECK(Origin::isValidId(raw_id_));
+ u32 prev_id;
+ u32 stack_id = GetChainedOriginDepot()->Get(getChainedId(), &prev_id);
+ if (stack)
+ *stack = StackDepotGet(stack_id);
+ return Origin(prev_id);
+ }
+
+ static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) {
+ int depth = prev.isChainedOrigin() ? prev.depth() : -1;
+ // depth is the length of the chain minus 1.
+ // origin_history_size of 0 means unlimited depth.
+ if (flags().origin_history_size > 0) {
+ ++depth;
+ if (depth >= flags().origin_history_size || depth > kMaxDepth)
+ return prev;
+ }
+
+ StackDepotHandle h = StackDepotPut_WithHandle(*stack);
+ if (!h.valid())
+ return prev;
+
+ if (flags().origin_history_per_stack_limit > 0) {
+ int use_count = h.use_count();
+ if (use_count > flags().origin_history_per_stack_limit)
+ return prev;
+ }
+
+ u32 chained_id;
+ bool inserted =
+ GetChainedOriginDepot()->Put(h.id(), prev.raw_id(), &chained_id);
+ CHECK((chained_id & kChainedIdMask) == chained_id);
+
+ if (inserted && flags().origin_history_per_stack_limit > 0)
+ h.inc_use_count_unsafe();
+
+ return Origin((depth << kDepthShift) | chained_id);
+ }
+
+ static Origin FromRawId(u32 id) { return Origin(id); }
+
+ private:
+ static const int kDepthBits = 4;
+ static const int kDepthShift = 32 - kDepthBits;
+
+ static const u32 kChainedIdMask = ((u32)-1) >> kDepthBits;
+
+ u32 raw_id_;
+
+ explicit Origin(u32 raw_id) : raw_id_(raw_id) {}
+
+ int depth() const {
+ CHECK(isChainedOrigin());
+ return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1);
+ }
+
+ public:
+ static const int kMaxDepth = (1 << kDepthBits) - 1;
+};
+
+} // namespace __dfsan
+
+#endif // DFSAN_ORIGIN_H
More information about the llvm-commits
mailing list