[compiler-rt] 5748219 - [DFSan] Add dfsan-combine-taint-lookup-table option as work around for

Andrew Browne via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 5 11:07:20 PDT 2022


Author: Andrew Browne
Date: 2022-04-05T11:05:10-07:00
New Revision: 5748219fd2ce57f81e7a8880302bab69744fb004

URL: https://github.com/llvm/llvm-project/commit/5748219fd2ce57f81e7a8880302bab69744fb004
DIFF: https://github.com/llvm/llvm-project/commit/5748219fd2ce57f81e7a8880302bab69744fb004.diff

LOG: [DFSan] Add dfsan-combine-taint-lookup-table option as work around for
false negatives when dfsan-combine-pointer-labels-on-load=0 and
dfsan-combine-offset-labels-on-gep=0 miss data flows through lookup tables.

Example case:
https://github.com/abseil/abseil-cpp/blob/628a2825f8dc0219964886e7cc3f7f519e3bd950/absl/strings/ascii.h#L182

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D122787

Added: 
    compiler-rt/test/dfsan/lookup_table.c
    llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/test/dfsan/lookup_table.c b/compiler-rt/test/dfsan/lookup_table.c
new file mode 100644
index 0000000000000..76d38c117984a
--- /dev/null
+++ b/compiler-rt/test/dfsan/lookup_table.c
@@ -0,0 +1,61 @@
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -mllvm -dfsan-combine-taint-lookup-table=remap_to_upper -DLOOKUP_TABLE -o %t && %run %t
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -mllvm -dfsan-combine-taint-lookup-table=no_match -o %t && %run %t
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -o %t && %run %t
+//
+// REQUIRES: x86_64-target-arch
+
+#include <sanitizer/dfsan_interface.h>
+#include <assert.h>
+
+const char remap_to_upper[256] = {
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', 'A', 'B',
+    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
+    'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+    'U', 'V', 'W', 'X', 'Y', 'Z', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.',
+};
+
+char character_mapping(unsigned char c) {
+  return remap_to_upper[c];
+}
+
+int main(void) {
+  char a = 'b';
+  dfsan_label i_label = 1;
+  dfsan_set_label(i_label, &a, sizeof(a));
+  assert(dfsan_read_label(&a, sizeof(a)) == i_label);
+
+  char b = character_mapping(a);
+  assert(b == 'B');
+
+#ifdef LOOKUP_TABLE
+  assert(dfsan_read_label(&b, sizeof(b)) == i_label);
+#else
+  assert(dfsan_read_label(&b, sizeof(b)) == 0);
+#endif
+  return 0;
+}

diff  --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 909864bb4d3a4..5b9e778363870 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -67,6 +67,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -183,6 +184,15 @@ static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
         "doing pointer arithmetic."),
     cl::Hidden, cl::init(true));
 
+static cl::list<std::string> ClCombineTaintLookupTables(
+    "dfsan-combine-taint-lookup-table",
+    cl::desc(
+        "When dfsan-combine-offset-labels-on-gep and/or "
+        "dfsan-combine-pointer-labels-on-load are false, this flag can "
+        "be used to re-enable combining offset and/or pointer taint when "
+        "loading specific constant global variables (i.e. lookup tables)."),
+    cl::Hidden);
+
 static cl::opt<bool> ClDebugNonzeroLabels(
     "dfsan-debug-nonzero-labels",
     cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -463,6 +473,7 @@ class DataFlowSanitizer {
   DFSanABIList ABIList;
   DenseMap<Value *, Function *> UnwrappedFnMap;
   AttributeMask ReadOnlyNoneAttrs;
+  StringSet<> CombineTaintLookupTableNames;
 
   /// Memory map parameters used in calculation mapping application addresses
   /// to shadow addresses and origin addresses.
@@ -652,6 +663,8 @@ struct DFSanFunction {
   // branch instruction using the given conditional expression.
   void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
 
+  bool isLookupTableConstant(Value *P);
+
 private:
   /// Collapses the shadow with aggregate type into a single primitive shadow
   /// value.
@@ -786,6 +799,9 @@ DataFlowSanitizer::DataFlowSanitizer(
   // FIXME: should we propagate vfs::FileSystem to this constructor?
   ABIList.set(
       SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
+
+  for (StringRef v : ClCombineTaintLookupTables)
+    CombineTaintLookupTableNames.insert(v);
 }
 
 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
@@ -1831,6 +1847,14 @@ Align DFSanFunction::getOriginAlign(Align InstAlignment) {
   return Align(std::max(MinOriginAlignment, Alignment));
 }
 
+bool DFSanFunction::isLookupTableConstant(Value *P) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
+    if (GV->isConstant() && GV->hasName())
+      return DFS.CombineTaintLookupTableNames.count(GV->getName());
+
+  return false;
+}
+
 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                   Align InstAlignment) {
   // When enabling tracking load instructions, we always use
@@ -2084,6 +2108,29 @@ static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
   llvm_unreachable("Unknown ordering");
 }
 
+Value *StripPointerGEPsAndCasts(Value *V) {
+  if (!V->getType()->isPointerTy())
+    return V;
+
+  // DFSan pass should be running on valid IR, but we'll
+  // keep a seen set to ensure there are no issues.
+  SmallPtrSet<const Value *, 4> Visited;
+  Visited.insert(V);
+  do {
+    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+      if (!V->getType()->isPointerTy())
+        return V;
+    } else if (isa<GlobalAlias>(V)) {
+      V = cast<GlobalAlias>(V)->getAliasee();
+    }
+  } while (Visited.insert(V).second);
+
+  return V;
+}
+
 void DFSanVisitor::visitLoadInst(LoadInst &LI) {
   auto &DL = LI.getModule()->getDataLayout();
   uint64_t Size = DL.getTypeStoreSize(LI.getType());
@@ -2112,7 +2159,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
     Shadows.push_back(PrimitiveShadow);
     Origins.push_back(Origin);
   }
-  if (ClCombinePointerLabelsOnLoad) {
+  if (ClCombinePointerLabelsOnLoad ||
+      DFSF.isLookupTableConstant(
+          StripPointerGEPsAndCasts(LI.getPointerOperand()))) {
     Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
     PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
     if (ShouldTrackOrigins) {
@@ -2474,7 +2523,9 @@ void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
 }
 
 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
-  if (ClCombineOffsetLabelsOnGEP) {
+  if (ClCombineOffsetLabelsOnGEP ||
+      DFSF.isLookupTableConstant(
+          StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {
     visitInstOperands(GEPI);
     return;
   }

diff  --git a/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll b/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll
new file mode 100644
index 0000000000000..789594c21994f
--- /dev/null
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=false -dfsan-combine-offset-labels-on-gep=false -dfsan-combine-taint-lookup-table=lookup_table_a -S | FileCheck %s --check-prefixes=CHECK,LOOKUP_A
+; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=false -dfsan-combine-offset-labels-on-gep=false -S | FileCheck %s --check-prefixes=CHECK,NO_LOOKUP_A
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
+; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
+; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
+; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
+
+ at lookup_table_a = external local_unnamed_addr constant [256 x i8], align 16
+ at lookup_table_b = external local_unnamed_addr constant [256 x i8], align 16
+
+define i8 @load_lookup_table_a(i8 %p) {
+  ; CHECK-LABEL:           @load_lookup_table_a.dfsan
+  ; CHECK-NEXT:            %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]]
+  ; CHECK-NEXT:            %c = zext i8 %p to i64
+  ; CHECK-NEXT:            %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_a, i64 0, i64 %c
+  ; CHECK-NEXT:            %a = load i8, i8* %b, align 1
+  ; Propagates p shadow when lookup_table_a flag is provided, otherwise propagates 0 shadow
+  ; LOOKUP_A-NEXT:         store i[[#SBITS]] %[[#PS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; NO_LOOKUP_A-NEXT:      store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; CHECK-NEXT:            ret i8 %a
+
+  %c = zext i8 %p to i64
+  %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_a, i64 0, i64 %c
+  %a = load i8, i8* %b
+  ret i8 %a
+}
+
+define i8 @load_lookup_table_b(i8 %p) {
+  ; CHECK-LABEL:           @load_lookup_table_b.dfsan
+  ; CHECK-NEXT:            %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align 2
+  ; CHECK-NEXT:            %c = zext i8 %p to i64
+  ; CHECK-NEXT:            %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_b, i64 0, i64 %c
+  ; CHECK-NEXT:            %a = load i8, i8* %b, align 1
+  ; Propagates 0 shadow
+  ; CHECK-NEXT:            store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; CHECK-NEXT:            ret i8 %a
+
+  %c = zext i8 %p to i64
+  %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_b, i64 0, i64 %c
+  %a = load i8, i8* %b, align 1
+  ret i8 %a
+}


        


More information about the llvm-commits mailing list