[compiler-rt] r363358 - Revert r363326 "[libFuzzer] simplify the DFT trace collection using the new faster DFSan mode that traces up to 16 labels at a time and never runs out of labels."

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 14 00:32:23 PDT 2019


Author: hans
Date: Fri Jun 14 00:32:22 2019
New Revision: 363358

URL: http://llvm.org/viewvc/llvm-project?rev=363358&view=rev
Log:
Revert r363326 "[libFuzzer] simplify the DFT trace collection using the new faster DFSan mode that traces up to 16 labels at a time and never runs out of labels."

It broke the Windows build:

C:\b\s\w\ir\cache\builder\src\third_party\llvm\compiler-rt\lib\fuzzer\FuzzerDataFlowTrace.cpp(243): error C3861: 'setenv': identifier not found

This also reverts the follow-up r363327.

Removed:
    compiler-rt/trunk/test/fuzzer/Labels20Test.cpp
Modified:
    compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp
    compiler-rt/trunk/lib/fuzzer/FuzzerFork.cpp
    compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
    compiler-rt/trunk/test/fuzzer/OnlySomeBytesTest.cpp
    compiler-rt/trunk/test/fuzzer/dataflow.test

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp?rev=363358&r1=363357&r2=363358&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp Fri Jun 14 00:32:22 2019
@@ -120,6 +120,12 @@ static Vector<uint8_t> DFTStringToVector
   return DFT;
 }
 
+static std::ostream &operator<<(std::ostream &OS, const Vector<uint8_t> &DFT) {
+  for (auto B : DFT)
+    OS << (B ? "1" : "0");
+  return OS;
+}
+
 static bool ParseError(const char *Err, const std::string &Line) {
   Printf("DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str());
   return false;
@@ -240,24 +246,74 @@ int CollectDataFlow(const std::string &D
                     const Vector<SizedFile> &CorporaFiles) {
   Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
          DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size());
-  setenv("DFSAN_OPTIONS", "fast16labels=1:warn_unimplemented=0", 1);
   MkDir(DirPath);
+  auto Temp = TempPath(".dft");
   for (auto &F : CorporaFiles) {
     // For every input F we need to collect the data flow and the coverage.
     // Data flow collection may fail if we request too many DFSan tags at once.
     // So, we start from requesting all tags in range [0,Size) and if that fails
     // we then request tags in [0,Size/2) and [Size/2, Size), and so on.
     // Function number => DFT.
-    auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File)));
     std::unordered_map<size_t, Vector<uint8_t>> DFTMap;
     std::unordered_set<std::string> Cov;
-    Command Cmd;
-    Cmd.addArgument(DFTBinary);
-    Cmd.addArgument(F.File);
-    Cmd.addArgument(OutPath);
-    Printf("CMD: %s\n", Cmd.toString().c_str());
-    ExecuteCommand(Cmd);
+    std::queue<std::pair<size_t, size_t>> Q;
+    Q.push({0, F.Size});
+    while (!Q.empty()) {
+      auto R = Q.front();
+      Printf("\n\n\n********* Trying: [%zd, %zd)\n", R.first, R.second);
+      Q.pop();
+      Command Cmd;
+      Cmd.addArgument(DFTBinary);
+      Cmd.addArgument(std::to_string(R.first));
+      Cmd.addArgument(std::to_string(R.second));
+      Cmd.addArgument(F.File);
+      Cmd.addArgument(Temp);
+      Printf("CMD: %s\n", Cmd.toString().c_str());
+      if (ExecuteCommand(Cmd)) {
+        // DFSan has failed, collect tags for two subsets.
+        if (R.second - R.first >= 2) {
+          size_t Mid = (R.second + R.first) / 2;
+          Q.push({R.first, Mid});
+          Q.push({Mid, R.second});
+        }
+      } else {
+        Printf("********* Success: [%zd, %zd)\n", R.first, R.second);
+        std::ifstream IF(Temp);
+        std::string L;
+        while (std::getline(IF, L, '\n')) {
+          // Data flow collection has succeeded.
+          // Merge the results with the other runs.
+          if (L.empty()) continue;
+          if (L[0] == 'C') {
+            // Take coverage lines as is, they will be the same in all attempts.
+            Cov.insert(L);
+          } else if (L[0] == 'F') {
+            size_t FunctionNum = 0;
+            std::string DFTString;
+            if (ParseDFTLine(L, &FunctionNum, &DFTString)) {
+              auto &DFT = DFTMap[FunctionNum];
+              if (DFT.empty()) {
+                // Haven't seen this function before, take DFT as is.
+                DFT = DFTStringToVector(DFTString);
+              } else if (DFT.size() == DFTString.size()) {
+                // Have seen this function already, merge DFTs.
+                DFTStringAppendToVector(&DFT, DFTString);
+              }
+            }
+          }
+        }
+      }
+    }
+    auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File)));
+    // Dump combined DFT to disk.
+    Printf("Producing DFT for %s\n", OutPath.c_str());
+    std::ofstream OF(OutPath);
+    for (auto &DFT: DFTMap)
+      OF << "F" << DFT.first << " " << DFT.second << std::endl;
+    for (auto &C : Cov)
+      OF << C << std::endl;
   }
+  RemoveFile(Temp);
   // Write functions.txt if it's currently empty or doesn't exist.
   auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt);
   if (FileToString(FunctionsTxtPath).empty()) {

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerFork.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerFork.cpp?rev=363358&r1=363357&r2=363358&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerFork.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerFork.cpp Fri Jun 14 00:32:22 2019
@@ -89,7 +89,6 @@ struct GlobalEnv {
   std::string DFTDir;
   std::string DataFlowBinary;
   Set<uint32_t> Features, Cov;
-  Set<std::string> FilesWithDFT;
   Vector<std::string> Files;
   Random *Rand;
   std::chrono::system_clock::time_point ProcessStartTime;
@@ -127,13 +126,10 @@ struct GlobalEnv {
     auto Job = new FuzzJob;
     std::string Seeds;
     if (size_t CorpusSubsetSize =
-            std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
-      for (size_t i = 0; i < CorpusSubsetSize; i++) {
-        auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
-        Seeds += (Seeds.empty() ? "" : ",") + SF;
-        CollectDFT(SF);
-      }
-    }
+            std::min(Files.size(), (size_t)sqrt(Files.size() + 2)))
+      for (size_t i = 0; i < CorpusSubsetSize; i++)
+        Seeds += (Seeds.empty() ? "" : ",") +
+                 Files[Rand->SkewTowardsLast(Files.size())];
     if (!Seeds.empty()) {
       Job->SeedListPath =
           DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
@@ -200,6 +196,7 @@ struct GlobalEnv {
       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
       WriteToFile(U, NewPath);
       Files.push_back(NewPath);
+      CollectDFT(NewPath);
     }
     Features.insert(NewFeatures.begin(), NewFeatures.end());
     Cov.insert(NewCov.begin(), NewCov.end());
@@ -220,7 +217,6 @@ struct GlobalEnv {
 
   void CollectDFT(const std::string &InputPath) {
     if (DataFlowBinary.empty()) return;
-    if (!FilesWithDFT.insert(InputPath).second) return;
     Command Cmd(Args);
     Cmd.removeFlag("fork");
     Cmd.removeFlag("runs");
@@ -230,7 +226,7 @@ struct GlobalEnv {
       Cmd.removeArgument(C);
     Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
     Cmd.combineOutAndErr();
-    // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
+    // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str());
     ExecuteCommand(Cmd);
   }
 
@@ -300,6 +296,9 @@ void FuzzWithFork(Random &Rand, const Fu
   CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
                       {}, &Env.Cov,
                       CFPath, false);
+  for (auto &F : Env.Files)
+    Env.CollectDFT(F);
+
   RemoveFile(CFPath);
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());

Modified: compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp?rev=363358&r1=363357&r2=363358&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp Fri Jun 14 00:32:22 2019
@@ -35,8 +35,7 @@
 // Run:
 //   # Collect data flow and coverage for INPUT_FILE
 //   # write to OUTPUT_FILE (default: stdout)
-//   export DFSAN_OPTIONS=fast16labels=1:warn_unimplemented=0
-//   ./a.out INPUT_FILE [OUTPUT_FILE]
+//   ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE]
 //
 //   # Print all instrumented functions. llvm-symbolizer must be present in PATH
 //   ./a.out
@@ -49,6 +48,8 @@
 //  C1 8
 //  ===============
 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
+//    The byte string is LEN+1 bytes. The last byte is set if the function
+//    depends on the input length.
 // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
 //    in addition to the function's entry block, out of T total instrumented
 //    blocks.
@@ -71,20 +72,22 @@ __attribute__((weak)) extern int LLVMFuz
 } // extern "C"
 
 static size_t InputLen;
-static size_t NumIterations;
+static size_t InputLabelBeg;
+static size_t InputLabelEnd;
+static size_t InputSizeLabel;
 static size_t NumFuncs, NumGuards;
 static uint32_t *GuardsBeg, *GuardsEnd;
 static const uintptr_t *PCsBeg, *PCsEnd;
-static __thread size_t CurrentFunc, CurrentIteration;
-static dfsan_label **FuncLabels;  // NumFuncs x NumIterations.
+static __thread size_t CurrentFunc;
+static dfsan_label *FuncLabels;  // Array of NumFuncs elements.
 static bool *BBExecuted;  // Array of NumGuards elements.
+static char *PrintableStringForLabel;  // InputLen + 2 bytes.
+static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
 
 enum {
   PCFLAG_FUNC_ENTRY = 1,
 };
 
-const int kNumLabels = 16;
-
 static inline bool BlockIsEntry(size_t BlockIdx) {
   return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
 }
@@ -109,32 +112,35 @@ static int PrintFunctions() {
   return 0;
 }
 
-static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) {
-  char buf[kNumLabels + 1];
-  assert(Len <= kNumLabels);
-  for (int i = 0; i < kNumLabels; i++)
-    buf[i] = (L & (1 << i)) ? '1' : '0';
-  buf[Len] = 0;
-  fprintf(Out, "%s", buf);
+extern "C"
+void SetBytesForLabel(dfsan_label L, char *Bytes) {
+  if (LabelSeen[L])
+    return;
+  LabelSeen[L] = true;
+  assert(L);
+  if (L < InputSizeLabel) {
+    Bytes[L + InputLabelBeg - 1] = '1';
+  } else if (L == InputSizeLabel) {
+    Bytes[InputLen] = '1';
+  } else {
+    auto *DLI = dfsan_get_label_info(L);
+    SetBytesForLabel(DLI->l1, Bytes);
+    SetBytesForLabel(DLI->l2, Bytes);
+  }
+}
+
+static char *GetPrintableStringForLabel(dfsan_label L) {
+  memset(PrintableStringForLabel, '0', InputLen + 1);
+  PrintableStringForLabel[InputLen + 1] = 0;
+  memset(LabelSeen, 0, sizeof(LabelSeen));
+  SetBytesForLabel(L, PrintableStringForLabel);
+  return PrintableStringForLabel;
 }
 
 static void PrintDataFlow(FILE *Out) {
-  for (size_t Func = 0; Func < NumFuncs; Func++) {
-    bool HasAny = false;
-    for (size_t Iter = 0; Iter < NumIterations; Iter++)
-      if (FuncLabels[Func][Iter])
-        HasAny = true;
-    if (!HasAny)
-      continue;
-    fprintf(Out, "F%zd ", Func);
-    size_t LenOfLastIteration = kNumLabels;
-    if (auto Tail = InputLen % kNumLabels)
-        LenOfLastIteration = Tail;
-    for (size_t Iter = 0; Iter < NumIterations; Iter++)
-      PrintBinary(Out, FuncLabels[Func][Iter],
-                  Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels);
-    fprintf(Out, "\n");
-  }
+  for (size_t I = 0; I < NumFuncs; I++)
+    if (FuncLabels[I])
+      fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
 }
 
 static void PrintCoverage(FILE *Out) {
@@ -163,9 +169,12 @@ int main(int argc, char **argv) {
     LLVMFuzzerInitialize(&argc, &argv);
   if (argc == 1)
     return PrintFunctions();
-  assert(argc == 2 || argc == 3);
+  assert(argc == 4 || argc == 5);
+  InputLabelBeg = atoi(argv[1]);
+  InputLabelEnd = atoi(argv[2]);
+  assert(InputLabelBeg < InputLabelEnd);
 
-  const char *Input = argv[1];
+  const char *Input = argv[3];
   fprintf(stderr, "INFO: reading '%s'\n", Input);
   FILE *In = fopen(Input, "r");
   assert(In);
@@ -175,35 +184,30 @@ int main(int argc, char **argv) {
   unsigned char *Buf = (unsigned char*)malloc(InputLen);
   size_t NumBytesRead = fread(Buf, 1, InputLen, In);
   assert(NumBytesRead == InputLen);
+  PrintableStringForLabel = (char*)malloc(InputLen + 2);
   fclose(In);
 
-  NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels;
-  FuncLabels = (dfsan_label**)calloc(NumFuncs, sizeof(dfsan_label*));
-  for (size_t Func = 0; Func < NumFuncs; Func++)
-    FuncLabels[Func] =
-        (dfsan_label *)calloc(NumIterations, sizeof(dfsan_label));
-
-  for (CurrentIteration = 0; CurrentIteration < NumIterations;
-       CurrentIteration++) {
-    fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, CurrentIteration,
-            NumIterations);
-    dfsan_flush();
-    dfsan_set_label(0, Buf, InputLen);
-
-    size_t BaseIdx = CurrentIteration * kNumLabels;
-    size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels
-                                                         : NumBytesRead;
-    assert(BaseIdx < LastIdx);
-    for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++)
-      dfsan_set_label(1 << (Idx - BaseIdx), Buf + Idx, 1);
-    LLVMFuzzerTestOneInput(Buf, InputLen);
+  fprintf(stderr, "INFO: running '%s'\n", Input);
+  for (size_t I = 1; I <= InputLen; I++) {
+    size_t Idx = I - 1;
+    if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
+      dfsan_label L = dfsan_create_label("", nullptr);
+      assert(L == I - InputLabelBeg);
+      dfsan_set_label(L, Buf + Idx, 1);
+    }
   }
+  dfsan_label SizeL = dfsan_create_label("", nullptr);
+  InputSizeLabel = SizeL;
+  assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
+  dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
+
+  LLVMFuzzerTestOneInput(Buf, InputLen);
   free(Buf);
 
-  bool OutIsStdout = argc == 2;
+  bool OutIsStdout = argc == 4;
   fprintf(stderr, "INFO: writing dataflow to %s\n",
-          OutIsStdout ? "<stdout>" : argv[2]);
-  FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w");
+          OutIsStdout ? "<stdout>" : argv[4]);
+  FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
   PrintDataFlow(Out);
   PrintCoverage(Out);
   if (!OutIsStdout) fclose(Out);
@@ -233,6 +237,7 @@ void __sanitizer_cov_pcs_init(const uint
       GuardsBeg[i] = NumFuncs;
     }
   }
+  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
   BBExecuted = (bool*)calloc(NumGuards, sizeof(bool));
   fprintf(stderr, "INFO: %zd instrumented function(s) observed "
           "and %zd basic blocks\n", NumFuncs, NumGuards);
@@ -253,13 +258,14 @@ void __sanitizer_cov_trace_pc_guard(uint
 void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
                                          dfsan_label L1, dfsan_label UnusedL) {
   assert(CurrentFunc < NumFuncs);
-  FuncLabels[CurrentFunc][CurrentIteration] |= L1;
+  FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
 }
 
 #define HOOK(Name, Type)                                                       \
   void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) {            \
     assert(CurrentFunc < NumFuncs);                                            \
-    FuncLabels[CurrentFunc][CurrentIteration] |= L1 | L2;                      \
+    FuncLabels[CurrentFunc] =                                                  \
+        dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2));             \
   }
 
 HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)

Removed: compiler-rt/trunk/test/fuzzer/Labels20Test.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/Labels20Test.cpp?rev=363357&view=auto
==============================================================================
--- compiler-rt/trunk/test/fuzzer/Labels20Test.cpp (original)
+++ compiler-rt/trunk/test/fuzzer/Labels20Test.cpp (removed)
@@ -1,41 +0,0 @@
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// Simple test for a fuzzer.
-// Needs to find a string "FUZZxxxxxxxxxxxxMxxE", where 'x' is any byte.
-#include <assert.h>
-#include <cstddef>
-#include <cstdint>
-#include <cstdlib>
-#include <cstdio>
-
-extern "C" bool Func1(const uint8_t *Data, size_t Size);
-extern "C" bool Func2(const uint8_t *Data, size_t Size);
-
-extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
-  if (Size >= 20
-      && Data[0] == 'F'
-      && Data[1] == 'U'
-      && Data[2] == 'Z'
-      && Data[3] == 'Z'
-      && Func1(Data, Size)
-      && Func2(Data, Size)) {
-        fprintf(stderr, "BINGO\n");
-        abort();
-  }
-  return 0;
-}
-
-extern "C"
-__attribute__((noinline))
-bool Func1(const uint8_t *Data, size_t Size) {
-  // assumes Size >= 5, doesn't check it.
-  return Data[16] == 'M';
-}
-
-extern "C"
-__attribute__((noinline))
-bool Func2(const uint8_t *Data, size_t Size) {
-  return Size >= 20 && Data[19] == 'E';
-}

Modified: compiler-rt/trunk/test/fuzzer/OnlySomeBytesTest.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/OnlySomeBytesTest.cpp?rev=363358&r1=363357&r2=363358&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/OnlySomeBytesTest.cpp (original)
+++ compiler-rt/trunk/test/fuzzer/OnlySomeBytesTest.cpp Fri Jun 14 00:32:22 2019
@@ -36,8 +36,7 @@ __attribute__((noinline)) void f0(IN in)
   }
 }
 
-__attribute__((noinline)) void fD(IN in) { f0(in); }
-__attribute__((noinline)) void fC(IN in) { if (in[2] == 'C') fD(in); }
+__attribute__((noinline)) void fC(IN in) { if (in[2] == 'C') f0(in); }
 __attribute__((noinline)) void fB(IN in) { if (in[1] == 'B') fC(in); }
 __attribute__((noinline)) void fA(IN in) { if (in[0] == 'A') fB(in); }
 

Modified: compiler-rt/trunk/test/fuzzer/dataflow.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/dataflow.test?rev=363358&r1=363357&r2=363358&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/dataflow.test (original)
+++ compiler-rt/trunk/test/fuzzer/dataflow.test Fri Jun 14 00:32:22 2019
@@ -4,7 +4,7 @@ REQUIRES: linux, x86_64
 # Build the tracer and the test.
 RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow  %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o  %t-DataFlow.o
 RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/ThreeFunctionsTest.cpp     %t-DataFlow.o -o %t-ThreeFunctionsTestDF
-RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/Labels20Test.cpp     %t-DataFlow.o -o %t-Labels20TestDF
+RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF
 RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest
 
 # Dump the function list.
@@ -14,8 +14,8 @@ FUNC_LIST-DAG: Func1
 FUNC_LIST-DAG: Func2
 
 # Prepare the inputs.
-RUN: rm -rf %t/IN %t/IN20
-RUN: mkdir -p %t/IN %t/IN20
+RUN: rm -rf %t/IN
+RUN: mkdir -p %t/IN
 RUN: echo -n ABC    > %t/IN/ABC
 RUN: echo -n FUABC  > %t/IN/FUABC
 RUN: echo -n FUZZR  > %t/IN/FUZZR
@@ -23,83 +23,71 @@ RUN: echo -n FUZZM  > %t/IN/FUZZM
 RUN: echo -n FUZZMU > %t/IN/FUZZMU
 RUN: echo -n 1234567890123456 > %t/IN/1234567890123456
 
-RUN: echo -n FUZZxxxxxxxxxxxxxxxx > %t/IN20/FUZZxxxxxxxxxxxxxxxx
-RUN: echo -n FUZZxxxxxxxxxxxxMxxx > %t/IN20/FUZZxxxxxxxxxxxxMxxx
-RUN: echo -n FUZxxxxxxxxxxxxxxxxx > %t/IN20/FUZxxxxxxxxxxxxxxxxx
-RUN: echo -n FUxxxxxxxxxxxxxxxxxx > %t/IN20/FUxxxxxxxxxxxxxxxxxx
-
-
-RUN: export DFSAN_OPTIONS=fast16labels=1:warn_unimplemented=0
-
 # This test assumes that the functions in ThreeFunctionsTestDF are instrumented
 # in a specific order:
 # LLVMFuzzerTestOneInput: F0
 # Func1: F1
 # Func2: F2
 
-# ABC: No data is used
-RUN:%t-ThreeFunctionsTestDF %t/IN/ABC    | FileCheck %s --check-prefix=IN_ABC
-IN_ABC-NOT: F0
-IN_ABC: C0
+# ABC: No data is used, the only used label is 4 (corresponds to the size)
+RUN:%t-ThreeFunctionsTestDF 0 3 %t/IN/ABC    | FileCheck %s --check-prefix=IN_ABC
+IN_ABC: F0 0001
+IN_ABC-NOT: F
+IN_ABC-NEXT: C0
 IN_ABC-NOT: C
 
 # FUABC: First 3 bytes are checked, Func1/Func2 are not called.
-RUN:%t-ThreeFunctionsTestDF %t/IN/FUABC  | FileCheck %s --check-prefix=IN_FUABC
-IN_FUABC: F0 11100{{$}}
+RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUABC  | FileCheck %s --check-prefix=IN_FUABC
+IN_FUABC: F0 111001
 IN_FUABC-NOT: F
 IN_FUABC-NEXT: C0
 IN_FUABC-NOT: C
 
 # FUZZR: 5 bytes are used (4 in one function, 5-th in the other), Func2 is not called.
-RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZR  | FileCheck %s --check-prefix=IN_FUZZR
-IN_FUZZR: F0 11110
-IN_FUZZR: F1 00001
+RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUZZR  | FileCheck %s --check-prefix=IN_FUZZR
+IN_FUZZR: F0 111101
+IN_FUZZR: F1 000010
 IN_FUZZR-NOT: F
 IN_FUZZR: C0
 IN_FUZZR: C1
 IN_FUZZR-NOT: C
 
-# FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size.
-RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZM  | FileCheck %s --check-prefix=IN_FUZZM
-IN_FUZZM: F0 11110
-IN_FUZZM: F1 00001
-IN_FUZZM-NOT: F2
+# FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size (label 6).
+RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUZZM  | FileCheck %s --check-prefix=IN_FUZZM
+IN_FUZZM: F0 111101
+IN_FUZZM: F1 000010
+IN_FUZZM: F2 000001
 IN_FUZZM: C0
 IN_FUZZM: C1
 IN_FUZZM: C2
 
-# FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size.
-RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZMU  | FileCheck %s --check-prefix=IN_FUZZMU
-
+# FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size (label 7)
+RUN:%t-ThreeFunctionsTestDF 0 6 %t/IN/FUZZMU  | FileCheck %s --check-prefix=IN_FUZZMU
 
-# Test Labels20TestDF
-RUN:%t-Labels20TestDF %t/IN20/FUxxxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FU
-L20_FU: F0 11100000000000000000{{$}}
-L20_FU-NOT: F
-
-RUN:%t-Labels20TestDF %t/IN20/FUZxxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FUZ
-L20_FUZ: F0 11110000000000000000{{$}}
-L20_FUZ-NOT: F
-
-RUN:%t-Labels20TestDF %t/IN20/FUZZxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FUZZ
-L20_FUZZ: F0 11110000000000000000{{$}}
-L20_FUZZ-NEXT: F1 00000000000000001000{{$}}
-L20_FUZZ-NOT: F
-
-RUN:%t-Labels20TestDF %t/IN20/FUZZxxxxxxxxxxxxMxxx | FileCheck %s --check-prefix=L20_FUZZM
-L20_FUZZM: F0 11110000000000000000{{$}}
-L20_FUZZM-NEXT: F1 00000000000000001000{{$}}
-L20_FUZZM-NEXT: F2 00000000000000000001{{$}}
-L20_FUZZM-NOT: F
+# Test merge_data_flow
+RUN:rm -f %t-merge-*
+RUN:%t-ThreeFunctionsTestDF 0 2 %t/IN/FUZZMU > %t-merge-1
+RUN:%t-ThreeFunctionsTestDF 2 4 %t/IN/FUZZMU > %t-merge-2
+RUN:%t-ThreeFunctionsTestDF 4 6 %t/IN/FUZZMU > %t-merge-3
 
 # Test libFuzzer's built in DFT collection.
 RUN: rm -rf %t-DFT
 RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t-DFT %t/IN/FUZZMU
 RUN: cat %t-DFT/* | sort | FileCheck %s --check-prefix=IN_FUZZMU
 
-IN_FUZZMU: F0 111100
-IN_FUZZMU: F1 000010
-IN_FUZZMU: F2 000001
+IN_FUZZMU: F0 1111001
+IN_FUZZMU: F1 0000100
+IN_FUZZMU: F2 0000011
+
+# A very simple test will cause DFSan to die with "out of labels"
+RUN: not %t-ExplodeDFSanLabelsTestDF 0 16 %t/IN/1234567890123456 2>&1 | FileCheck %s --check-prefix=OUT_OF_LABELS
+OUT_OF_LABELS: ==FATAL: DataFlowSanitizer: out of labels
+# However we can run the same test piece by piece.
+RUN: %t-ExplodeDFSanLabelsTestDF 0 2  %t/IN/1234567890123456
+RUN: %t-ExplodeDFSanLabelsTestDF 2 4  %t/IN/1234567890123456
+RUN: %t-ExplodeDFSanLabelsTestDF 4 6  %t/IN/1234567890123456
+# Test libFuzzer's builtin collect_data_flow.
+RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t-DFT %t/IN/1234567890123456
 
 # Test that we can run collect_data_flow on the entire corpus dir
 RUN: rm -rf %t/OUT
@@ -108,12 +96,18 @@ RUN: %t-ThreeFunctionsTest -data_flow_tr
 
 
 USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT
-USE_DATA_FLOW_TRACE: d28cb407e8e1a702c72d25473f0553d3ec172262 => |000001|
-USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 1 traces with focus function
+USE_DATA_FLOW_TRACE-DAG: ca8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001|
+USE_DATA_FLOW_TRACE-DAG: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011|
+USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function
 USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2'
 
 # Test that we can run collect_data_flow on a long input (>2**16 bytes)
 RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input
 RUN: rm -rf %t/OUT
-RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN/very_long_input
+RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN/very_long_input 2>&1 | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT
 RUN: rm %t/IN/very_long_input
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001
+COLLECT_TRACE_FOR_LONG_INPUT-DAG: ******* Trying:{{[ ]+}}[75000, 150001
+COLLECT_TRACE_FOR_LONG_INPUT-DAG: ******* Trying:{{[ ]+}}[112500, 150001
+COLLECT_TRACE_FOR_LONG_INPUT-DAG: ******* Success:{{[ ]+}}[{{[0123456789]+}}, 150001
+COLLECT_TRACE_FOR_LONG_INPUT-DAG: ******* Success:{{[ ]+}}[0, {{[0123456789]+}}




More information about the llvm-commits mailing list