[compiler-rt] e6597db - Greedy set cover implementation of `Merger::Merge`
Matt Morehouse via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 7 09:43:00 PDT 2021
Author: aristotelis
Date: 2021-09-07T09:42:38-07:00
New Revision: e6597dbae84034ce8ef97802584039b723adb526
URL: https://github.com/llvm/llvm-project/commit/e6597dbae84034ce8ef97802584039b723adb526
DIFF: https://github.com/llvm/llvm-project/commit/e6597dbae84034ce8ef97802584039b723adb526.diff
LOG: Greedy set cover implementation of `Merger::Merge`
Extend the existing single-pass algorithm in `Merger::Merge` with an alternative that yields a smaller merged corpus. The new implementation is enabled with the new **set_cover_merge=1** flag.
This greedy set cover implementation produces a substantially smaller final corpus (40%-80% fewer test cases) while preserving the same features/coverage. The execution-time penalty is modest (about +50% for a corpus of ~1M files and far less for smaller corpora). These results were obtained by comparing several targets with corpora of varying sizes.
Change `Merger::CrashResistantMergeInternalStep` to collect all features from each file rather than only the previously unseen ones; the set cover algorithm needs the complete per-file feature sets to work correctly. The implementation in `Merger::SetCoverMerge` uses a bitvector to record the features already covered while performing the pass; collisions when indexing the bitvector are ignored, just as the fuzzer itself does. A standalone sketch of the greedy selection loop follows below.
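To make the selection strategy concrete, here is a minimal standalone sketch of the greedy loop. It is not the libFuzzer code itself; the `CorpusFile` struct, the `GreedySetCover` helper, and the example corpus in `main` are illustrative assumptions, but the bitvector sizing and the "most new features, smaller file wins ties" rule follow the patch below.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct CorpusFile {
  std::string Name;
  size_t Size;                    // file size in bytes, used to break ties
  std::vector<uint32_t> Features; // features observed when executing the file
};

// Greedy set cover: each round picks the file that adds the most
// not-yet-covered features, preferring the smaller file on a tie, and stops
// once no remaining file contributes anything new.
std::vector<std::string> GreedySetCover(const std::vector<CorpusFile> &Files) {
  const uint32_t kFeatureSetSize = 1 << 21; // same bound as FuzzerCorpus.h
  std::vector<bool> Covered(kFeatureSetSize, false);
  std::vector<bool> Taken(Files.size(), false);
  std::vector<std::string> Chosen;
  while (true) {
    size_t BestIdx = Files.size();
    size_t BestNew = 0;
    for (size_t i = 0; i < Files.size(); ++i) {
      if (Taken[i])
        continue;
      size_t New = 0;
      for (uint32_t F : Files[i].Features)
        if (!Covered[F % kFeatureSetSize]) // bitvector collisions are ignored
          ++New;
      if (New > BestNew || (New == BestNew && New > 0 &&
                            Files[i].Size < Files[BestIdx].Size)) {
        BestNew = New;
        BestIdx = i;
      }
    }
    if (BestNew == 0)
      break; // every remaining file is already fully covered
    Taken[BestIdx] = true;
    Chosen.push_back(Files[BestIdx].Name);
    for (uint32_t F : Files[BestIdx].Features)
      Covered[F % kFeatureSetSize] = true;
  }
  return Chosen;
}

int main() {
  // Same corpus as one of the new unit-test cases: C has the most features,
  // then B wins a tie against A because it is smaller; A and D add nothing.
  std::vector<CorpusFile> Files = {{"A", 2000, {3, 5, 6}},
                                   {"B", 1000, {4, 5, 6}},
                                   {"C", 1000, {1, 2, 3, 4}},
                                   {"D", 500, {1}}};
  for (const std::string &Name : GreedySetCover(Files))
    std::printf("%s\n", Name); // prints C then B
}

Invocation mirrors the existing merge mode: the target binary is run with -set_cover_merge=1 followed by the main corpus directory and the corpora to merge into it, as exercised by the new set_cover_merge.test below.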
Reviewed By: morehouse
Differential Revision: https://reviews.llvm.org/D105284
Added:
compiler-rt/test/fuzzer/set_cover_merge.test
Modified:
compiler-rt/lib/fuzzer/FuzzerDriver.cpp
compiler-rt/lib/fuzzer/FuzzerFlags.def
compiler-rt/lib/fuzzer/FuzzerFork.cpp
compiler-rt/lib/fuzzer/FuzzerInternal.h
compiler-rt/lib/fuzzer/FuzzerMerge.cpp
compiler-rt/lib/fuzzer/FuzzerMerge.h
compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
Removed:
################################################################################
diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index 653c6ca6b2b60..c1a7e1b4dabc9 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -523,7 +523,7 @@ void Merge(Fuzzer *F, FuzzingOptions &Options,
std::vector<std::string> NewFiles;
std::set<uint32_t> NewFeatures, NewCov;
CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures,
- {}, &NewCov, CFPath, true);
+ {}, &NewCov, CFPath, true, Flags.set_cover_merge);
for (auto &Path : NewFiles)
F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
// We are done, delete the control file if it was a temporary one.
@@ -797,7 +797,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Flags.verbosity)
Printf("INFO: Seed: %u\n", Seed);
- if (Flags.collect_data_flow && !Flags.fork && !Flags.merge) {
+ if (Flags.collect_data_flow && !Flags.fork &&
+ !(Flags.merge || Flags.set_cover_merge)) {
if (RunIndividualFiles)
return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace,
ReadCorpora({}, *Inputs));
@@ -872,7 +873,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Flags.fork)
FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
- if (Flags.merge)
+ if (Flags.merge || Flags.set_cover_merge)
Merge(F, Options, Args, *Inputs, Flags.merge_control_file);
if (Flags.merge_inner) {
@@ -880,7 +881,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Options.MaxLen == 0)
F->SetMaxInputLen(kDefaultMaxMergeLen);
assert(Flags.merge_control_file);
- F->CrashResistantMergeInternalStep(Flags.merge_control_file);
+ F->CrashResistantMergeInternalStep(Flags.merge_control_file,
+ !strncmp(Flags.merge_inner, "2", 1));
exit(0);
}
diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index ab31da0ae5d60..d0cf0e904d8b6 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -64,6 +64,11 @@ FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")
FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
"merged into the 1-st corpus. Only interesting units will be taken. "
"This flag can be used to minimize a corpus.")
+FUZZER_FLAG_INT(set_cover_merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
+ "merged into the 1-st corpus. Same as the 'merge' flag, but uses the "
+ "standard greedy algorithm for the set cover problem to "
+ "compute an approximation of the minimum set of testcases that "
+ "provide the same coverage as the initial corpora")
FUZZER_FLAG_STRING(stop_file, "Stop fuzzing ASAP if this file exists")
FUZZER_FLAG_STRING(merge_inner, "internal flag")
FUZZER_FLAG_STRING(merge_control_file,
diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
index f7a17fdc65251..8771ab3613190 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -213,8 +213,11 @@ struct GlobalEnv {
std::vector<std::string> FilesToAdd;
std::set<uint32_t> NewFeatures, NewCov;
+ bool IsSetCoverMerge =
+ !Job->Cmd.getFlagValue("set_cover_merge").compare("1");
CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
- &NewFeatures, Cov, &NewCov, Job->CFPath, false);
+ &NewFeatures, Cov, &NewCov, Job->CFPath, false,
+ IsSetCoverMerge);
for (auto &Path : FilesToAdd) {
auto U = FileToVector(Path);
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
@@ -318,7 +321,8 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
std::set<uint32_t> NewFeatures, NewCov;
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
- &NewFeatures, Env.Cov, &NewCov, CFPath, false);
+ &NewFeatures, Env.Cov, &NewCov, CFPath,
+ /*Verbose=*/false, /*IsSetCoverMerge=*/false);
Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
RemoveFile(CFPath);
diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h
index bfce1dfcb2ae5..6637b0034e552 100644
--- a/compiler-rt/lib/fuzzer/FuzzerInternal.h
+++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h
@@ -73,7 +73,8 @@ class Fuzzer {
// Merge Corpora[1:] into Corpora[0].
void Merge(const std::vector<std::string> &Corpora);
- void CrashResistantMergeInternalStep(const std::string &ControlFilePath);
+ void CrashResistantMergeInternalStep(const std::string &ControlFilePath,
+ bool IsSetCoverMerge);
MutationDispatcher &GetMD() { return MD; }
void PrintFinalStats();
void SetMaxInputLen(size_t MaxInputLen);
diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
index 4bae7827f80b3..24bd11958e807 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
@@ -197,7 +197,8 @@ std::set<uint32_t> Merger::AllFeatures() const {
}
// Inner process. May crash if the target crashes.
-void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
+void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath,
+ bool IsSetCoverMerge) {
Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
Merger M;
std::ifstream IF(CFPath);
@@ -235,13 +236,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
// Collect coverage. We are iterating over the files in this order:
// * First, files in the initial corpus ordered by size, smallest first.
// * Then, all other files, smallest first.
- // So it makes no sense to record all features for all files, instead we
- // only record features that were not seen before.
- std::set<size_t> UniqFeatures;
- TPC.CollectFeatures([&](size_t Feature) {
- if (AllFeatures.insert(Feature).second)
- UniqFeatures.insert(Feature);
- });
+ std::set<size_t> Features;
+ if (IsSetCoverMerge)
+ TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); });
+ else
+ TPC.CollectFeatures([&](size_t Feature) {
+ if (AllFeatures.insert(Feature).second)
+ Features.insert(Feature);
+ });
TPC.UpdateObservedPCs();
// Show stats.
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
@@ -250,7 +252,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
PrintStatsWrapper("LOADED");
// Write the post-run marker and the coverage.
OF << "FT " << i;
- for (size_t F : UniqFeatures)
+ for (size_t F : Features)
OF << " " << F;
OF << "\n";
OF << "COV " << i;
@@ -264,6 +266,127 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
PrintStatsWrapper("DONE ");
}
+// Merges all corpora into the first corpus. A file is added into
+// the first corpus only if it adds new features. Unlike `Merger::Merge`,
+// this implementation calculates an approximation of the minimum set
+// of corpus files that cover all known features (set cover problem).
+// Generally, this means that files with more features are preferred for
+// merge into the first corpus. When two files have the same number of
+// features, the smaller one is preferred.
+size_t Merger::SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles) {
+ assert(NumFilesInFirstCorpus <= Files.size());
+ NewFiles->clear();
+ NewFeatures->clear();
+ NewCov->clear();
+ std::set<uint32_t> AllFeatures;
+ // 1 << 21 - 1 is the maximum feature index.
+ // See 'kFeatureSetSize' in 'FuzzerCorpus.h'.
+ const uint32_t kFeatureSetSize = 1 << 21;
+ std::vector<bool> Covered(kFeatureSetSize, false);
+ size_t NumCovered = 0;
+
+ std::set<uint32_t> ExistingFeatures = InitialFeatures;
+ for (size_t i = 0; i < NumFilesInFirstCorpus; ++i)
+ ExistingFeatures.insert(Files[i].Features.begin(), Files[i].Features.end());
+
+ // Mark the existing features as covered.
+ for (const auto &F : ExistingFeatures) {
+ if (!Covered[F % kFeatureSetSize]) {
+ ++NumCovered;
+ Covered[F % kFeatureSetSize] = true;
+ }
+ // Calculate an underestimation of the set of covered features
+ // since the `Covered` bitvector is smaller than the feature range.
+ AllFeatures.insert(F % kFeatureSetSize);
+ }
+
+ std::set<size_t> RemainingFiles;
+ for (size_t i = NumFilesInFirstCorpus; i < Files.size(); ++i) {
+ // Construct an incremental sequence which represents the
+ // indices of all files (excluding those in the initial corpus).
+ // RemainingFiles = range(NumFilesInFirstCorpus..Files.size()).
+ RemainingFiles.insert(i);
+ // Insert this file's unique features to all features.
+ for (const auto &F : Files[i].Features)
+ AllFeatures.insert(F % kFeatureSetSize);
+ }
+
+ // Integrate files into Covered until set is complete.
+ while (NumCovered != AllFeatures.size()) {
+ // Index to file with largest number of unique features.
+ size_t MaxFeaturesIndex = NumFilesInFirstCorpus;
+ // Indices to remove from RemainingFiles.
+ std::set<size_t> RemoveIndices;
+ // Running max unique feature count.
+ // Updated upon finding a file with more features.
+ size_t MaxNumFeatures = 0;
+
+ // Iterate over all files not yet integrated into Covered,
+ // to find the file which has the largest number of
+ // features that are not already in Covered.
+ for (const auto &i : RemainingFiles) {
+ const auto &File = Files[i];
+ size_t CurrentUnique = 0;
+ // Count number of features in this file
+ // which are not yet in Covered.
+ for (const auto &F : File.Features)
+ if (!Covered[F % kFeatureSetSize])
+ ++CurrentUnique;
+
+ if (CurrentUnique == 0) {
+ // All features in this file are already in Covered: skip next time.
+ RemoveIndices.insert(i);
+ } else if (CurrentUnique > MaxNumFeatures ||
+ (CurrentUnique == MaxNumFeatures &&
+ File.Size < Files[MaxFeaturesIndex].Size)) {
+ // Update the max features file based on unique features
+ // Break ties by selecting smaller files.
+ MaxNumFeatures = CurrentUnique;
+ MaxFeaturesIndex = i;
+ }
+ }
+ // Must be a valid index.
+ assert(MaxFeaturesIndex < Files.size());
+ // Remove any feature-less files found.
+ for (const auto &i : RemoveIndices)
+ RemainingFiles.erase(i);
+ if (MaxNumFeatures == 0) {
+ // Did not find a file that adds unique features.
+ // This means that we should have no remaining files.
+ assert(RemainingFiles.size() == 0);
+ assert(NumCovered == AllFeatures.size());
+ break;
+ }
+
+ // MaxFeaturesIndex must be an element of Remaining.
+ assert(RemainingFiles.find(MaxFeaturesIndex) != RemainingFiles.end());
+ // Remove the file with the most features from Remaining.
+ RemainingFiles.erase(MaxFeaturesIndex);
+ const auto &MaxFeatureFile = Files[MaxFeaturesIndex];
+ // Add the features of the max feature file to Covered.
+ for (const auto &F : MaxFeatureFile.Features) {
+ if (!Covered[F % kFeatureSetSize]) {
+ ++NumCovered;
+ Covered[F % kFeatureSetSize] = true;
+ NewFeatures->insert(F);
+ }
+ }
+ // Add the index to this file to the result.
+ NewFiles->push_back(MaxFeatureFile.Name);
+ // Update NewCov with the additional coverage
+ // that MaxFeatureFile provides.
+ for (const auto &C : MaxFeatureFile.Cov)
+ if (InitialCov.find(C) == InitialCov.end())
+ NewCov->insert(C);
+ }
+
+ return NewFeatures->size();
+}
+
static size_t
WriteNewControlFile(const std::string &CFPath,
const std::vector<SizedFile> &OldCorpus,
@@ -309,7 +432,8 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
std::set<uint32_t> *NewFeatures,
const std::set<uint32_t> &InitialCov,
std::set<uint32_t> *NewCov, const std::string &CFPath,
- bool V /*Verbose*/) {
+ bool V, /*Verbose*/
+ bool IsSetCoverMerge) {
if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge.
size_t NumAttempts = 0;
std::vector<MergeFileInfo> KnownFiles;
@@ -364,6 +488,7 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
// Every inner process should execute at least one input.
Command BaseCmd(Args);
BaseCmd.removeFlag("merge");
+ BaseCmd.removeFlag("set_cover_merge");
BaseCmd.removeFlag("fork");
BaseCmd.removeFlag("collect_data_flow");
for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) {
@@ -371,7 +496,9 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt);
Command Cmd(BaseCmd);
Cmd.addFlag("merge_control_file", CFPath);
- Cmd.addFlag("merge_inner", "1");
+ // If we are going to use the set cover implementation for
+ // minimization, add the merge_inner=2 internal flag.
+ Cmd.addFlag("merge_inner", IsSetCoverMerge ? "2" : "1");
if (!V) {
Cmd.setOutputFile(getDevNull());
Cmd.combineOutAndErr();
@@ -396,7 +523,10 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end());
- M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
+ if (IsSetCoverMerge)
+ M.SetCoverMerge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
+ else
+ M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; "
"%zd new coverage edges\n",
NewFiles->size(), NewFeatures->size(), NewCov->size());
diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.h b/compiler-rt/lib/fuzzer/FuzzerMerge.h
index de213018ee134..42f798e1da18a 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMerge.h
+++ b/compiler-rt/lib/fuzzer/FuzzerMerge.h
@@ -69,6 +69,11 @@ struct Merger {
std::set<uint32_t> *NewFeatures,
const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
std::vector<std::string> *NewFiles);
+ size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles);
size_t ApproximateMemoryConsumption() const;
std::set<uint32_t> AllFeatures() const;
};
@@ -81,7 +86,7 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
std::set<uint32_t> *NewFeatures,
const std::set<uint32_t> &InitialCov,
std::set<uint32_t> *NewCov, const std::string &CFPath,
- bool Verbose);
+ bool Verbose, bool IsSetCoverMerge);
} // namespace fuzzer
diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
index 99cf79c3aaa79..1b0e7342240cd 100644
--- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
+++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
@@ -865,6 +865,137 @@ TEST(Merger, Merge) {
TRACED_EQ(NewFeatures, {1, 2, 3});
}
+TEST(Merger, SetCoverMerge) {
+ Merger M;
+ std::set<uint32_t> Features, NewFeatures;
+ std::set<uint32_t> Cov, NewCov;
+ std::vector<std::string> NewFiles;
+
+ // Adds new files and features
+ EXPECT_TRUE(M.Parse("3\n0\nA\nB\nC\n"
+ "STARTED 0 1000\n"
+ "FT 0 1 2 3\n"
+ "STARTED 1 1001\n"
+ "FT 1 4 5 6 \n"
+ "STARTED 2 1002\n"
+ "FT 2 6 1 3\n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 6U);
+ TRACED_EQ(M.Files, {"A", "B", "C"});
+ TRACED_EQ(NewFiles, {"A", "B"});
+ TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5, 6});
+
+ // Doesn't return features or files in the initial corpus.
+ EXPECT_TRUE(M.Parse("3\n1\nA\nB\nC\n"
+ "STARTED 0 1000\n"
+ "FT 0 1 2 3\n"
+ "STARTED 1 1001\n"
+ "FT 1 4 5 6 \n"
+ "STARTED 2 1002\n"
+ "FT 2 6 1 3\n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 3U);
+ TRACED_EQ(M.Files, {"A", "B", "C"});
+ TRACED_EQ(NewFiles, {"B"});
+ TRACED_EQ(NewFeatures, {4, 5, 6});
+
+ // No new features, so no new files
+ EXPECT_TRUE(M.Parse("3\n2\nA\nB\nC\n"
+ "STARTED 0 1000\n"
+ "FT 0 1 2 3\n"
+ "STARTED 1 1001\n"
+ "FT 1 4 5 6 \n"
+ "STARTED 2 1002\n"
+ "FT 2 6 1 3\n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 0U);
+ TRACED_EQ(M.Files, {"A", "B", "C"});
+ TRACED_EQ(NewFiles, {});
+ TRACED_EQ(NewFeatures, {});
+
+ // Can pass initial features and coverage.
+ Features = {1, 2, 3};
+ Cov = {};
+ EXPECT_TRUE(M.Parse("2\n0\nA\nB\n"
+ "STARTED 0 1000\n"
+ "FT 0 1 2 3\n"
+ "STARTED 1 1001\n"
+ "FT 1 4 5 6\n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 3U);
+ TRACED_EQ(M.Files, {"A", "B"});
+ TRACED_EQ(NewFiles, {"B"});
+ TRACED_EQ(NewFeatures, {4, 5, 6});
+ Features.clear();
+ Cov.clear();
+
+ // Prefer files with a lot of features first (C has 4 features)
+ // Then prefer B over A due to the smaller size. After choosing C and B,
+ // A and D have no new features to contribute.
+ EXPECT_TRUE(M.Parse("4\n0\nA\nB\nC\nD\n"
+ "STARTED 0 2000\n"
+ "FT 0 3 5 6\n"
+ "STARTED 1 1000\n"
+ "FT 1 4 5 6 \n"
+ "STARTED 2 1000\n"
+ "FT 2 1 2 3 4 \n"
+ "STARTED 3 500\n"
+ "FT 3 1 \n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 6U);
+ TRACED_EQ(M.Files, {"A", "B", "C", "D"});
+ TRACED_EQ(NewFiles, {"C", "B"});
+ TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5, 6});
+
+ // Only 1 file covers all features.
+ EXPECT_TRUE(M.Parse("4\n1\nA\nB\nC\nD\n"
+ "STARTED 0 2000\n"
+ "FT 0 4 5 6 7 8\n"
+ "STARTED 1 1100\n"
+ "FT 1 1 2 3 \n"
+ "STARTED 2 1100\n"
+ "FT 2 2 3 \n"
+ "STARTED 3 1000\n"
+ "FT 3 1 \n"
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 3U);
+ TRACED_EQ(M.Files, {"A", "B", "C", "D"});
+ TRACED_EQ(NewFiles, {"B"});
+ TRACED_EQ(NewFeatures, {1, 2, 3});
+
+ // A Feature has a value greater than (1 << 21) and hence
+ // there are collisions in the underlying `covered features`
+ // bitvector.
+ EXPECT_TRUE(M.Parse("3\n0\nA\nB\nC\n"
+ "STARTED 0 2000\n"
+ "FT 0 1 2 3\n"
+ "STARTED 1 1000\n"
+ "FT 1 3 4 5 \n"
+ "STARTED 2 1000\n"
+ "FT 2 3 2097153 \n" // Last feature is (2^21 + 1).
+ "",
+ true));
+ EXPECT_EQ(M.SetCoverMerge(Features, &NewFeatures, Cov, &NewCov, &NewFiles),
+ 5U);
+ TRACED_EQ(M.Files, {"A", "B", "C"});
+ // File 'C' is not added because its last feature is considered
+ // covered due to collision with feature 1.
+ TRACED_EQ(NewFiles, {"B", "A"});
+ TRACED_EQ(NewFeatures, {1, 2, 3, 4, 5});
+}
+
#undef TRACED_EQ
TEST(DFT, BlockCoverage) {
diff --git a/compiler-rt/test/fuzzer/set_cover_merge.test b/compiler-rt/test/fuzzer/set_cover_merge.test
new file mode 100644
index 0000000000000..0a5d0abb0660f
--- /dev/null
+++ b/compiler-rt/test/fuzzer/set_cover_merge.test
@@ -0,0 +1,72 @@
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+
+RUN: rm -rf %t/T1 %t/T2
+RUN: mkdir -p %t/T1 %t/T2
+RUN: echo F..... > %t/T1/1
+RUN: echo .U.... > %t/T1/2
+RUN: echo ..Z... > %t/T1/3
+
+# T1 has 3 elements, T2 is empty.
+RUN: %run %t-FullCoverageSetTest -set_cover_merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: MERGE-OUTER: 3 files, 3 in the initial corpus
+CHECK1: MERGE-OUTER: 0 new files with 0 new features added
+
+RUN: echo ...Z.. > %t/T2/1
+RUN: echo ....E. > %t/T2/2
+RUN: echo .....R > %t/T2/3
+RUN: echo F..... > %t/T2/a
+RUN: echo .U.... > %t/T2/b
+RUN: echo ..Z... > %t/T2/c
+
+# T1 has 3 elements, T2 has 6 elements, only 3 are new.
+RUN: %run %t-FullCoverageSetTest -set_cover_merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: MERGE-OUTER: 9 files, 3 in the initial corpus
+CHECK2: MERGE-OUTER: 3 new files with 3 new features added
+
+# Now, T1 has 6 units and T2 has no new interesting units.
+RUN: %run %t-FullCoverageSetTest -set_cover_merge=1 %t/T1 %t/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: MERGE-OUTER: 12 files, 6 in the initial corpus
+CHECK3: MERGE-OUTER: 0 new files with 0 new features added
+
+RUN: rm -rf %t/T1/* %t/T2/*
+RUN: mkdir -p %t/T3
+RUN: echo ...... > %t/T1/1
+RUN: echo F..... > %t/T2/a
+RUN: echo .U.... > %t/T2/b
+RUN: echo ..Z... > %t/T2/c
+RUN: echo ...Z.. > %t/T3/1
+RUN: echo ....E. > %t/T3/2
+RUN: echo .....R > %t/T3/3
+RUN: echo .UZZER. > %t/T3/a
+RUN: echo .UZZER.. > %t/T3/b
+RUN: echo ...... > %t/T3/c
+
+# T1 is empty, T2 and T3 have overlapping features. The optimal solution
+# consists of 2 files: T2/a, T3/a. These files cover 6 new features.
+# Although T3/a is larger in size (1 more byte) we prefer it because
+# it covers more features than any other file.
+RUN: %run %t-FullCoverageSetTest -set_cover_merge=1 %t/T1 %t/T2 %t/T3 2>&1 | FileCheck %s --check-prefix=CHECK_OVERLAP
+CHECK_OVERLAP: MERGE-OUTER: 10 files, 1 in the initial corpus
+CHECK_OVERLAP: MERGE-OUTER: 2 new files with 6 new features added
+# Make sure that we are preferring smaller files (T3/a over T3/b).
+RUN: diff %t/T1/1b2301992a0266982b135fee5164937d7f7abca3 %t/T3/a
+
+RUN: rm -rf %t/T1/* %t/T2/* %t/T3/*
+RUN: echo ...... > %t/T1/1
+RUN: echo F..... > %t/T2/a
+RUN: echo .U.... > %t/T2/b
+RUN: echo ..Z... > %t/T2/c
+RUN: echo ...Z.. > %t/T3/1
+RUN: echo ....E. > %t/T3/2
+RUN: echo .....R > %t/T3/3
+RUN: echo .UZZER. > %t/T3/a
+RUN: echo .UZZER.. > %t/T3/b
+RUN: echo ...... > %t/T3/c
+
+# Test the previous scenario with the '-merge=1' flag.
+# Expect more files to be added to the first corpus, since '-merge=1'
+# is going to prefer smaller files and ignore the fact that
+# T3/a covers almost all of the available features.
+RUN: %run %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 %t/T3 2>&1 | FileCheck %s --check-prefix=CHECK_OVERLAP2
+CHECK_OVERLAP2: MERGE-OUTER: 10 files, 1 in the initial corpus
+CHECK_OVERLAP2: MERGE-OUTER: 6 new files with 6 new features added