diff options
author | Kostya Serebryany <kcc@google.com> | 2017-08-29 20:51:24 +0000 |
---|---|---|
committer | Kostya Serebryany <kcc@google.com> | 2017-08-29 20:51:24 +0000 |
commit | 04d0769c4c9fe8e3204424cfc5368b4a78006d03 (patch) | |
tree | e9b3183e57698fc206a88e2d2fc3cac5325a1ba9 /lib/fuzzer | |
parent | 21ee37b2d0d672da99b525418598056ed6a7ba79 (diff) |
[libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@312033 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/fuzzer')
-rw-r--r-- | lib/fuzzer/FuzzerIO.h | 1 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerIOPosix.cpp | 7 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerInternal.h | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerLoop.cpp | 107 |
4 files changed, 66 insertions, 51 deletions
diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h index 8ed0e003d..5059c11ac 100644 --- a/lib/fuzzer/FuzzerIO.h +++ b/lib/fuzzer/FuzzerIO.h @@ -53,6 +53,7 @@ void RawPrint(const char *Str); // Platform specific functions: bool IsFile(const std::string &Path); +size_t FileSize(const std::string &Path); void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector<std::string> *V, bool TopDir); diff --git a/lib/fuzzer/FuzzerIOPosix.cpp b/lib/fuzzer/FuzzerIOPosix.cpp index d642b3424..2c452a7dd 100644 --- a/lib/fuzzer/FuzzerIOPosix.cpp +++ b/lib/fuzzer/FuzzerIOPosix.cpp @@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) { return S_ISREG(St.st_mode); } +size_t FileSize(const std::string &Path) { + struct stat St; + if (stat(Path.c_str(), &St)) + return 0; + return St.st_size; +} + void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, Vector<std::string> *V, bool TopDir) { auto E = GetEpoch(Dir); diff --git a/lib/fuzzer/FuzzerInternal.h b/lib/fuzzer/FuzzerInternal.h index 70136a30b..34fdeb821 100644 --- a/lib/fuzzer/FuzzerInternal.h +++ b/lib/fuzzer/FuzzerInternal.h @@ -38,7 +38,6 @@ public: void Loop(const Vector<std::string> &CorpusDirs); void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs); void MinimizeCrashLoop(const Unit &U); - void ShuffleAndMinimize(UnitVector *V); void RereadOutputCorpus(size_t MaxSize); size_t secondsSinceProcessStartUp() { @@ -103,7 +102,6 @@ private: void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); void PrintStatusForNewUnit(const Unit &U, const char *Text); - void ShuffleCorpus(UnitVector *V); void CheckExitOnSrcPosOrItem(); static void StaticDeathCallback(); diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp index 84ea2d6e8..97fc31cc8 100644 --- a/lib/fuzzer/FuzzerLoop.cpp +++ b/lib/fuzzer/FuzzerLoop.cpp @@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { 
PrintStats("RELOAD"); } -void Fuzzer::ShuffleCorpus(UnitVector *V) { - std::shuffle(V->begin(), V->end(), MD.GetRand()); - if (Options.PreferSmall) - std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) { - return A.size() < B.size(); - }); -} - -void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { - Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(), - GetPeakRSSMb()); - if (Options.ShuffleAtStartUp) - ShuffleCorpus(InitialCorpus); - - // Test the callback with empty input and never try it again. - uint8_t dummy; - ExecuteCallback(&dummy, 0); - - for (auto &U : *InitialCorpus) { - RunOne(U.data(), U.size()); - CheckExitOnSrcPosOrItem(); - TryDetectingAMemoryLeak(U.data(), U.size(), - /*DuringInitialCorpusExecution*/ true); - U.clear(); - } - PrintStats("INITED"); - if (Corpus.empty()) { - Printf("ERROR: no interesting inputs were found. " - "Is the code instrumented for coverage? Exiting.\n"); - exit(1); - } -} - void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { auto TimeOfUnit = duration_cast<seconds>(UnitStopTime - UnitStartTime).count(); @@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() { void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) { const size_t kMaxSaneLen = 1 << 20; const size_t kMinDefaultLen = 4096; - size_t TemporaryMaxLen = Options.MaxLen ? 
Options.MaxLen : kMaxSaneLen; - UnitVector InitialCorpus; - for (auto &Inp : CorpusDirs) { - Printf("Loading corpus dir: %s\n", Inp.c_str()); - ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, - TemporaryMaxLen, /*ExitOnError=*/false); + struct SizedFile { + std::string File; + size_t Size; + }; + Vector<SizedFile> SizedFiles; + size_t MaxSize = 0; + size_t MinSize = -1; + size_t TotalSize = 0; + for (auto &Dir : CorpusDirs) { + Vector<std::string> Files; + ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); + Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str()); + for (auto &File : Files) { + if (size_t Size = FileSize(File)) { + MaxSize = Max(Size, MaxSize); + MinSize = Min(Size, MinSize); + TotalSize += Size; + SizedFiles.push_back({File, Size}); + } + } } - if (Options.MaxLen == 0) { - size_t MaxLen = 0; - for (auto &U : InitialCorpus) - MaxLen = std::max(U.size(), MaxLen); - SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen)); + if (Options.MaxLen == 0) + SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen)); + assert(MaxInputLen > 0); + + if (SizedFiles.empty()) { + Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); + Unit U({'\n'}); // Valid ASCII input. + RunOne(U.data(), U.size()); + } else { + Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb" + " rss: %zdMb\n", + SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb()); + if (Options.ShuffleAtStartUp) + std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand()); + + if (Options.PreferSmall) + std::stable_sort( + SizedFiles.begin(), SizedFiles.end(), + [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; }); + + // Load and execute inputs one by one. 
+ for (auto &SF : SizedFiles) { + auto U = FileToVector(SF.File, MaxInputLen); + assert(U.size() <= MaxInputLen); + RunOne(U.data(), U.size()); + CheckExitOnSrcPosOrItem(); + TryDetectingAMemoryLeak(U.data(), U.size(), + /*DuringInitialCorpusExecution*/ true); + } } - if (InitialCorpus.empty()) { - InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input. - if (Options.Verbosity) - Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); + // Test the callback with empty input and never try it again. + uint8_t dummy; + ExecuteCallback(&dummy, 0); + + PrintStats("INITED"); + if (Corpus.empty()) { + Printf("ERROR: no interesting inputs were found. " + "Is the code instrumented for coverage? Exiting.\n"); + exit(1); } - ShuffleAndMinimize(&InitialCorpus); } void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) { |