summary | refs | log | tree | commit | diff
path: root/lib/fuzzer
diff options
context:
space:
mode:
author Kostya Serebryany <kcc@google.com> 2017-08-29 20:51:24 +0000
committer Kostya Serebryany <kcc@google.com> 2017-08-29 20:51:24 +0000
commit 04d0769c4c9fe8e3204424cfc5368b4a78006d03 (patch)
tree e9b3183e57698fc206a88e2d2fc3cac5325a1ba9 /lib/fuzzer
parent 21ee37b2d0d672da99b525418598056ed6a7ba79 (diff)
[libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@312033 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/fuzzer')
-rw-r--r-- lib/fuzzer/FuzzerIO.h 1
-rw-r--r-- lib/fuzzer/FuzzerIOPosix.cpp 7
-rw-r--r-- lib/fuzzer/FuzzerInternal.h 2
-rw-r--r-- lib/fuzzer/FuzzerLoop.cpp 107
4 files changed, 66 insertions, 51 deletions
diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h
index 8ed0e003d..5059c11ac 100644
--- a/lib/fuzzer/FuzzerIO.h
+++ b/lib/fuzzer/FuzzerIO.h
@@ -53,6 +53,7 @@ void RawPrint(const char *Str);
// Platform specific functions:
bool IsFile(const std::string &Path);
+size_t FileSize(const std::string &Path);
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir);
diff --git a/lib/fuzzer/FuzzerIOPosix.cpp b/lib/fuzzer/FuzzerIOPosix.cpp
index d642b3424..2c452a7dd 100644
--- a/lib/fuzzer/FuzzerIOPosix.cpp
+++ b/lib/fuzzer/FuzzerIOPosix.cpp
@@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
return S_ISREG(St.st_mode);
}
+size_t FileSize(const std::string &Path) {
+ struct stat St;
+ if (stat(Path.c_str(), &St))
+ return 0;
+ return St.st_size;
+}
+
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);
diff --git a/lib/fuzzer/FuzzerInternal.h b/lib/fuzzer/FuzzerInternal.h
index 70136a30b..34fdeb821 100644
--- a/lib/fuzzer/FuzzerInternal.h
+++ b/lib/fuzzer/FuzzerInternal.h
@@ -38,7 +38,6 @@ public:
void Loop(const Vector<std::string> &CorpusDirs);
void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
void MinimizeCrashLoop(const Unit &U);
- void ShuffleAndMinimize(UnitVector *V);
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
@@ -103,7 +102,6 @@ private:
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
void PrintStatusForNewUnit(const Unit &U, const char *Text);
- void ShuffleCorpus(UnitVector *V);
void CheckExitOnSrcPosOrItem();
static void StaticDeathCallback();
diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp
index 84ea2d6e8..97fc31cc8 100644
--- a/lib/fuzzer/FuzzerLoop.cpp
+++ b/lib/fuzzer/FuzzerLoop.cpp
@@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
PrintStats("RELOAD");
}
-void Fuzzer::ShuffleCorpus(UnitVector *V) {
- std::shuffle(V->begin(), V->end(), MD.GetRand());
- if (Options.PreferSmall)
- std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
- return A.size() < B.size();
- });
-}
-
-void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
- Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
- GetPeakRSSMb());
- if (Options.ShuffleAtStartUp)
- ShuffleCorpus(InitialCorpus);
-
- // Test the callback with empty input and never try it again.
- uint8_t dummy;
- ExecuteCallback(&dummy, 0);
-
- for (auto &U : *InitialCorpus) {
- RunOne(U.data(), U.size());
- CheckExitOnSrcPosOrItem();
- TryDetectingAMemoryLeak(U.data(), U.size(),
- /*DuringInitialCorpusExecution*/ true);
- U.clear();
- }
- PrintStats("INITED");
- if (Corpus.empty()) {
- Printf("ERROR: no interesting inputs were found. "
- "Is the code instrumented for coverage? Exiting.\n");
- exit(1);
- }
-}
-
void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
const size_t kMaxSaneLen = 1 << 20;
const size_t kMinDefaultLen = 4096;
- size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
- UnitVector InitialCorpus;
- for (auto &Inp : CorpusDirs) {
- Printf("Loading corpus dir: %s\n", Inp.c_str());
- ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
- TemporaryMaxLen, /*ExitOnError=*/false);
+ struct SizedFile {
+ std::string File;
+ size_t Size;
+ };
+ Vector<SizedFile> SizedFiles;
+ size_t MaxSize = 0;
+ size_t MinSize = -1;
+ size_t TotalSize = 0;
+ for (auto &Dir : CorpusDirs) {
+ Vector<std::string> Files;
+ ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
+ Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
+ for (auto &File : Files) {
+ if (size_t Size = FileSize(File)) {
+ MaxSize = Max(Size, MaxSize);
+ MinSize = Min(Size, MinSize);
+ TotalSize += Size;
+ SizedFiles.push_back({File, Size});
+ }
+ }
}
- if (Options.MaxLen == 0) {
- size_t MaxLen = 0;
- for (auto &U : InitialCorpus)
- MaxLen = std::max(U.size(), MaxLen);
- SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
+ if (Options.MaxLen == 0)
+ SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
+ assert(MaxInputLen > 0);
+
+ if (SizedFiles.empty()) {
+ Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ Unit U({'\n'}); // Valid ASCII input.
+ RunOne(U.data(), U.size());
+ } else {
+ Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
+ " rss: %zdMb\n",
+ SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
+ if (Options.ShuffleAtStartUp)
+ std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
+
+ if (Options.PreferSmall)
+ std::stable_sort(
+ SizedFiles.begin(), SizedFiles.end(),
+ [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
+
+ // Load and execute inputs one by one.
+ for (auto &SF : SizedFiles) {
+ auto U = FileToVector(SF.File, MaxInputLen);
+ assert(U.size() <= MaxInputLen);
+ RunOne(U.data(), U.size());
+ CheckExitOnSrcPosOrItem();
+ TryDetectingAMemoryLeak(U.data(), U.size(),
+ /*DuringInitialCorpusExecution*/ true);
+ }
}
- if (InitialCorpus.empty()) {
- InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input.
- if (Options.Verbosity)
- Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ // Test the callback with empty input and never try it again.
+ uint8_t dummy;
+ ExecuteCallback(&dummy, 0);
+
+ PrintStats("INITED");
+ if (Corpus.empty()) {
+ Printf("ERROR: no interesting inputs were found. "
+ "Is the code instrumented for coverage? Exiting.\n");
+ exit(1);
}
- ShuffleAndMinimize(&InitialCorpus);
}
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {