summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/fuzzer/FuzzerIO.h 1
-rw-r--r-- lib/fuzzer/FuzzerIOPosix.cpp 7
-rw-r--r-- lib/fuzzer/FuzzerInternal.h 2
-rw-r--r-- lib/fuzzer/FuzzerLoop.cpp 107
-rw-r--r-- test/fuzzer/fuzzer-dirs.test 2
-rw-r--r-- test/fuzzer/reduce_inputs.test 2
6 files changed, 68 insertions, 53 deletions
diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h
index 8ed0e003d..5059c11ac 100644
--- a/lib/fuzzer/FuzzerIO.h
+++ b/lib/fuzzer/FuzzerIO.h
@@ -53,6 +53,7 @@ void RawPrint(const char *Str);
// Platform specific functions:
bool IsFile(const std::string &Path);
+size_t FileSize(const std::string &Path);
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir);
diff --git a/lib/fuzzer/FuzzerIOPosix.cpp b/lib/fuzzer/FuzzerIOPosix.cpp
index d642b3424..2c452a7dd 100644
--- a/lib/fuzzer/FuzzerIOPosix.cpp
+++ b/lib/fuzzer/FuzzerIOPosix.cpp
@@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
return S_ISREG(St.st_mode);
}
+size_t FileSize(const std::string &Path) { // Returns the st_size that stat() reports for Path, or 0 if stat() fails.
+ struct stat St;
+ if (stat(Path.c_str(), &St)) // stat() returns non-zero on failure.
+ return 0; // NOTE: a failed stat() and a genuinely empty file both yield 0 here.
+ return St.st_size; // Implicitly converted to the size_t return type.
+}
+
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);
diff --git a/lib/fuzzer/FuzzerInternal.h b/lib/fuzzer/FuzzerInternal.h
index 70136a30b..34fdeb821 100644
--- a/lib/fuzzer/FuzzerInternal.h
+++ b/lib/fuzzer/FuzzerInternal.h
@@ -38,7 +38,6 @@ public:
void Loop(const Vector<std::string> &CorpusDirs);
void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
void MinimizeCrashLoop(const Unit &U);
- void ShuffleAndMinimize(UnitVector *V);
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
@@ -103,7 +102,6 @@ private:
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
void PrintStatusForNewUnit(const Unit &U, const char *Text);
- void ShuffleCorpus(UnitVector *V);
void CheckExitOnSrcPosOrItem();
static void StaticDeathCallback();
diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp
index 84ea2d6e8..97fc31cc8 100644
--- a/lib/fuzzer/FuzzerLoop.cpp
+++ b/lib/fuzzer/FuzzerLoop.cpp
@@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
PrintStats("RELOAD");
}
-void Fuzzer::ShuffleCorpus(UnitVector *V) {
- std::shuffle(V->begin(), V->end(), MD.GetRand());
- if (Options.PreferSmall)
- std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
- return A.size() < B.size();
- });
-}
-
-void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
- Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
- GetPeakRSSMb());
- if (Options.ShuffleAtStartUp)
- ShuffleCorpus(InitialCorpus);
-
- // Test the callback with empty input and never try it again.
- uint8_t dummy;
- ExecuteCallback(&dummy, 0);
-
- for (auto &U : *InitialCorpus) {
- RunOne(U.data(), U.size());
- CheckExitOnSrcPosOrItem();
- TryDetectingAMemoryLeak(U.data(), U.size(),
- /*DuringInitialCorpusExecution*/ true);
- U.clear();
- }
- PrintStats("INITED");
- if (Corpus.empty()) {
- Printf("ERROR: no interesting inputs were found. "
- "Is the code instrumented for coverage? Exiting.\n");
- exit(1);
- }
-}
-
void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) { // New version: lists and sizes the files in CorpusDirs, then loads and runs them one at a time.
const size_t kMaxSaneLen = 1 << 20;
const size_t kMinDefaultLen = 4096;
- size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
- UnitVector InitialCorpus;
- for (auto &Inp : CorpusDirs) {
- Printf("Loading corpus dir: %s\n", Inp.c_str());
- ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
- TemporaryMaxLen, /*ExitOnError=*/false);
+ struct SizedFile { // A file path paired with the size FileSize() reported for it.
+ std::string File;
+ size_t Size;
+ };
+ Vector<SizedFile> SizedFiles;
+ size_t MaxSize = 0;
+ size_t MinSize = -1; // -1 converts to the largest size_t value, so the first Min() below replaces it.
+ size_t TotalSize = 0;
+ for (auto &Dir : CorpusDirs) {
+ Vector<std::string> Files;
+ ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true); // The Epoch pointer argument is passed as null (0).
+ Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
+ for (auto &File : Files) {
+ if (size_t Size = FileSize(File)) { // Files for which FileSize() returns 0 are left out of SizedFiles.
+ MaxSize = Max(Size, MaxSize);
+ MinSize = Min(Size, MinSize);
+ TotalSize += Size;
+ SizedFiles.push_back({File, Size});
+ }
+ }
}
- if (Options.MaxLen == 0) {
- size_t MaxLen = 0;
- for (auto &U : InitialCorpus)
- MaxLen = std::max(U.size(), MaxLen);
- SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
+ if (Options.MaxLen == 0) // When Options.MaxLen is 0, use the largest seed size clamped to [kMinDefaultLen, kMaxSaneLen].
+ SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
+ assert(MaxInputLen > 0);
+
+ if (SizedFiles.empty()) { // No file with a non-zero FileSize() was found in any corpus dir.
+ Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ Unit U({'\n'}); // Valid ASCII input.
+ RunOne(U.data(), U.size());
+ } else {
+ Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
+ " rss: %zdMb\n",
+ SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
+ if (Options.ShuffleAtStartUp)
+ std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
+
+ if (Options.PreferSmall) // stable_sort keeps the (possibly shuffled) relative order of equally sized files.
+ std::stable_sort(
+ SizedFiles.begin(), SizedFiles.end(),
+ [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
+
+ // Load and execute inputs one by one.
+ for (auto &SF : SizedFiles) {
+ auto U = FileToVector(SF.File, MaxInputLen); // Presumably reads at most MaxInputLen bytes (asserted on the next line) -- confirm in FileToVector.
+ assert(U.size() <= MaxInputLen);
+ RunOne(U.data(), U.size());
+ CheckExitOnSrcPosOrItem();
+ TryDetectingAMemoryLeak(U.data(), U.size(),
+ /*DuringInitialCorpusExecution*/ true);
+ }
}
- if (InitialCorpus.empty()) {
- InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input.
- if (Options.Verbosity)
- Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ // Test the callback with empty input and never try it again.
+ uint8_t dummy;
+ ExecuteCallback(&dummy, 0); // Size is 0; dummy only provides a non-null data pointer.
+
+ PrintStats("INITED");
+ if (Corpus.empty()) { // NOTE(review): Corpus is presumably populated by the RunOne() calls above -- confirm.
+ Printf("ERROR: no interesting inputs were found. "
+ "Is the code instrumented for coverage? Exiting.\n");
+ exit(1);
}
- ShuffleAndMinimize(&InitialCorpus);
}
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
diff --git a/test/fuzzer/fuzzer-dirs.test b/test/fuzzer/fuzzer-dirs.test
index ef0888f3b..9b6e4d1ee 100644
--- a/test/fuzzer/fuzzer-dirs.test
+++ b/test/fuzzer/fuzzer-dirs.test
@@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a
RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
-SUBDIRS: READ units: 3
+SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024
diff --git a/test/fuzzer/reduce_inputs.test b/test/fuzzer/reduce_inputs.test
index 02e090ebd..94f8cc4f3 100644
--- a/test/fuzzer/reduce_inputs.test
+++ b/test/fuzzer/reduce_inputs.test
@@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
# Test that reduce_inputs deletes redundant files in the corpus.
RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
-COUNT: READ units: 4
+COUNT: seed corpus: files: 4
# a bit longer test
RUN: %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -runs=1000000 2>&1 | FileCheck %s