From 452879a466facf705d4f5b12617c2396b5689f35 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 11 Oct 2017 01:44:26 +0000 Subject: [libFuzzer] experimental flag to tweak the corpus distribution. Seems to improve the situation dramatically on the png benchmark and make things worse on a number of micro-puzzles. Needs more A/B testing git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@315407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/fuzzer/FuzzerCorpus.h | 32 +++++++++++++++++++++++--------- lib/fuzzer/FuzzerDriver.cpp | 1 + lib/fuzzer/FuzzerFlags.def | 1 + lib/fuzzer/FuzzerLoop.cpp | 3 +++ lib/fuzzer/FuzzerOptions.h | 1 + 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/lib/fuzzer/FuzzerCorpus.h b/lib/fuzzer/FuzzerCorpus.h index 2384d5082..c9f7cc9dd 100644 --- a/lib/fuzzer/FuzzerCorpus.h +++ b/lib/fuzzer/FuzzerCorpus.h @@ -36,6 +36,7 @@ struct InputInfo { bool MayDeleteFile = false; bool Reduced = false; Vector<uint32_t> UniqFeatureSet; + float FeatureFrequencyScore = 1.0; }; class InputCorpus { @@ -44,6 +45,7 @@ class InputCorpus { InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); + memset(FeatureFrequency, 0, sizeof(FeatureFrequency)); } ~InputCorpus() { for (auto II : Inputs) @@ -134,6 +136,7 @@ class InputCorpus { Hashes.insert(Sha1ToString(II->Sha1)); II->U = U; II->Reduced = true; + UpdateCorpusDistribution(); } bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } @@ -145,8 +148,6 @@ class InputCorpus { }; // Returns an index of random unit from the corpus to mutate. - // Hypothesis: units added to the corpus last are more likely to be - // interesting. This function gives more weight to the more recent units. 
size_t ChooseUnitIdxToMutate(Random &Rand) { size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); assert(Idx < Inputs.size()); @@ -212,15 +213,22 @@ class InputCorpus { return false; } + void UpdateFeatureFrequency(size_t Idx) { + FeatureFrequency[Idx % kFeatureSetSize]++; + } + float GetFeatureFrequency(size_t Idx) const { + return FeatureFrequency[Idx % kFeatureSetSize]; + } + void UpdateFeatureFrequencyScore(InputInfo *II) { + II->FeatureFrequencyScore = 0.00000001; + for (auto Idx : II->UniqFeatureSet) + II->FeatureFrequencyScore += + 1. / (GetFeatureFrequency(Idx) * GetFeatureFrequency(Idx) + 1.); + } + size_t NumFeatures() const { return NumAddedFeatures; } size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } - void ResetFeatureSet() { - assert(Inputs.empty()); - memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); - memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); - } - private: static const bool FeatureDebug = false; @@ -243,6 +251,10 @@ private: // Updates the probability distribution for the units in the corpus. // Must be called whenever the corpus or unit weights are changed. + // + // Hypothesis: units added to the corpus last are more interesting. + // + // Hypothesis: inputs with infrequent features are more interesting. 
void UpdateCorpusDistribution() { size_t N = Inputs.size(); assert(N); @@ -250,7 +262,8 @@ private: Weights.resize(N); std::iota(Intervals.begin(), Intervals.end(), 0); for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures * (i + 1); + Weights[i] = + Inputs[i]->NumFeatures * (i + 1) * Inputs[i]->FeatureFrequencyScore; CorpusDistribution = std::piecewise_constant_distribution<double>( Intervals.begin(), Intervals.end(), Weights.begin()); } @@ -266,6 +279,7 @@ private: size_t NumUpdatedFeatures = 0; uint32_t InputSizesPerFeature[kFeatureSetSize]; uint32_t SmallestElementPerFeature[kFeatureSetSize]; + float FeatureFrequency[kFeatureSetSize]; std::string OutputCorpus; }; diff --git a/lib/fuzzer/FuzzerDriver.cpp b/lib/fuzzer/FuzzerDriver.cpp index dcf8d505f..29248dcee 100644 --- a/lib/fuzzer/FuzzerDriver.cpp +++ b/lib/fuzzer/FuzzerDriver.cpp @@ -605,6 +605,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.PrintCoverage = Flags.print_coverage; Options.DumpCoverage = Flags.dump_coverage; Options.UseClangCoverage = Flags.use_clang_coverage; + Options.UseFeatureFrequency = Flags.use_feature_frequency; if (Flags.exit_on_src_pos) Options.ExitOnSrcPos = Flags.exit_on_src_pos; if (Flags.exit_on_item) diff --git a/lib/fuzzer/FuzzerFlags.def b/lib/fuzzer/FuzzerFlags.def index a23818fd5..41b94c69c 100644 --- a/lib/fuzzer/FuzzerFlags.def +++ b/lib/fuzzer/FuzzerFlags.def @@ -132,6 +132,7 @@ FUZZER_FLAG_STRING(run_equivalence_server, "Experimental") FUZZER_FLAG_STRING(use_equivalence_server, "Experimental") FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_FLAG_INT(use_clang_coverage, 0, "Experimental") +FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental") FUZZER_DEPRECATED_FLAG(exit_on_first) FUZZER_DEPRECATED_FLAG(save_minimized_corpus) diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp index 4ccfe3f2a..c095fed12 100644 --- a/lib/fuzzer/FuzzerLoop.cpp +++ b/lib/fuzzer/FuzzerLoop.cpp @@ -396,6 +396,7 @@ bool 
Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, size_t FoundUniqFeaturesOfII = 0; size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); TPC.CollectFeatures([&](size_t Feature) { + Corpus.UpdateFeatureFrequency(Feature); if (Corpus.AddFeature(Feature, Size, Options.Shrink)) UniqFeatureSetTmp.push_back(Feature); if (Options.ReduceInputs && II) @@ -565,6 +566,8 @@ void Fuzzer::MutateAndTestOne() { MD.StartMutationSequence(); auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); + if (Options.UseFeatureFrequency) + Corpus.UpdateFeatureFrequencyScore(&II); const auto &U = II.U; memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); assert(CurrentUnitData); diff --git a/lib/fuzzer/FuzzerOptions.h b/lib/fuzzer/FuzzerOptions.h index ddf6e4201..e57c7df5b 100644 --- a/lib/fuzzer/FuzzerOptions.h +++ b/lib/fuzzer/FuzzerOptions.h @@ -54,6 +54,7 @@ struct FuzzingOptions { bool DumpCoverage = false; bool UseClangCoverage = false; bool DetectLeaks = true; + int UseFeatureFrequency = false; int TraceMalloc = 0; bool HandleAbrt = false; bool HandleBus = false; -- cgit v1.2.3