diff options
author | Kostya Serebryany <kcc@google.com> | 2018-07-19 01:23:32 +0000 |
---|---|---|
committer | Kostya Serebryany <kcc@google.com> | 2018-07-19 01:23:32 +0000 |
commit | b245ab336ef0cef374b36d0fa4be432b3e6afcd2 (patch) | |
tree | c20a0ef23c15af14c1c7be55749b4ed7b4dba6b2 /lib | |
parent | 385df7539a532c43119655dec04b7ac50111b2cb (diff) |
[libFuzzer] first experimental attempt at DFT-based mutations (DFT=data-flow-trace)
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@337434 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/fuzzer/FuzzerCorpus.h | 9 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerDataFlowTrace.cpp | 2 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerDataFlowTrace.h | 4 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerLoop.cpp | 10 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerMutate.cpp | 27 | ||||
-rw-r--r-- | lib/fuzzer/FuzzerMutate.h | 8 | ||||
-rw-r--r-- | lib/fuzzer/tests/FuzzerUnittest.cpp | 3 |
7 files changed, 54 insertions, 9 deletions
diff --git a/lib/fuzzer/FuzzerCorpus.h b/lib/fuzzer/FuzzerCorpus.h index c1603e1ad..f844c07c7 100644 --- a/lib/fuzzer/FuzzerCorpus.h +++ b/lib/fuzzer/FuzzerCorpus.h @@ -38,7 +38,7 @@ struct InputInfo { bool Reduced = false; bool HasFocusFunction = false; Vector<uint32_t> UniqFeatureSet; - Vector<bool> DataFlowTraceForFocusFunction; + Vector<uint8_t> DataFlowTraceForFocusFunction; }; class InputCorpus { @@ -88,7 +88,7 @@ class InputCorpus { const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, bool HasFocusFunction, const Vector<uint32_t> &FeatureSet, - const DataFlowTrace &DFT) { + const DataFlowTrace &DFT, const InputInfo *BaseII) { assert(!U.empty()); if (FeatureDebug) Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); @@ -106,6 +106,11 @@ class InputCorpus { if (HasFocusFunction) if (auto V = DFT.Get(Sha1Str)) II.DataFlowTraceForFocusFunction = *V; + // This is a gross heuristic. + // Ideally, when we add an element to a corpus we need to know its DFT. + // But if we don't, we'll use the DFT of its base input. + if (II.DataFlowTraceForFocusFunction.empty() && BaseII) + II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; UpdateCorpusDistribution(); PrintCorpus(); // ValidateFeatureSet(); diff --git a/lib/fuzzer/FuzzerDataFlowTrace.cpp b/lib/fuzzer/FuzzerDataFlowTrace.cpp index 114034c5b..764f3e49f 100644 --- a/lib/fuzzer/FuzzerDataFlowTrace.cpp +++ b/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -67,7 +67,7 @@ void DataFlowTrace::Init(const std::string &DirPath, const char *End = L.c_str() + L.size(); assert(Beg < End); size_t Len = End - Beg; - Vector<bool> V(Len); + Vector<uint8_t> V(Len); for (size_t I = 0; I < Len; I++) { if (Beg[I] != '0' && Beg[I] != '1') ParseError("the trace should contain only 0 or 1"); diff --git a/lib/fuzzer/FuzzerDataFlowTrace.h b/lib/fuzzer/FuzzerDataFlowTrace.h index 1511430c3..ad4faeab7 100644 --- a/lib/fuzzer/FuzzerDataFlowTrace.h +++ b/lib/fuzzer/FuzzerDataFlowTrace.h @@ -40,7 +40,7 @@ class DataFlowTrace { public: void Init(const std::string &DirPath, const std::string &FocusFunction); void Clear() { Traces.clear(); } - const Vector<bool> *Get(const std::string &InputSha1) const { + const Vector<uint8_t> *Get(const std::string &InputSha1) const { auto It = Traces.find(InputSha1); if (It != Traces.end()) return &It->second; @@ -49,7 +49,7 @@ class DataFlowTrace { private: // Input's sha1 => DFT for the FocusFunction. - std::unordered_map<std::string, Vector<bool> > Traces; + std::unordered_map<std::string, Vector<uint8_t> > Traces; }; } // namespace fuzzer diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp index 1ba0765fa..ffcd3419c 100644 --- a/lib/fuzzer/FuzzerLoop.cpp +++ b/lib/fuzzer/FuzzerLoop.cpp @@ -503,8 +503,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, if (NumNewFeatures) { TPC.UpdateObservedPCs(); Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, - TPC.ObservedFocusFunction(), - UniqFeatureSetTmp, DFT); + TPC.ObservedFocusFunction(), UniqFeatureSetTmp, DFT, II); return true; } if (II && FoundUniqFeaturesOfII && @@ -687,7 +686,12 @@ void Fuzzer::MutateAndTestOne() { break; MaybeExitGracefully(); size_t NewSize = 0; - NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); + if (II.HasFocusFunction && !II.DataFlowTraceForFocusFunction.empty() && + Size <= CurrentMaxMutationLen) + NewSize = MD.MutateWithMask(CurrentUnitData, Size, Size, + II.DataFlowTraceForFocusFunction); + else + NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); assert(NewSize > 0 && "Mutator returned empty unit"); assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit"); Size = NewSize; diff --git a/lib/fuzzer/FuzzerMutate.cpp b/lib/fuzzer/FuzzerMutate.cpp index e89e1a475..9c2b8a562 100644 --- a/lib/fuzzer/FuzzerMutate.cpp +++ b/lib/fuzzer/FuzzerMutate.cpp @@ -529,6 +529,33 @@ size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, return 1; // Fallback, should not happen frequently. } +// Mask represents the set of Data bytes that are worth mutating. +size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, + size_t MaxSize, + const Vector<uint8_t> &Mask) { + assert(Size <= Mask.size()); + // * Copy the worthy bytes into a temporary array T + // * Mutate T + // * Copy T back. + // This is totally unoptimized. + auto &T = MutateWithMaskTemp; + if (T.size() < Size) + T.resize(Size); + size_t OneBits = 0; + for (size_t I = 0; I < Size; I++) + if (Mask[I]) + T[OneBits++] = Data[I]; + + assert(!T.empty()); + size_t NewSize = Mutate(T.data(), OneBits, OneBits); + assert(NewSize <= OneBits); + // Even if NewSize < OneBits we still use all OneBits bytes. + for (size_t I = 0, J = 0; I < Size; I++) + if (Mask[I]) + Data[I] = T[J++]; + return Size; +} + void MutationDispatcher::AddWordToManualDictionary(const Word &W) { ManualDictionary.push_back( {W, std::numeric_limits<size_t>::max()}); diff --git a/lib/fuzzer/FuzzerMutate.h b/lib/fuzzer/FuzzerMutate.h index d8c1ddc52..828ecc13d 100644 --- a/lib/fuzzer/FuzzerMutate.h +++ b/lib/fuzzer/FuzzerMutate.h @@ -70,6 +70,13 @@ public: /// Applies one of the configured mutations. /// Returns the new size of data which could be up to MaxSize. size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations to the bytes of Data + /// that have '1' in Mask. + /// Mask.size() should be >= Size. + size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, + const Vector<uint8_t> &Mask); + /// Applies one of the default mutations. Provided as a service /// to mutation authors. size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); @@ -142,6 +149,7 @@ public: const InputCorpus *Corpus = nullptr; Vector<uint8_t> MutateInPlaceHere; + Vector<uint8_t> MutateWithMaskTemp; // CustomCrossOver needs its own buffer as a custom implementation may call // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. Vector<uint8_t> CustomCrossOverInPlaceHere; diff --git a/lib/fuzzer/tests/FuzzerUnittest.cpp b/lib/fuzzer/tests/FuzzerUnittest.cpp index 1b3a0934a..e3b067026 100644 --- a/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -588,7 +588,8 @@ TEST(Corpus, Distribution) { size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {}, DFT); + C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT, + nullptr); Vector<size_t> Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { |