[CodeView] Refactor / Rewrite TypeSerializer and TypeTableBuilder.

The motivation behind this patch is that future directions require us to be able to compute the hash value of records independently of actually using them for de-duplication. The current structure of TypeSerializer / TypeTableBuilder being a single entry point that takes an unserialized type record, and then hashes and de-duplicates it is not flexible enough to allow this. At the same time, the existing TypeSerializer is already extremely complex for this very reason -- it tries to be too many things. In addition to serializing, hashing, and de-duplicating, it also supports splitting up field list records and adding continuations. All of this functionality crammed into this one class makes it very complicated to work with and hard to maintain. To solve all of these problems, I've re-written everything from scratch and split the functionality into separate pieces that can easily be reused. The end result is that one class TypeSerializer is turned into 3 new classes SimpleTypeSerializer, ContinuationRecordBuilder, and TypeTableBuilder, each of which in isolation is simple and straightforward. A quick summary of these new classes and their responsibilities is: - SimpleTypeSerializer : Turns a non-FieldList leaf type into a series of bytes. Does not do any hashing. Every time you call it, it will re-serialize and return bytes again. The same instance can be re-used over and over to avoid re-allocations, and in exchange for this optimization the bytes returned by the serializer only live until the caller attempts to serialize a new record. - ContinuationRecordBuilder : Turns a FieldList-like record into a series of fragments. Does not do any hashing. Like SimpleTypeSerializer, returns references to privately owned bytes, so the storage is invalidated as soon as the caller tries to re-use the instance. Works equally well for LF_FIELDLIST as it does for LF_METHODLIST, solving a long-standing theoretical limitation of the previous implementation. 
- TypeTableBuilder : Accepts sequences of bytes that the user has already serialized, and inserts them by de-duplicating with a hash table. For the sake of convenience and efficiency, this class internally stores a SimpleTypeSerializer so that it can accept unserialized records. The same is not true of ContinuationRecordBuilder. The user is required to create their own instance of ContinuationRecordBuilder. Differential Revision: https://reviews.llvm.org/D40518 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319198 91177308-0d34-0410-b5e6-96231b3b80d8
author: Zachary Turner <zturner@google.com> 2017-11-28 18:33:17 +0000
committer: Zachary Turner <zturner@google.com> 2017-11-28 18:33:17 +0000
commit: ab45c0673e7510b52995f560cce64b002601ed9f (patch)
tree: 5070004d67b7856a64a77c523faeb3c426adbc1c /lib/DebugInfo
parent: a710db28b7f3ef5d7fe54930822b88ec8901c02f (diff)
8 files changed, 530 insertions, 393 deletions
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index b0cefe64fdd..566ebaeae80 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMDebugInfoCodeView
   CodeViewError.cpp
   CodeViewRecordIO.cpp
+  ContinuationRecordBuilder.cpp
   CVSymbolVisitor.cpp
   CVTypeVisitor.cpp
   DebugChecksumsSubsection.cpp
@@ -21,6 +22,7 @@ add_llvm_library(LLVMDebugInfoCodeView
   Line.cpp
   RecordName.cpp
   RecordSerialization.cpp
+  SimpleTypeSerializer.cpp
   StringsAndChecksums.cpp
   SymbolRecordMapping.cpp
   SymbolDumper.cpp
@@ -29,7 +31,7 @@ add_llvm_library(LLVMDebugInfoCodeView
   TypeIndex.cpp
   TypeIndexDiscovery.cpp
   TypeRecordMapping.cpp
-  TypeSerializer.cpp
+  TypeTableBuilder.cpp
   TypeStreamMerger.cpp
   TypeTableCollection.cpp
 
diff --git a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
new file mode 100644
index 00000000000..40e794b3b7b
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -0,0 +1,256 @@
+#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace {
+struct ContinuationRecord { // byte layout of the LF_INDEX record ending a split segment
+  ulittle16_t Kind{uint16_t(TypeLeafKind::LF_INDEX)};
+  ulittle16_t Size{0};
+  ulittle32_t IndexRef{0xB0C0B0C0}; // placeholder, patched in createSegmentRecord()
+};
+
+struct SegmentInjection { // bytes spliced into the buffer where a segment is split
+  SegmentInjection(TypeLeafKind Kind) { Prefix.RecordKind = Kind; }
+
+  ContinuationRecord Cont; // closes the segment being ended
+  RecordPrefix Prefix; // opens the segment that follows
+};
+} // namespace
+
+static void addPadding(BinaryStreamWriter &Writer) { // pad Writer's offset to a multiple of 4
+  uint32_t Align = Writer.getOffset() % 4;
+  if (Align == 0)
+    return; // already aligned, nothing to write
+
+  int PaddingBytes = 4 - Align;
+  while (PaddingBytes > 0) {
+    uint8_t Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes); // LF_PAD0 + pad bytes still to emit
+    cantFail(Writer.writeInteger(Pad));
+    --PaddingBytes;
+  }
+}
+
+static SegmentInjection InjectFieldList(TypeLeafKind::LF_FIELDLIST); // splice bytes for field lists
+static SegmentInjection InjectMethodOverloadList(TypeLeafKind::LF_METHODLIST); // ... for method lists
+
+static constexpr uint32_t ContinuationLength = sizeof(ContinuationRecord);
+static constexpr uint32_t MaxSegmentLength =
+    MaxRecordLength - ContinuationLength; // always leave room for a continuation
+
+static inline TypeLeafKind getTypeLeafKind(ContinuationRecordKind CK) { // builder kind -> leaf kind
+  return (CK == ContinuationRecordKind::FieldList) ? LF_FIELDLIST
+                                                   : LF_METHODLIST; // any non-FieldList kind
+}
+
+ContinuationRecordBuilder::ContinuationRecordBuilder()
+    : SegmentWriter(Buffer), Mapping(SegmentWriter) {} // Mapping writes via SegmentWriter over Buffer
+
+ContinuationRecordBuilder::~ContinuationRecordBuilder() {}
+
+void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) {
+  assert(!Kind.hasValue()); // no list may be in progress; end() resets Kind
+  Kind = RecordKind;
+  Buffer.clear();
+  SegmentWriter.setOffset(0);
+  SegmentOffsets.clear();
+  SegmentOffsets.push_back(0); // the first segment starts at offset 0
+  assert(SegmentWriter.getOffset() == 0);
+  assert(SegmentWriter.getLength() == 0);
+
+  const SegmentInjection *FLI =
+      (RecordKind == ContinuationRecordKind::FieldList)
+          ? &InjectFieldList
+          : &InjectMethodOverloadList;
+  const uint8_t *FLIB = reinterpret_cast<const uint8_t *>(FLI);
+  InjectedSegmentBytes =
+      ArrayRef<uint8_t>(FLIB, FLIB + sizeof(SegmentInjection)); // spliced in by insertSegmentEnd()
+
+  CVType Type;
+  Type.Type = getTypeLeafKind(RecordKind);
+  cantFail(Mapping.visitTypeBegin(Type));
+
+  // Seed the first record with an appropriate record prefix.
+  RecordPrefix Prefix;
+  Prefix.RecordLen = 0; // real length is filled in by createSegmentRecord()
+  Prefix.RecordKind = Type.Type;
+  cantFail(SegmentWriter.writeObject(Prefix));
+}
+
+template <typename RecordType>
+void ContinuationRecordBuilder::writeMemberType(RecordType &Record) {
+  assert(Kind.hasValue()); // only valid between begin() and end()
+
+  uint32_t OriginalOffset = SegmentWriter.getOffset(); // start of this member; the split point if needed
+  CVMemberRecord CVMR;
+  CVMR.Kind = static_cast<TypeLeafKind>(Record.getKind());
+
+  // Member Records aren't length-prefixed, they only have a 2-byte TypeLeafKind
+  // at the beginning.
+  cantFail(SegmentWriter.writeEnum(CVMR.Kind));
+
+  // Let the Mapping handle the rest.
+  cantFail(Mapping.visitMemberBegin(CVMR));
+  cantFail(Mapping.visitKnownMember(CVMR, Record));
+  cantFail(Mapping.visitMemberEnd(CVMR));
+
+  // Make sure it's padded to 4 bytes.
+  addPadding(SegmentWriter);
+  assert(getCurrentSegmentLength() % 4 == 0);
+
+  // The maximum length of a single segment is 64KB minus the size to insert a
+  // continuation.  So if we are over that, inject a continuation between the
+  // previous member and the member that was just written, then end the previous
+  // segment after the continuation and begin a new one with the just-written
+  // member.
+  if (getCurrentSegmentLength() > MaxSegmentLength) {
+    // We need to inject some bytes before the member we just wrote but after
+    // the previous member.  Save off the length of the member we just wrote so
+    // that we can do some sanity checking on it.
+    uint32_t MemberLength = SegmentWriter.getOffset() - OriginalOffset;
+    insertSegmentEnd(OriginalOffset);
+    // Since this member now becomes a new top-level record, it should have
+    // gotten a RecordPrefix injected, and that RecordPrefix + the member we
+    // just wrote should now constitute the entirety of the current "new"
+    // segment.
+    assert(getCurrentSegmentLength() == MemberLength + sizeof(RecordPrefix));
+  }
+
+  assert(getCurrentSegmentLength() % 4 == 0);
+  assert(getCurrentSegmentLength() <= MaxSegmentLength);
+}
+
+uint32_t ContinuationRecordBuilder::getCurrentSegmentLength() const {
+  return SegmentWriter.getOffset() - SegmentOffsets.back(); // writer offset minus last segment start
+}
+
+void ContinuationRecordBuilder::insertSegmentEnd(uint32_t Offset) { // split current segment at Offset
+  uint32_t SegmentBegin = SegmentOffsets.back();
+  assert(Offset > SegmentBegin);
+  assert(Offset - SegmentBegin <= MaxSegmentLength);
+
+  // We need to make space for the continuation record.  For now we can't fill
+  // out the length or the TypeIndex of the back-reference, but we need the
+  // space to at least be there.
+  Buffer.insert(Offset, InjectedSegmentBytes);
+
+  uint32_t NewSegmentBegin = Offset + ContinuationLength; // new segment starts at the injected prefix
+  uint32_t SegmentLength = NewSegmentBegin - SegmentOffsets.back();
+
+  assert(SegmentLength % 4 == 0);
+  assert(SegmentLength <= MaxRecordLength);
+  SegmentOffsets.push_back(NewSegmentBegin);
+
+  // Seek to the end so that we can keep writing against the new segment.
+  SegmentWriter.setOffset(SegmentWriter.getLength());
+  assert(SegmentWriter.bytesRemaining() == 0);
+}
+
+CVType ContinuationRecordBuilder::createSegmentRecord(
+    uint32_t OffBegin, uint32_t OffEnd, Optional<TypeIndex> RefersTo) {
+  assert(OffEnd - OffBegin <= USHRT_MAX);
+
+  MutableArrayRef<uint8_t> Data = Buffer.data();
+  Data = Data.slice(OffBegin, OffEnd - OffBegin); // bytes [OffBegin, OffEnd) of the buffer
+
+  CVType Type;
+  Type.Type = getTypeLeafKind(*Kind);
+  Type.RecordData = Data;
+
+  // Write the length to the RecordPrefix, making sure it does not include
+  // sizeof(RecordPrefix::RecordLen)
+  RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(Data.data());
+  assert(Prefix->RecordKind == Type.Type);
+  Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen);
+
+  if (RefersTo.hasValue()) {
+    auto Continuation = Data.take_back(ContinuationLength); // continuation is the segment's tail
+    ContinuationRecord *CR =
+        reinterpret_cast<ContinuationRecord *>(Continuation.data());
+    assert(CR->Kind == TypeLeafKind::LF_INDEX);
+    assert(CR->IndexRef == 0xB0C0B0C0);
+    CR->IndexRef = RefersTo->getIndex(); // replace the placeholder with the real TypeIndex
+  }
+
+  return Type;
+}
+
+std::vector<CVType> ContinuationRecordBuilder::end(TypeIndex Index) {
+  CVType Type;
+  Type.Type = getTypeLeafKind(*Kind);
+  cantFail(Mapping.visitTypeEnd(Type));
+
+  // We're now done, and we have a series of segments each beginning at an
+  // offset specified in the SegmentOffsets array.  We now need to iterate
+  // over each segment and post-process them in the following two ways:
+  // 1) Each top-level record has a RecordPrefix whose type is either
+  //    LF_FIELDLIST or LF_METHODLIST, but the Length field is still 0.
+  //    Those should all be set to the correct length now.
+  // 2) Each continuation record has an IndexRef field which we set to the
+  //    magic value 0xB0C0B0C0.  Now that the caller has told us the TypeIndex
+  //    they want this sequence to start from, we can go through and update
+  //    each one.
+  //
+  // Logically, the sequence of records we've built up looks like this:
+  //
+  // SegmentOffsets[0]:   <Length>                    (Initially: uninitialized)
+  // SegmentOffsets[0]+2: LF_FIELDLIST
+  // SegmentOffsets[0]+4: Member[0]
+  // SegmentOffsets[0]+?: ...
+  // SegmentOffsets[0]+?: Member[4]
+  // SegmentOffsets[1]-8: LF_INDEX
+  // SegmentOffsets[1]-6: 0
+  // SegmentOffsets[1]-4: <Type Index of Next Record> (Initially: 0xB0C0B0C0)
+  //
+  // SegmentOffsets[1]:   <Length>                    (Initially: uninitialized)
+  // SegmentOffsets[1]+2: LF_FIELDLIST
+  // SegmentOffsets[1]+4: Member[0]
+  // SegmentOffsets[1]+?: ...
+  // SegmentOffsets[1]+?: Member[s]
+  // SegmentOffsets[2]-8: LF_INDEX
+  // SegmentOffsets[2]-6: 0
+  // SegmentOffsets[2]-4: <Type Index of Next Record> (Initially: 0xB0C0B0C0)
+  //
+  // ...
+  //
+  // SegmentOffsets[N]:   <Length>                    (Initially: uninitialized)
+  // SegmentOffsets[N]+2: LF_FIELDLIST
+  // SegmentOffsets[N]+4: Member[0]
+  // SegmentOffsets[N]+?: ...
+  // SegmentOffsets[N]+?: Member[t]
+  //
+  // And this is the way we have laid them out in the serialization buffer.  But
+  // we cannot actually commit them to the underlying stream this way, due to
+  // the topological sorting requirement of a type stream (specifically,
+  // TypeIndex references can only point backwards, not forwards).  So the
+  // sequence that we return to the caller contains the records in reverse
+  // order, which is the proper order for committing the serialized records.
+
+  std::vector<CVType> Types;
+  Types.reserve(SegmentOffsets.size());
+
+  auto SO = makeArrayRef(SegmentOffsets);
+
+  uint32_t End = SegmentWriter.getOffset(); // the last segment runs to the current write position
+
+  Optional<TypeIndex> RefersTo; // empty for the last segment, which has no continuation
+  for (uint32_t Offset : reverse(SO)) {
+    Types.push_back(createSegmentRecord(Offset, End, RefersTo));
+
+    End = Offset; // the preceding segment ends where this one began
+    RefersTo = Index++; // the preceding segment's continuation refers to the record just emitted
+  }
+
+  Kind.reset(); // allow begin() to be called again
+  return Types;
+}
+
+// Explicitly instantiate the member function for each known type so that we can
+// implement this in the cpp file.
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)                                 \
+  template void llvm::codeview::ContinuationRecordBuilder::writeMemberType(    \
+      Name##Record &Record);
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
diff --git a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
new file mode 100644
index 00000000000..d28b7c3c2d8
--- /dev/null
+++ b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -0,0 +1,62 @@
+#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+static void writeRecordPrefix(BinaryStreamWriter &Writer, TypeLeafKind Kind) {
+  RecordPrefix Prefix;
+  Prefix.RecordKind = Kind;
+  Prefix.RecordLen = 0; // placeholder; serialize() patches in the real length
+  cantFail(Writer.writeObject(Prefix));
+}
+
+static void addPadding(BinaryStreamWriter &Writer) { // pad Writer's offset to a multiple of 4
+  uint32_t Align = Writer.getOffset() % 4;
+  if (Align == 0)
+    return; // already aligned, nothing to write
+
+  int PaddingBytes = 4 - Align;
+  while (PaddingBytes > 0) {
+    uint8_t Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes); // LF_PAD0 + pad bytes still to emit
+    cantFail(Writer.writeInteger(Pad));
+    --PaddingBytes;
+  }
+}
+
+SimpleTypeSerializer::SimpleTypeSerializer() : ScratchBuffer(MaxRecordLength) {} // sized to MaxRecordLength
+
+SimpleTypeSerializer::~SimpleTypeSerializer() {}
+
+template <typename T>
+ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
+  BinaryStreamWriter Writer(ScratchBuffer, support::little); // fresh writer over the shared scratch buffer
+  TypeRecordMapping Mapping(Writer);
+
+  CVType CVT;
+  CVT.Type = static_cast<TypeLeafKind>(Record.getKind());
+
+  writeRecordPrefix(Writer, CVT.Type);
+
+  cantFail(Mapping.visitTypeBegin(CVT));
+  cantFail(Mapping.visitKnownRecord(CVT, Record));
+  cantFail(Mapping.visitTypeEnd(CVT));
+
+  addPadding(Writer);
+
+  RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(ScratchBuffer.data());
+
+  Prefix->RecordKind = CVT.kind();
+  Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t); // total bytes written minus 2
+
+  return {ScratchBuffer.data(), Writer.getOffset()}; // view into ScratchBuffer, not a copy
+}
+
+// Explicitly instantiate the member function for each known type so that we can
+// implement this in the cpp file.
+#define TYPE_RECORD(EnumName, EnumVal, Name)                                   \
+  template ArrayRef<uint8_t> llvm::codeview::SimpleTypeSerializer::serialize(  \
+      Name##Record &Record);
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 114f6fd2897..9b8a6053da8 100644
--- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -426,7 +426,8 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           OneMethodRecord &Record) {
-  MapOneMethodRecord Mapper(false);
+  const bool IsFromOverloadList = (TypeKind == LF_METHODLIST);
+  MapOneMethodRecord Mapper(IsFromOverloadList);
   return Mapper(IO, Record);
 }
 
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
deleted file mode 100644
index 003c13b4a20..00000000000
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ /dev/null
@@ -1,389 +0,0 @@
-//===- TypeSerialzier.cpp -------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-namespace {
-
-struct HashedType {
-  uint64_t Hash;
-  const uint8_t *Data;
-  unsigned Size; // FIXME: Go to uint16_t?
-  TypeIndex Index;
-};
-
-/// Wrapper around a poitner to a HashedType. Hash and equality operations are
-/// based on data in the pointee.
-struct HashedTypePtr {
-  HashedTypePtr() = default;
-  HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
-
-  HashedType *Ptr = nullptr;
-};
-
-} // end anonymous namespace
-
-namespace llvm {
-
-template <> struct DenseMapInfo<HashedTypePtr> {
-  static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
-
-  static inline HashedTypePtr getTombstoneKey() {
-    return HashedTypePtr(reinterpret_cast<HashedType *>(1));
-  }
-
-  static unsigned getHashValue(HashedTypePtr Val) {
-    assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
-    return Val.Ptr->Hash;
-  }
-
-  static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
-    HashedType *LHS = LHSP.Ptr;
-    HashedType *RHS = RHSP.Ptr;
-    if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr)
-      return LHS == RHS;
-    if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size)
-      return false;
-    return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
-  }
-};
-
-} // end namespace llvm
-
-/// Private implementation so that we don't leak our DenseMap instantiations to
-/// users.
-class llvm::codeview::TypeHasher {
-private:
-  /// Storage for type record provided by the caller. Records will outlive the
-  /// hasher object, so they should be allocated here.
-  BumpPtrAllocator &RecordStorage;
-
-  /// Storage for hash keys. These only need to live as long as the hashing
-  /// operation.
-  BumpPtrAllocator KeyStorage;
-
-  /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here,
-  /// but DenseMap is inefficient when the keys are long (like type records)
-  /// because it recomputes the hash value of every key when it grows. This
-  /// value type stores the hash out of line in KeyStorage, so that table
-  /// entries are small and easy to rehash.
-  DenseSet<HashedTypePtr> HashedRecords;
-
-public:
-  TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {}
-
-  void reset() { HashedRecords.clear(); }
-
-  /// Takes the bytes of type record, inserts them into the hash table, saves
-  /// them, and returns a pointer to an identical stable type record along with
-  /// its type index in the destination stream.
-  TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record, TypeIndex TI);
-};
-
-TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record,
-                                        TypeIndex TI) {
-  assert(Record.size() < UINT32_MAX && "Record too big");
-  assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
-
-  // Compute the hash up front so we can store it in the key.
-  HashedType TempHashedType = {hash_value(Record), Record.data(),
-                               unsigned(Record.size()), TI};
-  auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType));
-  HashedType *&Hashed = Result.first->Ptr;
-
-  if (Result.second) {
-    // This was a new type record. We need stable storage for both the key and
-    // the record. The record should outlive the hashing operation.
-    Hashed = KeyStorage.Allocate<HashedType>();
-    *Hashed = TempHashedType;
-
-    uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
-    memcpy(Stable, Record.data(), Record.size());
-    Hashed->Data = Stable;
-    assert(Hashed->Size == Record.size());
-  }
-
-  // Update the caller's copy of Record to point a stable copy.
-  Record = ArrayRef<uint8_t>(Hashed->Data, Hashed->Size);
-  return Hashed->Index;
-}
-
-TypeIndex TypeSerializer::nextTypeIndex() const {
-  return TypeIndex::fromArrayIndex(SeenRecords.size());
-}
-
-bool TypeSerializer::isInFieldList() const {
-  return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
-}
-
-MutableArrayRef<uint8_t> TypeSerializer::getCurrentSubRecordData() {
-  assert(isInFieldList());
-  return getCurrentRecordData().drop_front(CurrentSegment.length());
-}
-
-MutableArrayRef<uint8_t> TypeSerializer::getCurrentRecordData() {
-  return MutableArrayRef<uint8_t>(RecordBuffer).take_front(Writer.getOffset());
-}
-
-Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) {
-  RecordPrefix Prefix;
-  Prefix.RecordKind = Kind;
-  Prefix.RecordLen = 0;
-  if (auto EC = Writer.writeObject(Prefix))
-    return EC;
-  return Error::success();
-}
-
-Expected<MutableArrayRef<uint8_t>>
-TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
-  uint32_t Align = Record.size() % 4;
-  if (Align == 0)
-    return Record;
-
-  int PaddingBytes = 4 - Align;
-  int N = PaddingBytes;
-  while (PaddingBytes > 0) {
-    uint8_t Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes);
-    if (auto EC = Writer.writeInteger(Pad))
-      return std::move(EC);
-    --PaddingBytes;
-  }
-  return MutableArrayRef<uint8_t>(Record.data(), Record.size() + N);
-}
-
-TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash)
-    : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2),
-      Stream(RecordBuffer, support::little), Writer(Stream),
-      Mapping(Writer) {
-  // RecordBuffer needs to be able to hold enough data so that if we are 1
-  // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
-  // we won't overflow.
-  if (Hash)
-    Hasher = llvm::make_unique<TypeHasher>(Storage);
-}
-
-TypeSerializer::~TypeSerializer() = default;
-
-ArrayRef<ArrayRef<uint8_t>> TypeSerializer::records() const {
-  return SeenRecords;
-}
-
-void TypeSerializer::reset() {
-  if (Hasher)
-    Hasher->reset();
-  Writer.setOffset(0);
-  CurrentSegment = RecordSegment();
-  FieldListSegments.clear();
-  TypeKind.reset();
-  MemberKind.reset();
-  SeenRecords.clear();
-}
-
-TypeIndex TypeSerializer::insertRecordBytes(ArrayRef<uint8_t> &Record) {
-  assert(!TypeKind.hasValue() && "Already in a type mapping!");
-  assert(Writer.getOffset() == 0 && "Stream has data already!");
-
-  if (Hasher) {
-    TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex());
-    if (nextTypeIndex() == ActualTI)
-      SeenRecords.push_back(Record);
-    return ActualTI;
-  }
-
-  TypeIndex NewTI = nextTypeIndex();
-  uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
-  memcpy(Stable, Record.data(), Record.size());
-  Record = ArrayRef<uint8_t>(Stable, Record.size());
-  SeenRecords.push_back(Record);
-  return NewTI;
-}
-
-TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) {
-  assert(!TypeKind.hasValue() && "Already in a type mapping!");
-  assert(Writer.getOffset() == 0 && "Stream has data already!");
-
-  TypeIndex TI;
-  ArrayRef<uint8_t> OriginalData = Record.OriginalRecord.RecordData;
-  if (Record.Mappings.empty()) {
-    // This record did not remap any type indices.  Just write it.
-    return insertRecordBytes(OriginalData);
-  }
-
-  // At least one type index was remapped.  Before we can hash it we have to
-  // copy the full record bytes, re-write each type index, then hash the copy.
-  // We do this in temporary storage since only the DenseMap can decide whether
-  // this record already exists, and if it does we don't want the memory to
-  // stick around.
-  RemapStorage.resize(OriginalData.size());
-  ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size());
-  uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix);
-  for (const auto &M : Record.Mappings) {
-    // First 4 bytes of every record are the record prefix, but the mapping
-    // offset is relative to the content which starts after.
-    *(TypeIndex *)(ContentBegin + M.first) = M.second;
-  }
-  auto RemapRef = makeArrayRef(RemapStorage);
-  return insertRecordBytes(RemapRef);
-}
-
-Error TypeSerializer::visitTypeBegin(CVType &Record) {
-  assert(!TypeKind.hasValue() && "Already in a type mapping!");
-  assert(Writer.getOffset() == 0 && "Stream has data already!");
-
-  if (auto EC = writeRecordPrefix(Record.kind()))
-    return EC;
-
-  TypeKind = Record.kind();
-  if (auto EC = Mapping.visitTypeBegin(Record))
-    return EC;
-
-  return Error::success();
-}
-
-Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
-  assert(TypeKind.hasValue() && "Not in a type mapping!");
-  if (auto EC = Mapping.visitTypeEnd(Record))
-    return std::move(EC);
-
-  // Update the record's length and fill out the CVType members to point to
-  // the stable memory holding the record's data.
-  auto ThisRecordData = getCurrentRecordData();
-  auto ExpectedData = addPadding(ThisRecordData);
-  if (!ExpectedData)
-    return ExpectedData.takeError();
-  ThisRecordData = *ExpectedData;
-
-  RecordPrefix *Prefix =
-      reinterpret_cast<RecordPrefix *>(ThisRecordData.data());
-  Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t);
-
-  Record.Type = *TypeKind;
-  Record.RecordData = ThisRecordData;
-
-  // insertRecordBytes assumes we're not in a mapping, so do this first.
-  TypeKind.reset();
-  Writer.setOffset(0);
-
-  TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData);
-
-  // Write out each additional segment in reverse order, and update each
-  // record's continuation index to point to the previous one.
-  for (auto X : reverse(FieldListSegments)) {
-    auto CIBytes = X.take_back(sizeof(uint32_t));
-    support::ulittle32_t *CI =
-        reinterpret_cast<support::ulittle32_t *>(CIBytes.data());
-    assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder");
-    *CI = InsertedTypeIndex.getIndex();
-    InsertedTypeIndex = insertRecordBytes(X);
-  }
-
-  FieldListSegments.clear();
-  CurrentSegment.SubRecords.clear();
-
-  return InsertedTypeIndex;
-}
-
-Error TypeSerializer::visitTypeEnd(CVType &Record) {
-  auto ExpectedIndex = visitTypeEndGetIndex(Record);
-  if (!ExpectedIndex)
-    return ExpectedIndex.takeError();
-  return Error::success();
-}
-
-Error TypeSerializer::visitMemberBegin(CVMemberRecord &Record) {
-  assert(isInFieldList() && "Not in a field list!");
-  assert(!MemberKind.hasValue() && "Already in a member record!");
-  MemberKind = Record.Kind;
-
-  if (auto EC = Mapping.visitMemberBegin(Record))
-    return EC;
-
-  return Error::success();
-}
-
-Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
-  if (auto EC = Mapping.visitMemberEnd(Record))
-    return EC;
-
-  // Check if this subrecord makes the current segment not fit in 64K minus
-  // the space for a continuation record (8 bytes). If the segment does not
-  // fit, insert a continuation record.
-  if (Writer.getOffset() > MaxRecordLength - ContinuationLength) {
-    MutableArrayRef<uint8_t> Data = getCurrentRecordData();
-    SubRecord LastSubRecord = CurrentSegment.SubRecords.back();
-    uint32_t CopySize = CurrentSegment.length() - LastSubRecord.Size;
-    auto CopyData = Data.take_front(CopySize);
-    auto LeftOverData = Data.drop_front(CopySize);
-    assert(LastSubRecord.Size == LeftOverData.size());
-
-    // Allocate stable storage for the record and copy the old record plus
-    // continuation over.
-    uint16_t LengthWithSize = CopySize + ContinuationLength;
-    assert(LengthWithSize <= MaxRecordLength);
-    RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(CopyData.data());
-    Prefix->RecordLen = LengthWithSize - sizeof(uint16_t);
-
-    uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
-    auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
-    MutableBinaryByteStream CS(SavedSegment, support::little);
-    BinaryStreamWriter CW(CS);
-    if (auto EC = CW.writeBytes(CopyData))
-      return EC;
-    if (auto EC = CW.writeEnum(TypeLeafKind::LF_INDEX))
-      return EC;
-    if (auto EC = CW.writeInteger<uint16_t>(0))
-      return EC;
-    if (auto EC = CW.writeInteger<uint32_t>(0xB0C0B0C0))
-      return EC;
-    FieldListSegments.push_back(SavedSegment);
-
-    // Write a new placeholder record prefix to mark the start of this new
-    // top-level record.
-    Writer.setOffset(0);
-    if (auto EC = writeRecordPrefix(TypeLeafKind::LF_FIELDLIST))
-      return EC;
-
-    // Then move over the subrecord that overflowed the old segment to the
-    // beginning of this segment.  Note that we have to use memmove here
-    // instead of Writer.writeBytes(), because the new and old locations
-    // could overlap.
-    ::memmove(Stream.data().data() + sizeof(RecordPrefix), LeftOverData.data(),
-              LeftOverData.size());
-    // And point the segment writer at the end of that subrecord.
-    Writer.setOffset(LeftOverData.size() + sizeof(RecordPrefix));
-
-    CurrentSegment.SubRecords.clear();
-    CurrentSegment.SubRecords.push_back(LastSubRecord);
-  }
-
-  // Update the CVMemberRecord since we may have shifted around or gotten
-  // padded.
-  Record.Data = getCurrentSubRecordData();
-
-  MemberKind.reset();
-  return Error::success();
-}
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index bff3516203a..06f819df7ec 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -100,7 +100,7 @@ private:
                     bool RemapSuccess) {
     TypeIndex DestIdx = Untranslated;
     if (RemapSuccess)
-      DestIdx = Dest.writeSerializedRecord(Record);
+      DestIdx = Dest.insertRecord(Record);
     addMapping(DestIdx);
     return Error::success();
   }
diff --git a/lib/DebugInfo/CodeView/TypeTableBuilder.cpp b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
new file mode 100644
index 00000000000..bce636f3894
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
@@ -0,0 +1,206 @@
+//===- TypeTableBuilder.cpp -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace {
+
+struct HashedType {
+  uint64_t Hash; // Hash of the record bytes, computed once at insertion.
+  const uint8_t *Data; // First byte of the record.
+  unsigned Size; // Record length in bytes. FIXME: Go to uint16_t?
+  TypeIndex Index; // Type index handed out for this record.
+};
+
+/// Wrapper around a pointer to a HashedType. Hash and equality operations are
+/// based on data in the pointee.
+struct HashedTypePtr {
+  HashedTypePtr() = default;
+  HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {}
+
+  HashedType *Ptr = nullptr; // Null doubles as the DenseMapInfo empty key.
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<HashedTypePtr> {
+  static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); }
+
+  static inline HashedTypePtr getTombstoneKey() {
+    return HashedTypePtr(reinterpret_cast<HashedType *>(1)); // Sentinel only.
+  }
+
+  static unsigned getHashValue(HashedTypePtr Val) {
+    assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr);
+    return Val.Ptr->Hash; // Stored 64-bit hash, narrowed to unsigned.
+  }
+
+  static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) {
+    HashedType *LHS = LHSP.Ptr; // NOTE(review): only RHS is sentinel-checked.
+    HashedType *RHS = RHSP.Ptr;
+    if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr)
+      return LHS == RHS; // RHS is a sentinel: compare pointers only.
+    if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size)
+      return false; // Cheap rejection before comparing bytes.
+    return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0;
+  }
+};
+
+} // end namespace llvm
+
+/// Private implementation so that we don't leak our DenseMap instantiations to
+/// users.
+class llvm::codeview::TypeHasher {
+private:
+  /// Storage for type record provided by the caller. Records will outlive the
+  /// hasher object, so they should be allocated here.
+  BumpPtrAllocator &RecordStorage;
+
+  /// Storage for hash keys. These only need to live as long as the hashing
+  /// operation.
+  BumpPtrAllocator KeyStorage;
+
+  /// Hash table. We really want a DenseMap<ArrayRef<uint8_t>, TypeIndex> here,
+  /// but DenseMap is inefficient when the keys are long (like type records)
+  /// because it recomputes the hash value of every key when it grows. This
+  /// value type stores the hash out of line in KeyStorage, so that table
+  /// entries are small and easy to rehash.
+  DenseSet<HashedTypePtr> HashedRecords;
+
+public:
+  TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {}
+
+  void reset() { HashedRecords.clear(); } // KeyStorage is left as-is.
+
+  /// Takes the bytes of a type record and inserts them into the hash table. On
+  /// return, \p Record refers to a stable copy of the bytes; the result is the
+  /// index stored for them (\p TI when the record was not already present).
+  TypeIndex getOrCreateRecord(ArrayRef<uint8_t> &Record, TypeIndex TI);
+};
+
+TypeIndex TypeHasher::getOrCreateRecord(ArrayRef<uint8_t> &Record,
+                                        TypeIndex TI) {
+  assert(Record.size() < UINT32_MAX && "Record too big");
+  assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+
+  // Hash up front and probe the table with a temporary key on the stack.
+  HashedType TempHashedType = {hash_value(Record), Record.data(),
+                               unsigned(Record.size()), TI};
+  auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType));
+  HashedType *&Hashed = Result.first->Ptr; // Refers to the slot in the set.
+
+  if (Result.second) {
+    // This was a new type record. We need stable storage for both the key and
+    // the record. The record should outlive the hashing operation.
+    Hashed = KeyStorage.Allocate<HashedType>(); // Swap out the stack key.
+    *Hashed = TempHashedType;
+
+    uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
+    memcpy(Stable, Record.data(), Record.size());
+    Hashed->Data = Stable; // Key now points at the stable bytes.
+    assert(Hashed->Size == Record.size());
+  }
+
+  // Update the caller's copy of Record to point at the stable copy.
+  Record = ArrayRef<uint8_t>(Hashed->Data, Hashed->Size);
+  return Hashed->Index; // Equals TI only when the record was newly added.
+}
+
+TypeIndex TypeTableBuilder::nextTypeIndex() const {
+  return TypeIndex::fromArrayIndex(SeenRecords.size()); // Next free slot.
+}
+
+TypeTableBuilder::TypeTableBuilder(BumpPtrAllocator &Storage, bool Hash)
+    : RecordStorage(Storage) {
+  if (Hash) // A hasher is only created when the caller asks for one.
+    Hasher = llvm::make_unique<TypeHasher>(Storage);
+}
+
+TypeTableBuilder::~TypeTableBuilder() = default; // TypeHasher is complete here.
+
+ArrayRef<ArrayRef<uint8_t>> TypeTableBuilder::records() const {
+  return SeenRecords; // Records in the order they were appended.
+}
+
+void TypeTableBuilder::reset() {
+  if (Hasher) // Null when the builder was constructed without hashing.
+    Hasher->reset();
+  SeenRecords.clear(); // RecordStorage itself is not released here.
+}
+
+TypeIndex TypeTableBuilder::insertRecordBytes(ArrayRef<uint8_t> &Record) {
+  if (Hasher) {
+    TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex());
+    if (nextTypeIndex() == ActualTI) // Fresh record, not a duplicate.
+      SeenRecords.push_back(Record);
+    return ActualTI;
+  }
+
+  TypeIndex NewTI = nextTypeIndex(); // No hasher: every record is appended.
+  uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size());
+  memcpy(Stable, Record.data(), Record.size());
+  Record = ArrayRef<uint8_t>(Stable, Record.size()); // Caller sees the copy.
+  SeenRecords.push_back(Record);
+  return NewTI;
+}
+
+TypeIndex TypeTableBuilder::insertRecord(const RemappedType &Record) {
+  TypeIndex TI; // NOTE(review): never read in this function.
+  ArrayRef<uint8_t> OriginalData = Record.OriginalRecord.RecordData;
+  if (Record.Mappings.empty()) {
+    // This record did not remap any type indices.  Just write it.
+    return insertRecordBytes(OriginalData);
+  }
+
+  // At least one type index was remapped.  Before we can hash it we have to
+  // copy the full record bytes, re-write each type index, then hash the copy.
+  // We do this in temporary storage since only the hash table can decide
+  // whether this record already exists, and if it does we don't want the
+  // memory to stick around.
+  RemapStorage.resize(OriginalData.size()); // Scratch buffer, resized per call.
+  ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size());
+  uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix);
+  for (const auto &M : Record.Mappings) {
+    // First 4 bytes of every record are the record prefix, but the mapping
+    // offset is relative to the content which starts after.
+    *(TypeIndex *)(ContentBegin + M.first) = M.second; // NOTE(review): C cast.
+  }
+  auto RemapRef = makeArrayRef(RemapStorage);
+  return insertRecordBytes(RemapRef); // Stable copy is made by the callee.
+}
+
+TypeIndex TypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) {
+  TypeIndex TI;
+  auto Fragments = Builder.end(nextTypeIndex());
+  assert(!Fragments.empty());
+  for (auto C : Fragments) // Insert every fragment in the order returned.
+    TI = insertRecordBytes(C.RecordData);
+  return TI; // Index of the last fragment inserted.
+}
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index 456d6f19b23..9262bebf56f 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -11,7 +11,6 @@
 
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/RecordName.h"
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
author	Zachary Turner <zturner@google.com>	2017-11-28 18:33:17 +0000
committer	Zachary Turner <zturner@google.com>	2017-11-28 18:33:17 +0000
commit	ab45c0673e7510b52995f560cce64b002601ed9f (patch)
tree	5070004d67b7856a64a77c523faeb3c426adbc1c /lib/DebugInfo
parent	a710db28b7f3ef5d7fe54930822b88ec8901c02f (diff)