summaryrefslogtreecommitdiff
path: root/lib/Bitcode
diff options
context:
space:
mode:
authorHaojie Wang <haojie0429@gmail.com>2017-07-21 17:25:20 +0000
committerHaojie Wang <haojie0429@gmail.com>2017-07-21 17:25:20 +0000
commite7ac9f3115aa972dff61dfacac847c55d768dd6f (patch)
tree1872b4c437e11531245420dc8ddab09db3037ce3 /lib/Bitcode
parentabccd7d6bca2260f90ad5a9d8c1470377969d9ba (diff)
ThinLTO Minimized Bitcode File Size Reduction
Summary: Currently the ThinLTO minimized bitcode file only strip the debug info, but there is still a lot of information in the minimized bit code file that will be not used for thin linker. In this patch, most of the extra information is striped to reduce the minimized bitcode file. Now only ModuleVersion, ModuleInfo, ModuleGlobalValueSummary, ModuleHash, Symtab and Strtab are left. Now the minimized bitcode file size is reduced to 15%-30% of the debug info stripped bitcode file size. Reviewers: danielcdh, tejohnson, pcc Reviewed By: pcc Subscribers: mehdi_amini, aprantl, inglorion, eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D35334 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308760 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Bitcode')
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp301
1 files changed, 236 insertions, 65 deletions
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index dcffde1742c..330aab9ad23 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -97,11 +97,10 @@ void BitcodeWriterBase::writeModuleVersion() {
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<uint64_t>{2});
}
-/// Class to manage the bitcode writing for a module.
-class ModuleBitcodeWriter : public BitcodeWriterBase {
- /// Pointer to the buffer allocated by caller for bitcode writing.
- const SmallVectorImpl<char> &Buffer;
-
+/// Base class to manage the module bitcode writing, currently subclassed for
+/// ModuleBitcodeWriter and ThinLinkBitcodeWriter.
+class ModuleBitcodeWriterBase : public BitcodeWriterBase {
+protected:
/// The Module to write to bitcode.
const Module &M;
@@ -111,22 +110,6 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Optional per-module index to write for ThinLTO.
const ModuleSummaryIndex *Index;
- /// True if a module hash record should be written.
- bool GenerateHash;
-
- SHA1 Hasher;
-
- /// If non-null, when GenerateHash is true, the resulting hash is written
- /// into ModHash. When GenerateHash is false, that specified value
- /// is used as the hash instead of computing from the generated bitcode.
- /// Can be used to produce the same module hash for a minimized bitcode
- /// used just for the thin link as in the regular full bitcode that will
- /// be used in the backend.
- ModuleHash *ModHash;
-
- /// The start bit of the identification block.
- uint64_t BitcodeStartBit;
-
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
/// class to use in the VST and summary block records.
@@ -140,17 +123,14 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
uint64_t VSTOffsetPlaceholder = 0;
public:
- /// Constructs a ModuleBitcodeWriter object for the given Module,
+ /// Constructs a ModuleBitcodeWriterBase object for the given Module,
/// writing to the provided \p Buffer.
- ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
- StringTableBuilder &StrtabBuilder,
- BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
- const ModuleSummaryIndex *Index, bool GenerateHash,
- ModuleHash *ModHash = nullptr)
- : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M),
- VE(*M, ShouldPreserveUseListOrder), Index(Index),
- GenerateHash(GenerateHash), ModHash(ModHash),
- BitcodeStartBit(Stream.GetCurrentBitNo()) {
+ ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder,
+ BitstreamWriter &Stream,
+ bool ShouldPreserveUseListOrder,
+ const ModuleSummaryIndex *Index)
+ : BitcodeWriterBase(Stream, StrtabBuilder), M(*M),
+ VE(*M, ShouldPreserveUseListOrder), Index(Index) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
@@ -172,6 +152,70 @@ public:
assignValueId(CallEdge.first.getGUID());
}
+protected:
+ void writePerModuleGlobalValueSummary();
+
+private:
+ void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
+ GlobalValueSummary *Summary,
+ unsigned ValueID,
+ unsigned FSCallsAbbrev,
+ unsigned FSCallsProfileAbbrev,
+ const Function &F);
+ void writeModuleLevelReferences(const GlobalVariable &V,
+ SmallVector<uint64_t, 64> &NameVals,
+ unsigned FSModRefsAbbrev);
+
+ void assignValueId(GlobalValue::GUID ValGUID) {
+ GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
+ }
+ unsigned getValueId(GlobalValue::GUID ValGUID) {
+ const auto &VMI = GUIDToValueIdMap.find(ValGUID);
+ // Expect that any GUID value had a value Id assigned by an
+ // earlier call to assignValueId.
+ assert(VMI != GUIDToValueIdMap.end() &&
+ "GUID does not have assigned value Id");
+ return VMI->second;
+ }
+ // Helper to get the valueId for the type of value recorded in VI.
+ unsigned getValueId(ValueInfo VI) {
+ if (!VI.getValue())
+ return getValueId(VI.getGUID());
+ return VE.getValueID(VI.getValue());
+ }
+ std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
+};
+
+/// Class to manage the bitcode writing for a module.
+class ModuleBitcodeWriter : public ModuleBitcodeWriterBase {
+ /// Pointer to the buffer allocated by caller for bitcode writing.
+ const SmallVectorImpl<char> &Buffer;
+
+ /// True if a module hash record should be written.
+ bool GenerateHash;
+
+ /// If non-null, when GenerateHash is true, the resulting hash is written
+ /// into ModHash.
+ ModuleHash *ModHash;
+
+ SHA1 Hasher;
+
+ /// The start bit of the identification block.
+ uint64_t BitcodeStartBit;
+
+public:
+ /// Constructs a ModuleBitcodeWriter object for the given Module,
+ /// writing to the provided \p Buffer.
+ ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
+ StringTableBuilder &StrtabBuilder,
+ BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
+ const ModuleSummaryIndex *Index, bool GenerateHash,
+ ModuleHash *ModHash = nullptr)
+ : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream,
+ ShouldPreserveUseListOrder, Index),
+ Buffer(Buffer), GenerateHash(GenerateHash), ModHash(ModHash),
+ BitcodeStartBit(Stream.GetCurrentBitNo()) {}
+
/// Emit the current module to the bitstream.
void write();
@@ -287,37 +331,8 @@ private:
writeFunction(const Function &F,
DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
void writeBlockInfo();
- void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
- GlobalValueSummary *Summary,
- unsigned ValueID,
- unsigned FSCallsAbbrev,
- unsigned FSCallsProfileAbbrev,
- const Function &F);
- void writeModuleLevelReferences(const GlobalVariable &V,
- SmallVector<uint64_t, 64> &NameVals,
- unsigned FSModRefsAbbrev);
- void writePerModuleGlobalValueSummary();
void writeModuleHash(size_t BlockStartPos);
- void assignValueId(GlobalValue::GUID ValGUID) {
- GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
- }
- unsigned getValueId(GlobalValue::GUID ValGUID) {
- const auto &VMI = GUIDToValueIdMap.find(ValGUID);
- // Expect that any GUID value had a value Id assigned by an
- // earlier call to assignValueId.
- assert(VMI != GUIDToValueIdMap.end() &&
- "GUID does not have assigned value Id");
- return VMI->second;
- }
- // Helper to get the valueId for the type of value recorded in VI.
- unsigned getValueId(ValueInfo VI) {
- if (!VI.getValue())
- return getValueId(VI.getGUID());
- return VE.getValueID(VI.getValue());
- }
- std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
-
unsigned getEncodedSyncScopeID(SyncScope::ID SSID) {
return unsigned(SSID);
}
@@ -3267,7 +3282,7 @@ static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
}
// Helper to emit a single function summary record.
-void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
+void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
const Function &F) {
@@ -3301,7 +3316,7 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
// Collect the global value references in the given variable's initializer,
// and emit them in a summary record.
-void ModuleBitcodeWriter::writeModuleLevelReferences(
+void ModuleBitcodeWriterBase::writeModuleLevelReferences(
const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev) {
auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName()));
@@ -3335,7 +3350,7 @@ static const uint64_t INDEX_VERSION = 3;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
-void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
+void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
// By default we compile with ThinLTO if the module has a summary, but the
// client can request full LTO with a module flag.
bool IsThinLTO = true;
@@ -3689,8 +3704,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
if (ModHash)
// Save the written hash value.
std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));
- } else if (ModHash)
- Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef<uint32_t>(*ModHash));
+ }
}
void ModuleBitcodeWriter::write() {
@@ -3985,3 +3999,160 @@ void llvm::WriteIndexToFile(
Out.write((char *)&Buffer.front(), Buffer.size());
}
+
+/// Class to manage the bitcode writing for a thin link bitcode file.
+class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase {
+ /// ModHash is for use in ThinLTO incremental build, generated while writing
+ /// the module bitcode file.
+ const ModuleHash *ModHash;
+
+public:
+ ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder,
+ BitstreamWriter &Stream,
+ const ModuleSummaryIndex &Index,
+ const ModuleHash &ModHash)
+ : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream,
+ /*ShouldPreserveUseListOrder=*/false, &Index),
+ ModHash(&ModHash) {}
+
+ void write();
+
+private:
+ void writeSimplifiedModuleInfo();
+};
+
+// This function writes a simpilified module info for thin link bitcode file.
+// It only contains the source file name along with the name(the offset and
+// size in strtab) and linkage for global values. For the global value info
+// entry, in order to keep linkage at offset 5, there are three zeros used
+// as padding.
+void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() {
+ SmallVector<unsigned, 64> Vals;
+ // Emit the module's source file name.
+ {
+ StringEncoding Bits = getStringEncoding(M.getSourceFileName());
+ BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
+ if (Bits == SE_Char6)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
+ else if (Bits == SE_Fixed7)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
+
+ // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(AbbrevOpToUse);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ for (const auto P : M.getSourceFileName())
+ Vals.push_back((unsigned char)P);
+
+ Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
+ Vals.clear();
+ }
+
+ // Emit the global variable information.
+ for (const GlobalVariable &GV : M.globals()) {
+ // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage]
+ Vals.push_back(StrtabBuilder.add(GV.getName()));
+ Vals.push_back(GV.getName().size());
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(getEncodedLinkage(GV));
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (const Function &F : M) {
+ // FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage]
+ Vals.push_back(StrtabBuilder.add(F.getName()));
+ Vals.push_back(F.getName().size());
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(getEncodedLinkage(F));
+
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals);
+ Vals.clear();
+ }
+
+ // Emit the alias information.
+ for (const GlobalAlias &A : M.aliases()) {
+ // ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage]
+ Vals.push_back(StrtabBuilder.add(A.getName()));
+ Vals.push_back(A.getName().size());
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(getEncodedLinkage(A));
+
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals);
+ Vals.clear();
+ }
+
+ // Emit the ifunc information.
+ for (const GlobalIFunc &I : M.ifuncs()) {
+ // IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage]
+ Vals.push_back(StrtabBuilder.add(I.getName()));
+ Vals.push_back(I.getName().size());
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(0);
+ Vals.push_back(getEncodedLinkage(I));
+
+ Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
+ Vals.clear();
+ }
+}
+
+void ThinLinkBitcodeWriter::write() {
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ writeModuleVersion();
+
+ writeSimplifiedModuleInfo();
+
+ writePerModuleGlobalValueSummary();
+
+ // Write module hash.
+ Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef<uint32_t>(*ModHash));
+
+ Stream.ExitBlock();
+}
+
+void BitcodeWriter::writeThinLinkBitcode(const Module *M,
+ const ModuleSummaryIndex &Index,
+ const ModuleHash &ModHash) {
+ assert(!WroteStrtab);
+
+ // The Mods vector is used by irsymtab::build, which requires non-const
+ // Modules in case it needs to materialize metadata. But the bitcode writer
+ // requires that the module is materialized, so we can cast to non-const here,
+ // after checking that it is in fact materialized.
+ assert(M->isMaterialized());
+ Mods.push_back(const_cast<Module *>(M));
+
+ ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index,
+ ModHash);
+ ThinLinkWriter.write();
+}
+
+// Write the specified thin link bitcode file to the given raw output stream,
+// where it will be written in a new bitcode block. This is used when
+// writing the per-module index file for ThinLTO.
+void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out,
+ const ModuleSummaryIndex &Index,
+ const ModuleHash &ModHash) {
+ SmallVector<char, 0> Buffer;
+ Buffer.reserve(256 * 1024);
+
+ BitcodeWriter Writer(Buffer);
+ Writer.writeThinLinkBitcode(M, Index, ModHash);
+ Writer.writeSymtab();
+ Writer.writeStrtab();
+
+ Out.write((char *)&Buffer.front(), Buffer.size());
+}