summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/llvm/Bitcode/BitstreamReader.h | 2
-rw-r--r-- include/llvm/Bitcode/LLVMBitCodes.h | 4
-rw-r--r-- include/llvm/Bitcode/ReaderWriter.h | 3
-rw-r--r-- include/llvm/IR/ModuleSummaryIndex.h | 43
-rw-r--r-- lib/Bitcode/Reader/BitcodeReader.cpp | 54
-rw-r--r-- lib/Bitcode/Writer/BitcodeWriter.cpp | 74
-rw-r--r-- lib/IR/ModuleSummaryIndex.cpp | 8
-rw-r--r-- lib/Transforms/IPO/FunctionImport.cpp | 2
-rw-r--r-- test/Bitcode/Inputs/module_hash.ll | 4
-rw-r--r-- test/Bitcode/module_hash.ll | 35
-rw-r--r-- tools/llvm-as/llvm-as.cpp | 5
-rw-r--r-- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 40
12 files changed, 236 insertions, 38 deletions
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 628a83cb0de..b331ceea051 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -446,6 +446,8 @@ public:
using SimpleBitstreamCursor::canSkipToPos;
using SimpleBitstreamCursor::AtEndOfStream;
using SimpleBitstreamCursor::GetCurrentBitNo;
+ using SimpleBitstreamCursor::getCurrentByteNo;
+ using SimpleBitstreamCursor::getPointerToByte;
using SimpleBitstreamCursor::getBitStreamReader;
using SimpleBitstreamCursor::JumpToBit;
using SimpleBitstreamCursor::fillCurWord;
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index a93ed58ebb9..0c4cc854cdc 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -107,6 +107,9 @@ enum ModuleCodes {
// SOURCE_FILENAME: [namechar x N]
MODULE_CODE_SOURCE_FILENAME = 16,
+
+ // HASH: [5*i32]
+ MODULE_CODE_HASH = 17,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
@@ -183,6 +186,7 @@ enum ValueSymtabCodes {
// The module path symbol table only has one code (MST_CODE_ENTRY).
enum ModulePathSymtabCodes {
MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
+ MST_CODE_HASH = 2, // MST_HASH: [5*i32]
};
// The summary section uses different codes in the per-module
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index bbce15a8f93..1afffa05527 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -107,7 +107,8 @@ namespace llvm {
/// for use in ThinLTO optimization).
void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder = false,
- bool EmitSummaryIndex = false);
+ bool EmitSummaryIndex = false,
+ bool GenerateHash = false);
/// Write the specified module summary index to the given raw output stream,
/// where it will be written in a new bitcode block. This is used when
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 30a7145cb42..fe0385e76b9 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -25,6 +25,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <array>
+
namespace llvm {
/// \brief Class to accumulate and hold information about a callee.
@@ -228,6 +230,9 @@ public:
void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; }
};
+/// 160-bit SHA1 hash, stored as five 32-bit words.
+typedef std::array<uint32_t, 5> ModuleHash;
+
/// List of global value info structures for a particular value held
/// in the GlobalValueMap. Requires a vector in the case of multiple
/// COMDAT values of the same name.
@@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::const_iterator const_globalvalueinfo_iterator;
typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator;
/// String table to hold/own module path strings, which additionally holds the
-/// module ID assigned to each module during the plugin step. The StringMap
-/// makes a copy of and owns inserted strings.
-typedef StringMap<uint64_t> ModulePathStringTableTy;
+/// module ID assigned to each module during the plugin step, as well as a hash
+/// of the module. The StringMap makes a copy of and owns inserted strings.
+typedef StringMap<std::pair<uint64_t, ModuleHash>> ModulePathStringTableTy;
/// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them.
@@ -304,17 +309,26 @@ public:
GlobalValueMap[ValueGUID].push_back(std::move(Info));
}
- /// Table of modules, containing an id.
- const StringMap<uint64_t> &modulePaths() const {
+ /// Table of modules, containing module hash and id.
+ const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
return ModulePathStringTable;
}
- /// Table of modules, containing an id.
- StringMap<uint64_t> &modulePaths() { return ModulePathStringTable; }
+ /// Table of modules, containing hash and id.
+ StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
+ return ModulePathStringTable;
+ }
/// Get the module ID recorded for the given module path.
uint64_t getModuleId(const StringRef ModPath) const {
- return ModulePathStringTable.lookup(ModPath);
+ return ModulePathStringTable.lookup(ModPath).first;
+ }
+
+ /// Get the module SHA1 hash recorded for the given module path.
+ const ModuleHash &getModuleHash(const StringRef ModPath) const {
+ auto It = ModulePathStringTable.find(ModPath);
+ assert(It != ModulePathStringTable.end() && "Module not registered");
+ return It->second.second;
}
/// Add the given per-module index into this module index/summary,
@@ -333,11 +347,14 @@ public:
return NewName.str();
}
- /// Add a new module path, mapped to the given module Id, and return StringRef
- /// owned by string table map.
- StringRef addModulePath(StringRef ModPath, uint64_t ModId) {
- return ModulePathStringTable.insert(std::make_pair(ModPath, ModId))
- .first->first();
+ /// Add a new module path with the given \p Hash, mapped to the given
+ /// \p ModId, and return an iterator to the entry in the index.
+ ModulePathStringTableTy::iterator
+ addModulePath(StringRef ModPath, uint64_t ModId,
+ ModuleHash Hash = ModuleHash{{0}}) {
+ return ModulePathStringTable.insert(std::make_pair(
+ ModPath,
+ std::make_pair(ModId, Hash))).first;
}
/// Check if the given Module has any functions available for exporting
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 93496fe8e6f..1840b60cc01 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5632,11 +5632,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
}
continue;
- case BitstreamEntry::Record:
- // Once we find the last record of interest, skip the rest.
- if (VSTOffset > 0)
- Stream.skipRecord(Entry.ID);
- else {
+ case BitstreamEntry::Record: {
Record.clear();
auto BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
@@ -5650,6 +5646,25 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
SourceFileName = ValueName.c_str();
break;
}
+ /// MODULE_CODE_HASH: [5*i32]
+ case bitc::MODULE_CODE_HASH: {
+ if (Record.size() != 5)
+ return error("Invalid hash length " + Twine(Record.size()).str());
+ if (!TheIndex)
+ break;
+ if (TheIndex->modulePaths().empty())
+ // Does not have any summary emitted.
+ break;
+ if (TheIndex->modulePaths().size() != 1)
+ return error("Don't expect multiple modules defined?");
+ auto &Hash = TheIndex->modulePaths().begin()->second.second;
+ int Pos = 0;
+ for (auto &Val : Record) {
+ assert(!(Val >> 32) && "Unexpected high bits set");
+ Hash[Pos++] = Val;
+ }
+ break;
+ }
/// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
@@ -5761,7 +5776,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// module path string table entry with an empty (0) ID to take
// ownership.
FS->setModulePath(
- TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+ TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
static int RefListStartIndex = 4;
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
@@ -5799,7 +5814,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
std::unique_ptr<GlobalVarSummary> FS =
llvm::make_unique<GlobalVarSummary>(getDecodedLinkage(RawLinkage));
FS->setModulePath(
- TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+ TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
for (unsigned I = 2, E = Record.size(); I != E; ++I) {
unsigned RefValueId = Record[I];
uint64_t RefGUID = getGUIDFromValueId(RefValueId);
@@ -5887,6 +5902,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
SmallVector<uint64_t, 64> Record;
SmallString<128> ModulePath;
+ ModulePathStringTableTy::iterator LastSeenModulePath;
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -5907,14 +5923,32 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
break;
case bitc::MST_CODE_ENTRY: {
// MST_ENTRY: [modid, namechar x N]
+ uint64_t ModuleId = Record[0];
+
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
- uint64_t ModuleId = Record[0];
- StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId);
- ModuleIdMap[ModuleId] = ModulePathInMap;
+
+ LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
+ ModuleIdMap[ModuleId] = LastSeenModulePath->first();
+
ModulePath.clear();
break;
}
+ /// MST_CODE_HASH: [5*i32]
+ case bitc::MST_CODE_HASH: {
+ if (Record.size() != 5)
+ return error("Invalid hash length " + Twine(Record.size()).str());
+ if (LastSeenModulePath == TheIndex->modulePaths().end())
+ return error("Invalid hash that does not follow a module path");
+ int Pos = 0;
+ for (auto &Val : Record) {
+ assert(!(Val >> 32) && "Unexpected high bits set");
+ LastSeenModulePath->second.second[Pos++] = Val;
+ }
+ // Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
+ LastSeenModulePath = TheIndex->modulePaths().end();
+ break;
+ }
}
}
llvm_unreachable("Exit infinite loop");
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c1513416817..18fb7ad8d11 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ValueEnumerator.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SHA1.h"
#include <cctype>
#include <map>
using namespace llvm;
@@ -2852,8 +2854,18 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
- SmallVector<unsigned, 64> NameVals;
- for (const StringMapEntry<uint64_t> &MPSE : I.modulePaths()) {
+ // Module hash: 160-bit SHA1, optionally emitted after each MST_CODE_ENTRY.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
+
+ SmallVector<unsigned, 64> Vals;
+ for (const auto &MPSE : I.modulePaths()) {
StringEncoding Bits =
getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
unsigned AbbrevToUse = Abbrev8Bit;
@@ -2862,14 +2874,29 @@ static void WriteModStrings(const ModuleSummaryIndex &I,
else if (Bits == SE_Fixed7)
AbbrevToUse = Abbrev7Bit;
- NameVals.push_back(MPSE.getValue());
+ Vals.push_back(MPSE.getValue().first);
for (const auto P : MPSE.getKey())
- NameVals.push_back((unsigned char)P);
+ Vals.push_back((unsigned char)P);
// Emit the finished record.
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse);
- NameVals.clear();
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+ Vals.clear();
+ // Emit an optional hash for the module now
+ auto &Hash = MPSE.getValue().second;
+ bool AllZero = true; // Detect if the hash is empty, and do not generate it
+ for (auto Val : Hash) {
+ if (Val)
+ AllZero = false;
+ Vals.push_back(Val);
+ }
+ if (!AllZero) {
+ // Emit the hash record.
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+ }
+
+ Vals.clear();
}
Stream.ExitBlock();
}
@@ -3177,11 +3204,36 @@ static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+static void writeModuleHash(BitstreamWriter &Stream,
+ SmallVectorImpl<char> &Buffer,
+ size_t BlockStartPos) {
+ // Emit the module's hash.
+ // MODULE_CODE_HASH: [5*i32]
+ SHA1 Hasher;
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&Buffer[BlockStartPos],
+ Buffer.size() - BlockStartPos));
+ auto Hash = Hasher.result();
+ SmallVector<uint64_t, 20> Vals;
+ auto LShift = [&](unsigned char Val, unsigned Amount)
+ -> uint64_t { return ((uint64_t)Val) << Amount; };
+ for (int Pos = 0; Pos < 20; Pos += 4) {
+ uint32_t SubHash = LShift(Hash[Pos + 0], 24);
+ SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) |
+ (unsigned)(unsigned char)Hash[Pos + 3];
+ Vals.push_back(SubHash);
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
+}
+
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream,
bool ShouldPreserveUseListOrder,
- uint64_t BitcodeStartBit, bool EmitSummaryIndex) {
+ uint64_t BitcodeStartBit, bool EmitSummaryIndex,
+ bool GenerateHash, SmallVectorImpl<char> &Buffer) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+ size_t BlockStartPos = Buffer.size();
SmallVector<unsigned, 1> Vals;
unsigned CurVersion = 1;
@@ -3238,6 +3290,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream,
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
+ if (GenerateHash) {
+ writeModuleHash(Stream, Buffer, BlockStartPos);
+ }
+
Stream.ExitBlock();
}
@@ -3322,7 +3378,7 @@ static void WriteBitcodeHeader(BitstreamWriter &Stream) {
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder,
- bool EmitSummaryIndex) {
+ bool EmitSummaryIndex, bool GenerateHash) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@@ -3348,7 +3404,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Emit the module.
WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
- EmitSummaryIndex);
+ EmitSummaryIndex, GenerateHash, Buffer);
}
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp
index 16b58ddbeff..c23e072b48a 100644
--- a/lib/IR/ModuleSummaryIndex.cpp
+++ b/lib/IR/ModuleSummaryIndex.cpp
@@ -37,9 +37,11 @@ void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
// Add the module path string ref for this module if we haven't already
// saved a reference to it.
- if (ModPath.empty())
- ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId);
- else
+ if (ModPath.empty()) {
+ auto Path = Info->summary()->modulePath();
+ ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
+ ->first();
+ } else
assert(ModPath == Info->summary()->modulePath() &&
"Each module in the combined map should have a unique ID");
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 255a5582745..b5f1ffb046f 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -61,7 +61,7 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
/* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
- return nullptr;
+ report_fatal_error("Abort");
}
return Result;
diff --git a/test/Bitcode/Inputs/module_hash.ll b/test/Bitcode/Inputs/module_hash.ll
new file mode 100644
index 00000000000..1d422c6e817
--- /dev/null
+++ b/test/Bitcode/Inputs/module_hash.ll
@@ -0,0 +1,4 @@
+; Needs a function for the combined index to be populated
+define void @bar() {
+ ret void
+}
diff --git a/test/Bitcode/module_hash.ll b/test/Bitcode/module_hash.ll
new file mode 100644
index 00000000000..fda6030a903
--- /dev/null
+++ b/test/Bitcode/module_hash.ll
@@ -0,0 +1,35 @@
+; Check per module hash.
+; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
+; MOD1: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
+; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
+; MOD2: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
+
+; Check that the hash matches in the combined index.
+
+; First regenerate the modules with a summary
+; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc
+; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc
+
+; Recover the hashes from the modules themselves.
+; RUN: llvm-bcanalyzer -dump %t.m1.bc | grep '<HASH' > %t.hash
+; RUN: llvm-bcanalyzer -dump %t.m2.bc | grep '<HASH' >> %t.hash
+
+; Generate the combined index and gather the hashes there.
+; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '<HASH ' >> %t.hash
+
+; Validate the output now: the hashes from the individual modules and the combined index are in the same file.
+; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED
+
+; First capture the value of the hash for the two modules.
+; COMBINED: <HASH op0=[[HASH1_1:[0-9]*]] op1=[[HASH1_2:[0-9]*]] op2=[[HASH1_3:[0-9]*]] op3=[[HASH1_4:[0-9]*]] op4=[[HASH1_5:[0-9]*]] (match)/>
+; COMBINED: <HASH op0=[[HASH2_1:[0-9]*]] op1=[[HASH2_2:[0-9]*]] op2=[[HASH2_3:[0-9]*]] op3=[[HASH2_4:[0-9]*]] op4=[[HASH2_5:[0-9]*]] (match)/>
+
+; Validate against the value extracted from the combined index
+; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH1_1]] op1=[[HASH1_2]] op2=[[HASH1_3]] op3=[[HASH1_4]] op4=[[HASH1_5]]/>
+; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH2_1]] op1=[[HASH2_2]] op2=[[HASH2_3]] op3=[[HASH2_4]] op4=[[HASH2_5]]/>
+
+
+; Need a function for the combined index to be populated.
+define void @foo() {
+ ret void
+}
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 7318bfe341d..7e9500a6672 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -48,6 +48,9 @@ static cl::opt<bool> EmitSummaryIndex("module-summary",
cl::desc("Emit module summary index"),
cl::init(false));
+static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
+ cl::init(false));
+
static cl::opt<bool>
DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
@@ -82,7 +85,7 @@ static void WriteOutputFile(const Module *M) {
if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
- EmitSummaryIndex);
+ EmitSummaryIndex, EmitModuleHash);
// Declare success.
Out->keep();
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index d1cc1a02778..3c23103d70b 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -29,6 +29,7 @@
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Verifier.h"
@@ -38,8 +39,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <array>
#include <cctype>
#include <map>
#include <system_error>
@@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
+ STRINGIFY_CODE(MODULE_CODE, HASH)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {
@@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default:
return nullptr;
STRINGIFY_CODE(MST_CODE, ENTRY)
+ STRINGIFY_CODE(MST_CODE, HASH)
}
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
switch (CodeID) {
@@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
if (Stream.EnterSubBlock(BlockID, &NumWords))
return Error("Malformed block record");
+ // Remember the block start; used to verify a MODULE_CODE_HASH record later
+ uint64_t BlockEntryPos = Stream.getCurrentByteNo();
+
const char *BlockName = nullptr;
if (DumpRecords) {
outs() << Indent << "<";
@@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
++BlockStats.NumRecords;
StringRef Blob;
+ unsigned CurrentRecordPos = Stream.getCurrentByteNo();
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
// Increment the # occurrences of this code.
@@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];
+ // If we found a module hash, let's verify that it matches!
+ if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
+ if (Record.size() != 5)
+ outs() << " (invalid)";
+ else {
+ // Recompute the hash and compare it to the one in the bitcode
+ SHA1 Hasher;
+ StringRef Hash;
+ {
+ int BlockSize = CurrentRecordPos - BlockEntryPos;
+ auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
+ Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
+ Hash = Hasher.result();
+ }
+ SmallString<20> RecordedHash;
+ RecordedHash.resize(20);
+ int Pos = 0;
+ for (auto &Val : Record) {
+ assert(!(Val >> 32) && "Unexpected high bits set");
+ RecordedHash[Pos++] = (Val >> 24) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 16) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 8) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 0) & 0xFF;
+ }
+ if (Hash == RecordedHash)
+ outs() << " (match)";
+ else
+ outs() << " (!mismatch!)";
+ }
+ }
+
outs() << "/>";
if (Abbv) {