summaryrefslogtreecommitdiff
path: root/tools/llvm-mca
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-07-15 11:01:38 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2018-07-15 11:01:38 +0000
commiteaabc71261e3ac822bcbaacf6255373c668b3805 (patch)
tree97e0163625affdc9a23c55ef419bfe7e42bb7f7c /tools/llvm-mca
parentf23126d10eeb5ffd5f5cfddd61f84d72261fb2bc (diff)
[llvm-mca][BtVer2] teach how to identify false dependencies on partially written
registers. The goal of this patch is to improve the throughput analysis in llvm-mca for the case where instructions perform partial register writes. On x86, partial register writes are quite difficult to model, mainly because different processors tend to implement different register merging schemes in hardware. When the code contains partial register writes, the IPC (instructions per cycle) estimated by llvm-mca tends to diverge quite significantly from the observed IPC (using perf). Modern AMD processors (at least, from Bulldozer onwards) don't rename partial registers. Quoting Agner Fog's microarchitecture.pdf: "The processor always keeps the different parts of an integer register together. For example, AL and AH are not treated as independent by the out-of-order execution mechanism. An instruction that writes to part of a register will therefore have a false dependence on any previous write to the same register or any part of it." This patch is a first important step towards improving the analysis of partial register updates. It changes the semantics of RegisterFile descriptors in tablegen, and teaches llvm-mca how to identify false dependences in the presence of partial register writes (for more details: see the new code comments in include/Target/TargetSchedule.h - class RegisterFile). This patch doesn't address the case where a write to a part of a register is followed by a read from the whole register. On Intel chips, high8 registers (AH/BH/CH/DH) can be stored in separate physical registers. However, a later (dirty) read of the full register (example: AX/EAX) triggers a merge uOp, which adds extra latency (and potentially affects the pipe usage).
This is a very interesting article on the subject with a very informative answer from Peter Cordes: https://stackoverflow.com/questions/45660139/how-exactly-do-partial-registers-on-haswell-skylake-perform-writing-al-seems-to
In the future, the definition of RegisterFile can be extended with extra information that may be used to identify delays caused by merge opcodes triggered by a dirty read of a partial write.
Differential Revision: https://reviews.llvm.org/D49196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337123 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mca')
-rw-r--r--tools/llvm-mca/Instruction.cpp25
-rw-r--r--tools/llvm-mca/Instruction.h12
-rw-r--r--tools/llvm-mca/RegisterFile.cpp135
-rw-r--r--tools/llvm-mca/RegisterFile.h35
4 files changed, 149 insertions, 58 deletions
diff --git a/tools/llvm-mca/Instruction.cpp b/tools/llvm-mca/Instruction.cpp
index 53f0d67bf77..0c847670557 100644
--- a/tools/llvm-mca/Instruction.cpp
+++ b/tools/llvm-mca/Instruction.cpp
@@ -132,7 +132,22 @@ void Instruction::execute() {
void Instruction::update() {
assert(isDispatched() && "Unexpected instruction stage found!");
- if (llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
+
+ if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
+ return;
+
+ // A partial register write cannot complete before a dependent write.
+ auto IsDefReady = [&](const UniqueDef &Def) {
+ if (const WriteState *Write = Def->getDependentWrite()) {
+ int WriteLatency = Write->getCyclesLeft();
+ if (WriteLatency == UNKNOWN_CYCLES)
+ return false;
+ return static_cast<unsigned>(WriteLatency) < Desc.MaxLatency;
+ }
+ return true;
+ };
+
+ if (llvm::all_of(Defs, IsDefReady))
Stage = IS_READY;
}
@@ -141,14 +156,10 @@ void Instruction::cycleEvent() {
return;
if (isDispatched()) {
- bool IsReady = true;
- for (UniqueUse &Use : Uses) {
+ for (UniqueUse &Use : Uses)
Use->cycleEvent();
- IsReady &= Use->isReady();
- }
- if (IsReady)
- Stage = IS_READY;
+ update();
return;
}
diff --git a/tools/llvm-mca/Instruction.h b/tools/llvm-mca/Instruction.h
index 3588fb0ba60..ddf5c3a5e33 100644
--- a/tools/llvm-mca/Instruction.h
+++ b/tools/llvm-mca/Instruction.h
@@ -101,6 +101,12 @@ class WriteState {
// super-registers.
bool ClearsSuperRegs;
+ // This field is set if this is a partial register write, and it has a false
+ // dependency on any previous write of the same register (or a portion of it).
+ // DependentWrite must be able to complete before this write completes, so
+ // that we don't break the WAW, and the two writes can be merged together.
+ const WriteState *DependentWrite;
+
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
// is "unknown". As soon as CyclesLeft is 'known', each user in the set
@@ -113,7 +119,7 @@ public:
WriteState(const WriteDescriptor &Desc, unsigned RegID,
bool clearsSuperRegs = false)
: WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
- ClearsSuperRegs(clearsSuperRegs) {}
+ ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr) {}
WriteState(const WriteState &Other) = delete;
WriteState &operator=(const WriteState &Other) = delete;
@@ -126,6 +132,9 @@ public:
unsigned getNumUsers() const { return Users.size(); }
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
+ const WriteState *getDependentWrite() const { return DependentWrite; }
+ void setDependentWrite(const WriteState *Write) { DependentWrite = Write; }
+
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
void onInstructionIssued();
@@ -315,6 +324,7 @@ public:
const VecUses &getUses() const { return Uses; }
const InstrDesc &getDesc() const { return Desc; }
unsigned getRCUTokenID() const { return RCUTokenID; }
+ int getCyclesLeft() const { return CyclesLeft; }
unsigned getNumUsers() const {
unsigned NumUsers = 0;
diff --git a/tools/llvm-mca/RegisterFile.cpp b/tools/llvm-mca/RegisterFile.cpp
index 63fe0d2d72e..44de105b899 100644
--- a/tools/llvm-mca/RegisterFile.cpp
+++ b/tools/llvm-mca/RegisterFile.cpp
@@ -26,7 +26,8 @@ namespace mca {
RegisterFile::RegisterFile(const llvm::MCSchedModel &SM,
const llvm::MCRegisterInfo &mri, unsigned NumRegs)
- : MRI(mri), RegisterMappings(mri.getNumRegs(), {WriteRef(), {0, 0}}) {
+ : MRI(mri), RegisterMappings(mri.getNumRegs(),
+ {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) {
initialize(SM, NumRegs);
}
@@ -71,34 +72,46 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
// Special case where there is no register class identifier in the set.
// An empty set of register classes means: this register file contains all
// the physical registers specified by the target.
- if (Entries.empty()) {
- for (std::pair<WriteRef, IndexPlusCostPairTy> &Mapping : RegisterMappings)
- Mapping.second = std::make_pair(RegisterFileIndex, 1U);
+ // We optimistically assume that a register can be renamed at the cost of a
+ // single physical register. The constructor of RegisterFile ensures that
+ // a RegisterMapping exists for each logical register defined by the Target.
+ if (Entries.empty())
return;
- }
// Now update the cost of individual registers.
for (const MCRegisterCostEntry &RCE : Entries) {
const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
for (const MCPhysReg Reg : RC) {
- IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second;
- if (Entry.first) {
+ RegisterRenamingInfo &Entry = RegisterMappings[Reg].second;
+ IndexPlusCostPairTy &IPC = Entry.IndexPlusCost;
+ if (IPC.first && IPC.first != RegisterFileIndex) {
// The only register file that is allowed to overlap is the default
// register file at index #0. The analysis is inaccurate if register
// files overlap.
errs() << "warning: register " << MRI.getName(Reg)
<< " defined in multiple register files.";
}
- Entry.first = RegisterFileIndex;
- Entry.second = RCE.Cost;
+ IPC = std::make_pair(RegisterFileIndex, RCE.Cost);
+ Entry.RenameAs = Reg;
+
+ // Assume the same cost for each sub-register.
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
+ RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
+ if (!OtherEntry.IndexPlusCost.first &&
+ (!OtherEntry.RenameAs ||
+ MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
+ OtherEntry.IndexPlusCost = IPC;
+ OtherEntry.RenameAs = Reg;
+ }
+ }
}
}
}
-void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
+void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry,
MutableArrayRef<unsigned> UsedPhysRegs) {
- unsigned RegisterFileIndex = Entry.first;
- unsigned Cost = Entry.second;
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
if (RegisterFileIndex) {
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
RMT.NumUsedPhysRegs += Cost;
@@ -110,10 +123,10 @@ void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
UsedPhysRegs[0] += Cost;
}
-void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
+void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry,
MutableArrayRef<unsigned> FreedPhysRegs) {
- unsigned RegisterFileIndex = Entry.first;
- unsigned Cost = Entry.second;
+ unsigned RegisterFileIndex = Entry.IndexPlusCost.first;
+ unsigned Cost = Entry.IndexPlusCost.second;
if (RegisterFileIndex) {
RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex];
RMT.NumUsedPhysRegs -= Cost;
@@ -128,12 +141,48 @@ void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
void RegisterFile::addRegisterWrite(WriteRef Write,
MutableArrayRef<unsigned> UsedPhysRegs,
bool ShouldAllocatePhysRegs) {
- const WriteState &WS = *Write.getWriteState();
+ WriteState &WS = *Write.getWriteState();
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");
- RegisterMapping &Mapping = RegisterMappings[RegID];
- Mapping.first = Write;
+ LLVM_DEBUG({
+ dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex()
+ << ", " << MRI.getName(RegID) << "]\n";
+ });
+
+ // If RenameAs is equal to RegID, then RegID is subject to register renaming
+ // and false dependencies on RegID are all eliminated.
+
+ // If RenameAs references the invalid register, then we optimistically assume
+ // that it can be renamed. In the absence of tablegen descriptors for register
+ // files, RenameAs is always set to the invalid register ID. In all other
+ // cases, RenameAs must be either equal to RegID, or it must reference a
+ // super-register of RegID.
+
+ // If RenameAs is a super-register of RegID, then a write to RegID has always
+ // a false dependency on RenameAs. The only exception is for when the write
+ // implicitly clears the upper portion of the underlying register.
+ // If a write clears its super-registers, then it is renamed as `RenameAs`.
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ if (RRI.RenameAs && RRI.RenameAs != RegID) {
+ RegID = RRI.RenameAs;
+ const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+
+ if (!WS.clearsSuperRegisters()) {
+ // The processor keeps the definition of `RegID` together with register
+ // `RenameAs`. Since this partial write is not renamed, no physical
+ // register is allocated.
+ ShouldAllocatePhysRegs = false;
+
+ if (OtherWrite.getSourceIndex() != Write.getSourceIndex()) {
+ // This partial write has a false dependency on RenameAs.
+ WS.setDependentWrite(OtherWrite.getWriteState());
+ }
+ }
+ }
+
+ // Update the mapping for register RegID including its sub-registers.
+ RegisterMappings[RegID].first = Write;
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
RegisterMappings[*I].first = Write;
@@ -141,9 +190,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// hardware. For example, zero-latency data-dependency breaking instructions
// don't consume physical registers.
if (ShouldAllocatePhysRegs)
- allocatePhysRegs(Mapping.second, UsedPhysRegs);
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
- // If this is a partial update, then we are done.
if (!WS.clearsSuperRegisters())
return;
@@ -155,42 +203,50 @@ void RegisterFile::removeRegisterWrite(const WriteState &WS,
MutableArrayRef<unsigned> FreedPhysRegs,
bool ShouldFreePhysRegs) {
unsigned RegID = WS.getRegisterID();
- bool ShouldInvalidateSuperRegs = WS.clearsSuperRegisters();
assert(RegID != 0 && "Invalidating an already invalid register?");
- assert(WS.getCyclesLeft() != -512 &&
+ assert(WS.getCyclesLeft() != UNKNOWN_CYCLES &&
"Invalidating a write of unknown cycles!");
assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
- RegisterMapping &Mapping = RegisterMappings[RegID];
- WriteRef &WR = Mapping.first;
- if (!WR.isValid())
- return;
+
+ unsigned RenameAs = RegisterMappings[RegID].second.RenameAs;
+ if (RenameAs && RenameAs != RegID) {
+ RegID = RenameAs;
+
+ if (!WS.clearsSuperRegisters()) {
+ // Keep the definition of `RegID` together with register `RenameAs`.
+ ShouldFreePhysRegs = false;
+ }
+ }
if (ShouldFreePhysRegs)
- freePhysRegs(Mapping.second, FreedPhysRegs);
+ freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs);
+ WriteRef &WR = RegisterMappings[RegID].first;
if (WR.getWriteState() == &WS)
WR.invalidate();
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WR = RegisterMappings[*I].first;
- if (WR.getWriteState() == &WS)
- WR.invalidate();
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
}
- if (!ShouldInvalidateSuperRegs)
+ if (!WS.clearsSuperRegisters())
return;
for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WR = RegisterMappings[*I].first;
- if (WR.getWriteState() == &WS)
- WR.invalidate();
+ WriteRef &OtherWR = RegisterMappings[*I].first;
+ if (OtherWR.getWriteState() == &WS)
+ OtherWR.invalidate();
}
}
void RegisterFile::collectWrites(SmallVectorImpl<WriteRef> &Writes,
unsigned RegID) const {
assert(RegID && RegID < RegisterMappings.size());
+ LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register "
+ << MRI.getName(RegID) << '\n');
const WriteRef &WR = RegisterMappings[RegID].first;
if (WR.isValid())
Writes.push_back(WR);
@@ -225,7 +281,8 @@ unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
// Find how many new mappings must be created for each register file.
for (const unsigned RegID : Regs) {
- const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second;
+ const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
+ const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost;
if (Entry.first)
NumPhysRegs[Entry.first] += Entry.second;
NumPhysRegs[0] += Entry.second;
@@ -266,10 +323,10 @@ void RegisterFile::dump() const {
const RegisterMapping &RM = RegisterMappings[I];
if (!RM.first.getWriteState())
continue;
- const std::pair<unsigned, unsigned> &IndexPlusCost = RM.second;
- dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << IndexPlusCost.first
- << ", Cost=" << IndexPlusCost.second
- << ", ";
+ const RegisterRenamingInfo &RRI = RM.second;
+ dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first
+ << ", Cost=" << RRI.IndexPlusCost.second
+ << ", RenameAs=" << RRI.RenameAs << ", ";
RM.first.dump();
dbgs() << '\n';
}
diff --git a/tools/llvm-mca/RegisterFile.h b/tools/llvm-mca/RegisterFile.h
index 8817b8ba60e..349e9789b6e 100644
--- a/tools/llvm-mca/RegisterFile.h
+++ b/tools/llvm-mca/RegisterFile.h
@@ -61,24 +61,35 @@ class RegisterFile : public HardwareUnit {
// regsiter file #0 specifying command line flag `-register-file-size=<uint>`.
llvm::SmallVector<RegisterMappingTracker, 4> RegisterFiles;
- // This pair is used to identify the owner of a register, as well as
- // the "register cost". Register cost is defined as the number of physical
- // registers required to allocate a user register.
+ // This type is used to propagate information about the owner of a register,
+ // and the cost of allocating it in the PRF. Register cost is defined as the
+ // number of physical registers consumed by the PRF to allocate a user
+ // register.
+ //
// For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical
// registers. So, the cost of allocating a YMM register in BtVer2 is 2.
using IndexPlusCostPairTy = std::pair<unsigned, unsigned>;
+ // Struct RegisterRenamingInfo maps registers to register files.
+ // There is a RegisterRenamingInfo object for every register defined by
+ // the target. RegisteRenamingInfo objects are stored into vector
+ // RegisterMappings, and register IDs can be used to reference them.
+ struct RegisterRenamingInfo {
+ IndexPlusCostPairTy IndexPlusCost;
+ llvm::MCPhysReg RenameAs;
+ };
+
// RegisterMapping objects are mainly used to track physical register
// definitions. There is a RegisterMapping for every register defined by the
// Target. For each register, a RegisterMapping pair contains a descriptor of
// the last register write (in the form of a WriteRef object), as well as a
- // IndexPlusCostPairTy to quickly identify owning register files.
+ // RegisterRenamingInfo to quickly identify owning register files.
//
// This implementation does not allow overlapping register files. The only
// register file that is allowed to overlap with other register files is
// register file #0. If we exclude register #0, every register is "owned" by
// at most one register file.
- using RegisterMapping = std::pair<WriteRef, IndexPlusCostPairTy>;
+ using RegisterMapping = std::pair<WriteRef, RegisterRenamingInfo>;
// This map contains one entry for each register defined by the target.
std::vector<RegisterMapping> RegisterMappings;
@@ -103,13 +114,12 @@ class RegisterFile : public HardwareUnit {
// Consumes physical registers in each register file specified by the
// `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`.
- void allocatePhysRegs(IndexPlusCostPairTy IPC,
+ void allocatePhysRegs(const RegisterRenamingInfo &Entry,
llvm::MutableArrayRef<unsigned> UsedPhysRegs);
- // Releases previously allocated physical registers from the register file(s)
- // referenced by the IndexPlusCostPairTy object. This method is called from
- // `invalidateRegisterMapping()`.
- void freePhysRegs(IndexPlusCostPairTy IPC,
+ // Releases previously allocated physical registers from the register file(s).
+ // This method is called from `invalidateRegisterMapping()`.
+ void freePhysRegs(const RegisterRenamingInfo &Entry,
llvm::MutableArrayRef<unsigned> FreedPhysRegs);
// Create an instance of RegisterMappingTracker for every register file
@@ -140,8 +150,11 @@ public:
// Returns a "response mask" where each bit represents the response from a
// different register file. A mask of all zeroes means that all register
// files are available. Otherwise, the mask can be used to identify which
- // register file was busy. This sematic allows us classify dispatch dispatch
+ // register file was busy. This sematic allows us to classify dispatch
// stalls caused by the lack of register file resources.
+ //
+ // Current implementation can simulate up to 32 register files (including the
+ // special register file at index #0).
unsigned isAvailable(llvm::ArrayRef<unsigned> Regs) const;
void collectWrites(llvm::SmallVectorImpl<WriteRef> &Writes,
unsigned RegID) const;