diff options
author | Jake Ehrlich <jakehehrlich@google.com> | 2018-07-16 19:48:52 +0000 |
---|---|---|
committer | Jake Ehrlich <jakehehrlich@google.com> | 2018-07-16 19:48:52 +0000 |
commit | 51e4fb6e38bdb07ccbe7c6a33ff92319772c5449 (patch) | |
tree | eb828a2e0701ff2ad0c4d55b41dc29cc24e04843 /tools | |
parent | 7d88286b7cff3fd557a90cc031eb8bf4233ef7db (diff) |
[llvm-objcopy] Add support for large indexes
This patch is an update of an older patch that never landed
(see https://reviews.llvm.org/D42516).
Recently, various users have run into this issue, and it simply
has to be solved at this point. The main difference in this patch
is that I use gunzip instead of unzip, which should hopefully allow
the tests to pass. Please review this as if it were a new patch, however:
I found some issues along the way and made some minor modifications.
The binary used for testing in this patch (a zip file, to keep it small)
can be found here:
https://drive.google.com/file/d/1UjsnTO9edLttZibbr-2T1bJl92KEQFAO/view?usp=sharing
Differential Revision: https://reviews.llvm.org/D49206
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337204 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r-- | tools/llvm-objcopy/Object.cpp | 188 | ||||
-rw-r--r-- | tools/llvm-objcopy/Object.h | 48 |
2 files changed, 215 insertions, 21 deletions
diff --git a/tools/llvm-objcopy/Object.cpp b/tools/llvm-objcopy/Object.cpp index f803ffe1d81..a214f0d42f6 100644 --- a/tools/llvm-objcopy/Object.cpp +++ b/tools/llvm-objcopy/Object.cpp @@ -101,6 +101,10 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) { SectionVisitor::~SectionVisitor() {} +void BinarySectionWriter::visit(const SectionIndexSection &Sec) { + error("Cannot write symbol section index table '" + Sec.Name + "' "); +} + void BinarySectionWriter::visit(const SymbolTableSection &Sec) { error("Cannot write symbol table '" + Sec.Name + "' out to binary"); } @@ -154,6 +158,29 @@ void StringTableSection::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); } +template <class ELFT> +void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) { + uint8_t *Buf = Out.getBufferStart() + Sec.Offset; + auto *IndexesBuffer = reinterpret_cast<typename ELFT::Word *>(Buf); + std::copy(std::begin(Sec.Indexes), std::end(Sec.Indexes), IndexesBuffer); +} + +void SectionIndexSection::initialize(SectionTableRef SecTable) { + Size = 0; + setSymTab(SecTable.getSectionOfType<SymbolTableSection>( + Link, + "Link field value " + Twine(Link) + " in section " + Name + " is invalid", + "Link field value " + Twine(Link) + " in section " + Name + + " is not a symbol table")); + Symbols->setShndxTable(this); +} + +void SectionIndexSection::finalize() { Link = Symbols->Index; } + +void SectionIndexSection::accept(SectionVisitor &Visitor) const { + Visitor.visit(*this); +} + static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { switch (Index) { case SHN_ABS: @@ -172,8 +199,13 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { return false; } +// Large indexes force us to clarify exactly what this function should do. This +// function should return the value that will appear in st_shndx when written +// out. 
uint16_t Symbol::getShndx() const { if (DefinedIn != nullptr) { + if (DefinedIn->Index >= SHN_LORESERVE) + return SHN_XINDEX; return DefinedIn->Index; } switch (ShndxType) { @@ -187,6 +219,7 @@ uint16_t Symbol::getShndx() const { case SYMBOL_HEXAGON_SCOMMON_2: case SYMBOL_HEXAGON_SCOMMON_4: case SYMBOL_HEXAGON_SCOMMON_8: + case SYMBOL_XINDEX: return static_cast<uint16_t>(ShndxType); } llvm_unreachable("Symbol with invalid ShndxType encountered"); @@ -207,6 +240,8 @@ void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, Sym.Binding = Bind; Sym.Type = Type; Sym.DefinedIn = DefinedIn; + if (DefinedIn != nullptr) + DefinedIn->HasSymbol = true; if (DefinedIn == nullptr) { if (Shndx >= SHN_LORESERVE) Sym.ShndxType = static_cast<SymbolShndxType>(Shndx); @@ -222,6 +257,8 @@ void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, } void SymbolTableSection::removeSectionReferences(const SectionBase *Sec) { + if (SectionIndexTable == Sec) + SectionIndexTable = nullptr; if (SymbolNames == Sec) { error("String table " + SymbolNames->Name + " cannot be removed because it is referenced by the symbol table " + @@ -274,7 +311,17 @@ void SymbolTableSection::finalize() { Info = MaxLocalIndex + 1; } -void SymbolTableSection::addSymbolNames() { +void SymbolTableSection::prepareForLayout() { + // Add all potential section indexes before file layout so that the section + // index section has the approprite size. + if (SectionIndexTable != nullptr) { + for (const auto &Sym : Symbols) { + if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE) + SectionIndexTable->addIndex(Sym->DefinedIn->Index); + else + SectionIndexTable->addIndex(SHN_UNDEF); + } + } // Add all of our strings to SymbolNames so that SymbolNames has the right // size before layout is decided. 
for (auto &Sym : Symbols) @@ -654,12 +701,32 @@ template <class ELFT> void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) { const Elf_Shdr &Shdr = *unwrapOrError(ElfFile.getSection(SymTab->Index)); StringRef StrTabData = unwrapOrError(ElfFile.getStringTableForSymtab(Shdr)); + ArrayRef<Elf_Word> ShndxData; - for (const auto &Sym : unwrapOrError(ElfFile.symbols(&Shdr))) { + auto Symbols = unwrapOrError(ElfFile.symbols(&Shdr)); + for (const auto &Sym : Symbols) { SectionBase *DefSection = nullptr; StringRef Name = unwrapOrError(Sym.getName(StrTabData)); - if (Sym.st_shndx >= SHN_LORESERVE) { + if (Sym.st_shndx == SHN_XINDEX) { + if (SymTab->getShndxTable() == nullptr) + error("Symbol '" + Name + + "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists."); + if (ShndxData.data() == nullptr) { + const Elf_Shdr &ShndxSec = + *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index)); + ShndxData = unwrapOrError( + ElfFile.template getSectionContentsAsArray<Elf_Word>(&ShndxSec)); + if (ShndxData.size() != Symbols.size()) + error("Symbol section index table does not have the same number of " + "entries as the symbol table."); + } + Elf_Word Index = ShndxData[&Sym - Symbols.begin()]; + DefSection = Obj.sections().getSection( + Index, + "Symbol '" + Name + "' has invalid section index " + + Twine(Index)); + } else if (Sym.st_shndx >= SHN_LORESERVE) { if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) { error( "Symbol '" + Name + @@ -669,7 +736,7 @@ void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) { } else if (Sym.st_shndx != SHN_UNDEF) { DefSection = Obj.sections().getSection( Sym.st_shndx, "Symbol '" + Name + - "' is defined in invalid section with index " + + "' is defined has invalid section index " + Twine(Sym.st_shndx)); } @@ -699,14 +766,14 @@ void initRelocations(RelocationSection *Relocs, SymbolTableSection *SymbolTable, } } -SectionBase *SectionTableRef::getSection(uint16_t Index, Twine ErrMsg) { 
+SectionBase *SectionTableRef::getSection(uint32_t Index, Twine ErrMsg) { if (Index == SHN_UNDEF || Index > Sections.size()) error(ErrMsg); return Sections[Index - 1].get(); } template <class T> -T *SectionTableRef::getSectionOfType(uint16_t Index, Twine IndexErrMsg, +T *SectionTableRef::getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg) { if (T *Sec = dyn_cast<T>(getSection(Index, IndexErrMsg))) return Sec; @@ -753,6 +820,11 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) { Obj.SymbolTable = &SymTab; return SymTab; } + case SHT_SYMTAB_SHNDX: { + auto &ShndxSection = Obj.addSection<SectionIndexSection>(); + Obj.SectionIndexTable = &ShndxSection; + return ShndxSection; + } case SHT_NOBITS: return Obj.addSection<Section>(Data); default: @@ -783,6 +855,12 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() { Sec.Index = Index++; } + // If a section index table exists we'll need to initialize it before we + // initialize the symbol table because the symbol table might need to + // reference it. + if (Obj.SectionIndexTable) + Obj.SectionIndexTable->initialize(Obj.sections()); + // Now that all of the sections have been added we can fill out some extra // details about symbol tables. We need the symbol table filled out before // any relocations. 
@@ -825,9 +903,13 @@ template <class ELFT> void ELFBuilder<ELFT>::build() { readSectionHeaders(); readProgramHeaders(); + uint32_t ShstrIndex = Ehdr.e_shstrndx; + if (ShstrIndex == SHN_XINDEX) + ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link; + Obj.SectionNames = Obj.sections().template getSectionOfType<StringTableSection>( - Ehdr.e_shstrndx, + ShstrIndex, "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) + " in elf header " + " is invalid", "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) + @@ -893,8 +975,27 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() { Ehdr.e_shentsize = sizeof(Elf_Shdr); if (WriteSectionHeaders) { Ehdr.e_shoff = Obj.SHOffset; - Ehdr.e_shnum = size(Obj.sections()) + 1; - Ehdr.e_shstrndx = Obj.SectionNames->Index; + // """ + // If the number of sections is greater than or equal to + // SHN_LORESERVE (0xff00), this member has the value zero and the actual + // number of section header table entries is contained in the sh_size field + // of the section header at index 0. + // """ + auto Shnum = size(Obj.sections()) + 1; + if (Shnum >= SHN_LORESERVE) + Ehdr.e_shnum = 0; + else + Ehdr.e_shnum = Shnum; + // """ + // If the section name string table section index is greater than or equal + // to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX (0xffff) + // and the actual index of the section name string table section is + // contained in the sh_link field of the section header at index 0. + // """ + if (Obj.SectionNames->Index >= SHN_LORESERVE) + Ehdr.e_shstrndx = SHN_XINDEX; + else + Ehdr.e_shstrndx = Obj.SectionNames->Index; } else { Ehdr.e_shoff = 0; Ehdr.e_shnum = 0; @@ -917,8 +1018,17 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() { Shdr.sh_flags = 0; Shdr.sh_addr = 0; Shdr.sh_offset = 0; - Shdr.sh_size = 0; - Shdr.sh_link = 0; + // See writeEhdr for why we do this. 
+ uint64_t Shnum = size(Obj.sections()) + 1; + if (Shnum >= SHN_LORESERVE) + Shdr.sh_size = Shnum; + else + Shdr.sh_size = 0; + // See writeEhdr for why we do this. + if (Obj.SectionNames != nullptr && Obj.SectionNames->Index >= SHN_LORESERVE) + Shdr.sh_link = Obj.SectionNames->Index; + else + Shdr.sh_link = 0; Shdr.sh_info = 0; Shdr.sh_addralign = 0; Shdr.sh_entsize = 0; @@ -946,9 +1056,10 @@ void Object::removeSections(std::function<bool(const SectionBase &)> ToRemove) { }); if (SymbolTable != nullptr && ToRemove(*SymbolTable)) SymbolTable = nullptr; - if (SectionNames != nullptr && ToRemove(*SectionNames)) { + if (SectionNames != nullptr && ToRemove(*SectionNames)) SectionNames = nullptr; - } + if (SectionIndexTable != nullptr && ToRemove(*SectionIndexTable)) + SectionIndexTable = nullptr; // Now make sure there are no remaining references to the sections that will // be removed. Sometimes it is impossible to remove a reference so we emit // an error here instead. @@ -1109,16 +1220,59 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() { error("Cannot write section header table because section header string " "table was removed."); - // Make sure we add the names of all the sections. + Obj.sortSections(); + + // We need to assign indexes before we perform layout because we need to know + // if we need large indexes or not. We can assign indexes first and check as + // we go to see if we will actully need large indexes. + bool NeedsLargeIndexes = false; + if (size(Obj.sections()) >= SHN_LORESERVE) { + auto Sections = Obj.sections(); + NeedsLargeIndexes = + std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(), + [](const SectionBase &Sec) { return Sec.HasSymbol; }); + // TODO: handle case where only one section needs the large index table but + // only needs it because the large index table hasn't been removed yet. 
+ } + + if (NeedsLargeIndexes) { + // This means we definitely need to have a section index table but if we + // already have one then we should use it instead of making a new one. + if (Obj.SymbolTable != nullptr && Obj.SectionIndexTable == nullptr) { + // Addition of a section to the end does not invalidate the indexes of + // other sections and assigns the correct index to the new section. + auto &Shndx = Obj.addSection<SectionIndexSection>(); + Obj.SymbolTable->setShndxTable(&Shndx); + Shndx.setSymTab(Obj.SymbolTable); + } + } else { + // Since we don't need SectionIndexTable we should remove it and all + // references to it. + if (Obj.SectionIndexTable != nullptr) { + Obj.removeSections([this](const SectionBase &Sec) { + return &Sec == Obj.SectionIndexTable; + }); + } + } + + // Make sure we add the names of all the sections. Importantly this must be + // done after we decide to add or remove SectionIndexes. if (Obj.SectionNames != nullptr) for (const auto &Section : Obj.sections()) { Obj.SectionNames->addString(Section.Name); } - // Make sure we add the names of all the symbols. + + // Before we can prepare for layout the indexes need to be finalized. + uint64_t Index = 0; + for (auto &Sec : Obj.sections()) + Sec.Index = Index++; + + // The symbol table does not update all other sections on update. For + // instance, symbol names are not added as new symbols are added. This means + // that some sections, like .strtab, don't yet have their final size. if (Obj.SymbolTable != nullptr) - Obj.SymbolTable->addSymbolNames(); + Obj.SymbolTable->prepareForLayout(); - Obj.sortSections(); assignOffsets(); // Finalize SectionNames first so that we can assign name indexes. 
diff --git a/tools/llvm-objcopy/Object.h b/tools/llvm-objcopy/Object.h index 2e20b5b299f..b8f45a431e1 100644 --- a/tools/llvm-objcopy/Object.h +++ b/tools/llvm-objcopy/Object.h @@ -37,6 +37,7 @@ class RelocationSection; class DynamicRelocationSection; class GnuDebugLinkSection; class GroupSection; +class SectionIndexSection; class Segment; class Object; struct Symbol; @@ -54,10 +55,10 @@ public: iterator begin() { return iterator(Sections.data()); } iterator end() { return iterator(Sections.data() + Sections.size()); } - SectionBase *getSection(uint16_t Index, Twine ErrMsg); + SectionBase *getSection(uint32_t Index, Twine ErrMsg); template <class T> - T *getSectionOfType(uint16_t Index, Twine IndexErrMsg, Twine TypeErrMsg); + T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg); }; enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; @@ -74,6 +75,7 @@ public: virtual void visit(const DynamicRelocationSection &Sec) = 0; virtual void visit(const GnuDebugLinkSection &Sec) = 0; virtual void visit(const GroupSection &Sec) = 0; + virtual void visit(const SectionIndexSection &Sec) = 0; }; class SectionWriter : public SectionVisitor { @@ -91,6 +93,7 @@ public: virtual void visit(const RelocationSection &Sec) override = 0; virtual void visit(const GnuDebugLinkSection &Sec) override = 0; virtual void visit(const GroupSection &Sec) override = 0; + virtual void visit(const SectionIndexSection &Sec) override = 0; explicit SectionWriter(Buffer &Buf) : Out(Buf) {} }; @@ -107,6 +110,7 @@ public: void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; + void visit(const SectionIndexSection &Sec) override; explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -123,6 +127,7 @@ public: void visit(const RelocationSection &Sec) override; void visit(const GnuDebugLinkSection &Sec) override; void visit(const GroupSection &Sec) override; + 
void visit(const SectionIndexSection &Sec) override; explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {} }; @@ -230,8 +235,9 @@ public: StringRef Name; Segment *ParentSegment = nullptr; uint64_t HeaderOffset; - uint64_t OriginalOffset; + uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max(); uint32_t Index; + bool HasSymbol = false; uint64_t Addr = 0; uint64_t Align = 1; @@ -371,6 +377,7 @@ enum SymbolShndxType { SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, + SYMBOL_XINDEX = ELF::SHN_XINDEX, }; struct Symbol { @@ -389,6 +396,32 @@ struct Symbol { uint16_t getShndx() const; }; +class SectionIndexSection : public SectionBase { + MAKE_SEC_WRITER_FRIEND + +private: + std::vector<uint32_t> Indexes; + SymbolTableSection *Symbols = nullptr; + +public: + virtual ~SectionIndexSection() {} + void addIndex(uint32_t Index) { + Indexes.push_back(Index); + Size += 4; + } + void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } + void initialize(SectionTableRef SecTable) override; + void finalize() override; + void accept(SectionVisitor &Visitor) const override; + + SectionIndexSection() { + Name = ".symtab_shndx"; + Align = 4; + EntrySize = 4; + Type = ELF::SHT_SYMTAB_SHNDX; + } +}; + class SymbolTableSection : public SectionBase { MAKE_SEC_WRITER_FRIEND @@ -398,6 +431,7 @@ class SymbolTableSection : public SectionBase { protected: std::vector<std::unique_ptr<Symbol>> Symbols; StringTableSection *SymbolNames = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; using SymPtr = std::unique_ptr<Symbol>; @@ -405,9 +439,13 @@ public: void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility, uint16_t Shndx, uint64_t Sz); - void addSymbolNames(); + void prepareForLayout(); // An 'empty' symbol table still contains a null symbol. 
bool empty() const { return Symbols.size() == 1; } + void setShndxTable(SectionIndexSection *ShndxTable) { + SectionIndexTable = ShndxTable; + } + const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } const SectionBase *getStrTab() const { return SymbolNames; } const Symbol *getSymbolByIndex(uint32_t Index) const; Symbol *getSymbolByIndex(uint32_t Index); @@ -589,6 +627,7 @@ private: using Elf_Addr = typename ELFT::Addr; using Elf_Shdr = typename ELFT::Shdr; using Elf_Ehdr = typename ELFT::Ehdr; + using Elf_Word = typename ELFT::Word; const ELFFile<ELFT> &ElfFile; Object &Obj; @@ -652,6 +691,7 @@ public: StringTableSection *SectionNames = nullptr; SymbolTableSection *SymbolTable = nullptr; + SectionIndexSection *SectionIndexTable = nullptr; void sortSections(); SectionTableRef sections() { return SectionTableRef(Sections); } |