diff options
author | Zachary Turner <zturner@google.com> | 2018-07-30 03:12:34 +0000 |
---|---|---|
committer | Zachary Turner <zturner@google.com> | 2018-07-30 03:12:34 +0000 |
commit | c84f7e82fc41f89375496a21d35a802f2cb188a0 (patch) | |
tree | 387ca1accde4c7d6371d660ffc04f8295f6034cb | |
parent | 28b397746c3995bde01158865af6749de25abd0b (diff) |
[MS Demangler] Demangle symbols in function scopes.
There are a couple of issues you run into when you start getting into
more complex names, especially with regard to function-local statics.
When you've got something like:
int x() {
static int n = 0;
return n;
}
Then this needs to demangle to something like
int `int __cdecl x()'::`1'::n
The nested mangled symbols (e.g. `int __cdecl x()` in the above
example) also share state with regard to back-referencing, so
we need to be able to re-use the demangler in the middle of
demangling a symbol while sharing back-ref state.
To make matters more complicated, there are a lot of ambiguities
when demangling a symbol's qualified name, because a function local
scope pattern (usually something like `?1??name?`) looks suspiciously
like many other possible things that can occur, such as `?1` meaning
the second back-ref. Disambiguating these cases is rather
interesting. The `?1?` in a local scope pattern is actually a special
case of the more general pattern of `? + <encoded number> + ?`, where
"encoded number" can itself have embedded `@` symbols, which is a
common delimiter in mangled names. So we have to take care during the
disambiguation, which is the reason for the overly complicated
`startsWithLocalScopePattern` function in this patch.
I've added some pretty obnoxious tests to exercise all of this, which
exposed several other problems related to back-referencing, so those
are fixed here as well. Finally, I've uncommented some tests that were
previously marked as `FIXME`, since now these work.
Differential Revision: https://reviews.llvm.org/D49965
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338226 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Demangle/MicrosoftDemangle.cpp | 144 | ||||
-rw-r--r-- | lib/Demangle/StringView.h | 24 | ||||
-rw-r--r-- | test/Demangle/ms-mangle.test | 14 | ||||
-rw-r--r-- | test/Demangle/ms-nested-scopes.test | 146 |
4 files changed, 309 insertions, 19 deletions
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp index 551bcdcb620..82b1a4fb8dc 100644 --- a/lib/Demangle/MicrosoftDemangle.cpp +++ b/lib/Demangle/MicrosoftDemangle.cpp @@ -33,11 +33,21 @@ class ArenaAllocator { struct AllocatorNode { uint8_t *Buf = nullptr; size_t Used = 0; + size_t Capacity = 0; AllocatorNode *Next = nullptr; }; + void addNode(size_t Capacity) { + AllocatorNode *NewHead = new AllocatorNode; + NewHead->Buf = new uint8_t[Capacity]; + NewHead->Next = Head; + NewHead->Capacity = Capacity; + Head = NewHead; + NewHead->Used = 0; + } + public: - ArenaAllocator() : Head(new AllocatorNode) { Head->Buf = new uint8_t[Unit]; } + ArenaAllocator() { addNode(Unit); } ~ArenaAllocator() { while (Head) { @@ -49,10 +59,25 @@ public: } } + char *allocUnalignedBuffer(size_t Length) { + uint8_t *Buf = Head->Buf + Head->Used; + + Head->Used += Length; + if (Head->Used > Head->Capacity) { + // It's possible we need a buffer which is larger than our default unit + // size, so we need to be careful to add a node with capacity that is at + // least as large as what we need. + addNode(std::max(Unit, Length)); + Head->Used = Length; + Buf = Head->Buf; + } + + return reinterpret_cast<char *>(Buf); + } + template <typename T, typename... Args> T *alloc(Args &&... 
ConstructorArgs) { size_t Size = sizeof(T); - assert(Size < Unit); assert(Head && Head->Buf); size_t P = (size_t)Head->Buf + Head->Used; @@ -62,15 +87,12 @@ public: size_t Adjustment = AlignedP - P; Head->Used += Size + Adjustment; - if (Head->Used < Unit) + if (Head->Used < Head->Capacity) return new (PP) T(std::forward<Args>(ConstructorArgs)...); - AllocatorNode *NewHead = new AllocatorNode; - NewHead->Buf = new uint8_t[ArenaAllocator::Unit]; - NewHead->Next = Head; - Head = NewHead; - NewHead->Used = Size; - return new (NewHead->Buf) T(std::forward<Args>(ConstructorArgs)...); + addNode(ArenaAllocator::Unit); + Head->Used = Size; + return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...); } private: @@ -386,6 +408,47 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) { } } +static bool startsWithLocalScopePattern(StringView S) { + if (!S.consumeFront('?')) + return false; + if (S.size() < 2) + return false; + + size_t End = S.find('?'); + if (End == StringView::npos) + return false; + StringView Candidate = S.substr(0, End); + if (Candidate.empty()) + return false; + + // \?[0-9]\? + // ?@? is the discriminator 0. + if (Candidate.size() == 1) + return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); + + // If it's not 0-9, then it's an encoded number terminated with an @ + if (Candidate.back() != '@') + return false; + Candidate = Candidate.dropBack(); + + // An encoded number starts with B-P and all subsequent digits are in A-P. + // Note that the reason the first digit cannot be A is two fold. First, it + // would create an ambiguity with ?A which delimits the beginning of an + // anonymous namespace. Second, A represents 0, and you don't start a multi + // digit number with a leading 0. Presumably the anonymous namespace + // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 
+ if (Candidate[0] < 'B' || Candidate[0] > 'P') + return false; + Candidate = Candidate.dropFront(); + while (!Candidate.empty()) { + if (Candidate[0] < 'A' || Candidate[0] > 'P') + return false; + Candidate = Candidate.dropFront(); + } + + return true; +} + // Write a function or template parameter list. static void outputParameterList(OutputStream &OS, const ParamList &Params) { if (!Params.Current) { @@ -763,6 +826,10 @@ private: int demangleNumber(StringView &MangledName); void memorizeString(StringView s); + + /// Allocate a copy of \p Borrowed into memory that we own. + StringView copyString(StringView Borrowed); + Name *demangleFullyQualifiedTypeName(StringView &MangledName); Name *demangleFullyQualifiedSymbolName(StringView &MangledName); @@ -777,6 +844,7 @@ private: Name *demangleOperatorName(StringView &MangledName); Name *demangleSimpleName(StringView &MangledName, bool Memorize); Name *demangleAnonymousNamespaceName(StringView &MangledName); + Name *demangleLocallyScopedNamePiece(StringView &MangledName); void demangleOperator(StringView &MangledName, Name *); FuncClass demangleFunctionClass(StringView &MangledName); @@ -813,6 +881,13 @@ private: }; } // namespace +StringView Demangler::copyString(StringView Borrowed) { + char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1); + std::strcpy(Stable, Borrowed.begin()); + + return {Stable, Borrowed.size()}; +} + // Parser entry point. Symbol *Demangler::parse(StringView &MangledName) { Symbol *S = Arena.alloc<Symbol>(); @@ -956,6 +1031,18 @@ Name *Demangler::demangleClassTemplateName(StringView &MangledName) { Name *Node = demangleSimpleName(MangledName, false); Node->TemplateParams = demangleTemplateParameterList(MangledName); + + // Render this class template name into a string buffer so that we can + // memorize it for the purpose of back-referencing. 
+ OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); + outputName(OS, Node); + OS << '\0'; + char *Name = OS.getBuffer(); + + StringView Owned = copyString(Name); + memorizeString(Owned); + std::free(Name); + return Node; } @@ -1103,6 +1190,34 @@ Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { return nullptr; } +Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { + assert(startsWithLocalScopePattern(MangledName)); + + Name *Node = Arena.alloc<Name>(); + MangledName.consumeFront('?'); + int ScopeIdentifier = demangleNumber(MangledName); + + // One ? to terminate the number + MangledName.consumeFront('?'); + + assert(!Error); + Symbol *Scope = parse(MangledName); + if (Error) + return nullptr; + + // Render the parent symbol's name into a buffer. + OutputStream OS = OutputStream::create(nullptr, nullptr, 1024); + OS << '`'; + output(Scope, OS); + OS << '\''; + OS << "::`" << ScopeIdentifier << "'"; + OS << '\0'; + char *Result = OS.getBuffer(); + Node->Str = copyString(Result); + std::free(Result); + return Node; +} + // Parses a type name in the form of A@B@C@@ which represents C::B::A. 
Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { Name *TypeName = demangleUnqualifiedTypeName(MangledName); @@ -1140,6 +1255,10 @@ Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) { } Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) { + if (startsWithDigit(MangledName)) + return demangleBackRefName(MangledName); + if (MangledName.startsWith("?$")) + return demangleClassTemplateName(MangledName); if (MangledName.startsWith('?')) return demangleOperatorName(MangledName); return demangleSimpleName(MangledName, true); @@ -1155,6 +1274,9 @@ Name *Demangler::demangleNameScopePiece(StringView &MangledName) { if (MangledName.startsWith("?A")) return demangleAnonymousNamespaceName(MangledName); + if (startsWithLocalScopePattern(MangledName)) + return demangleLocallyScopedNamePiece(MangledName); + return demangleSimpleName(MangledName, true); } @@ -1727,9 +1849,6 @@ void Demangler::output(const Symbol *S, OutputStream &OS) { Type::outputPre(OS, *S->SymbolType); outputName(OS, S->SymbolName); Type::outputPost(OS, *S->SymbolType); - - // Null terminate the buffer. 
- OS << '\0'; } char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, @@ -1745,5 +1864,6 @@ char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, OutputStream OS = OutputStream::create(Buf, N, 1024); D.output(S, OS); + OS << '\0'; return OS.getBuffer(); } diff --git a/lib/Demangle/StringView.h b/lib/Demangle/StringView.h index 3416db2c286..a89deda694c 100644 --- a/lib/Demangle/StringView.h +++ b/lib/Demangle/StringView.h @@ -22,6 +22,8 @@ class StringView { const char *Last; public: + static const size_t npos = ~size_t(0); + template <size_t N> StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {} StringView(const char *First_, const char *Last_) @@ -35,6 +37,17 @@ public: return StringView(begin() + From, size() - From); } + size_t find(char C, size_t From = 0) const { + size_t FindBegin = std::min(From, size()); + // Avoid calling memchr with nullptr. + if (FindBegin < size()) { + // Just forward to memchr, which is faster than a hand-rolled loop. + if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin)) + return static_cast<const char *>(P) - First; + } + return npos; + } + StringView substr(size_t From, size_t To) const { if (To >= size()) To = size() - 1; @@ -49,11 +62,22 @@ public: return StringView(First + N, Last); } + StringView dropBack(size_t N = 1) const { + if (N >= size()) + N = size(); + return StringView(First, Last - N); + } + char front() const { assert(!empty()); return *begin(); } + char back() const { + assert(!empty()); + return *(end() - 1); + } + char popFront() { assert(!empty()); return *First++; diff --git a/test/Demangle/ms-mangle.test b/test/Demangle/ms-mangle.test index a5d0c70ee31..9a2f780f933 100644 --- a/test/Demangle/ms-mangle.test +++ b/test/Demangle/ms-mangle.test @@ -265,18 +265,18 @@ ?s6@PR13182@@3PBQBDB ; CHECK: char const *const *PR13182::s6 -; FIXME: We don't properly support static locals in functions yet. 
+; FIXME: We don't properly support extern "C" functions yet. ; ?local@?1??extern_c_func@@9@4HA ; FIXME: int `extern_c_func'::`2'::local ; ?local@?1??extern_c_func@@9@4HA ; FIXME: int `extern_c_func'::`2'::local -; ?v@?1??f@@YAHXZ@4U<unnamed-type-v>@?1??1@YAHXZ@A -; FIXME: struct `int __cdecl f(void)'::`2'::<unnamed-type-v> `int __cdecl f(void)'::`2'::v +?v@?1??f@@YAHXZ@4U<unnamed-type-v>@?1??1@YAHXZ@A +; CHECK: struct `int __cdecl f(void)'::`2'::<unnamed-type-v> `int __cdecl f(void)'::`2'::v -; ?v@?1???$f@H@@YAHXZ@4U<unnamed-type-v>@?1???$f@H@@YAHXZ@A -; FIXME: struct `int __cdecl f<int>(void)'::`2'::<unnamed-type-v> `int __cdecl f<int>(void)'::`2'::v +?v@?1???$f@H@@YAHXZ@4U<unnamed-type-v>@?1???$f@H@@YAHXZ@A +; CHECK: struct `int __cdecl f<int>(void)'::`2'::<unnamed-type-v> `int __cdecl f<int>(void)'::`2'::v ??2OverloadedNewDelete@@SAPAXI@Z ; CHECK: static void * __cdecl OverloadedNewDelete::operator new(unsigned int) @@ -335,8 +335,8 @@ ; ?overloaded_fn@@$$J0YAXXZ ; FIXME-EXTERNC: extern \"C\" void __cdecl overloaded_fn(void) -; ?f@UnnamedType@@YAXQAPAU<unnamed-type-T1>@S@1@@Z -; FIXME: void __cdecl UnnamedType::f(struct UnnamedType::S::<unnamed-type-T1> ** const) +?f@UnnamedType@@YAXQAPAU<unnamed-type-T1>@S@1@@Z +; CHECK: void __cdecl UnnamedType::f(struct UnnamedType::S::<unnamed-type-T1> **const) ?f@UnnamedType@@YAXUT2@S@1@@Z ; CHECK: void __cdecl UnnamedType::f(struct UnnamedType::S::T2) diff --git a/test/Demangle/ms-nested-scopes.test b/test/Demangle/ms-nested-scopes.test new file mode 100644 index 00000000000..952b138630c --- /dev/null +++ b/test/Demangle/ms-nested-scopes.test @@ -0,0 +1,146 @@ +; RUN: llvm-undname < %s | FileCheck %s + +; CHECK-NOT: Invalid mangled name + +; Test demangling of function local scope discriminator IDs. 
+?M@?@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`0'::M + +?M@?0??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`1'::M + +?M@?1??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`2'::M + +?M@?2??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`3'::M + +?M@?3??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`4'::M + +?M@?4??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`5'::M + +?M@?5??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`6'::M + +?M@?6??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`7'::M + +?M@?7??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`8'::M + +?M@?8??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`9'::M + +?M@?9??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`10'::M + +?M@?L@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`11'::M + +?M@?M@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`12'::M + +?M@?N@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`13'::M + +?M@?O@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`14'::M + +?M@?P@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`15'::M + +?M@?BA@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`16'::M + +?M@?BB@??L@@YAHXZ@4HA +; CHECK: int `int __cdecl L(void)'::`17'::M + +?j@?1??L@@YAHXZ@4UJ@@A +; CHECK: struct J `int __cdecl L(void)'::`2'::j + +; Test demangling of name back-references +?NN@0XX@@3HA +; CHECK: int XX::NN::NN + +?MM@0NN@XX@@3HA +; CHECK: int XX::NN::MM::MM + +?NN@MM@0XX@@3HA +; CHECK: int XX::NN::MM::NN + +?OO@0NN@01XX@@3HA +; CHECK: int XX::NN::OO::NN::OO::OO + +?NN@OO@010XX@@3HA +; CHECK: int XX::NN::OO::NN::OO::NN + +; Test demangling of name back-references combined with function local scopes. 
+?M@?1??0@YAHXZ@4HA +; CHECK: int `int __cdecl M(void)'::`2'::M + +?L@?2??M@0?2??0@YAHXZ@QEAAHXZ@4HA +; CHECK: int `int __cdecl `int __cdecl L(void)'::`3'::L::M(void)'::`3'::L + +?M@?2??0L@?2??1@YAHXZ@QEAAHXZ@4HA +; CHECK: int `int __cdecl `int __cdecl L(void)'::`3'::L::M(void)'::`3'::M + +; Function local scopes of template functions +?M@?1???$L@H@@YAHXZ@4HA +; CHECK: int `int __cdecl L<int>(void)'::`2'::M + +; And member functions of template classes +?SN@?$NS@H@NS@@QEAAHXZ +; CHECK: int __cdecl NS::NS<int>::SN(void) + +?NS@?1??SN@?$NS@H@0@QEAAHXZ@4HA +; CHECK: int `int __cdecl NS::NS<int>::SN(void)'::`2'::NS + +?SN@?1??0?$NS@H@NS@@QEAAHXZ@4HA +; CHECK: int `int __cdecl NS::NS<int>::SN(void)'::`2'::SN + +?NS@?1??SN@?$NS@H@10@QEAAHXZ@4HA +; CHECK: int `int __cdecl NS::SN::NS<int>::SN(void)'::`2'::NS + +?SN@?1??0?$NS@H@0NS@@QEAAHXZ@4HA +; CHECK: int `int __cdecl NS::SN::NS<int>::SN(void)'::`2'::SN + +; Make sure instantiated templates participate in back-referencing. +; In the next 3 examples there should be 3 back-references: +; 0 = X (right most name) +; 1 = C<int> (second from right) +; 2 = C (third from right) +; Make sure all 3 work as expected by having the 4th component take each value +; from 0-2 and confirming it is the right component. +?X@?$C@H@C@0@2HB +; CHECK: static int const X::C::C<int>::X + +?X@?$C@H@C@1@2HB +; CHECK: static int const C<int>::C::C<int>::X + +?X@?$C@H@C@2@2HB +; CHECK: static int const C::C::C<int>::X + +; Putting everything together. 
+ +; namespace A { namespace B { namespace C { namespace B { namespace C { +; template<typename T> +; struct C { +; int B() { +; static C<int> C; +; static int B = 7; +; static int A = 7; +; return C.B() + B + A; +; } +; }; +; } } } } } + +?C@?1??B@?$C@H@0101A@@QEAAHXZ@4U201013@A +; CHECK: struct A::B::C::B::C::C<int> `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::C + +?B@?1??0?$C@H@C@020A@@QEAAHXZ@4HA +; CHECK: int `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::B + +?A@?1??B@?$C@H@C@1310@QEAAHXZ@4HA +; CHECK: int `int __cdecl A::B::C::B::C::C<int>::B(void)'::`2'::A |