//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the
/// hash every time we need to update a function's call information as we're
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to enable us to keep the overheads of this
/// implementation to a minimum.
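///
/// A minimal usage sketch (illustrative only; the function IDs and TSC values
/// below are made up, and allocation-failure handling is elided):
///
///   auto Allocators = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(Allocators);
///   Trie.enterFunction(1, 100); // enter f() at TSC 100
///   Trie.enterFunction(2, 110); // f() calls g()
///   Trie.exitFunction(2, 160);  // g() returns after 50 TSC ticks
///   Trie.exitFunction(1, 200);  // f() returns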
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to avoid relying on
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;

    // Constructor for in-place construction.
    NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of
  // time for this particular node including its children call times, and just
  // the local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    int64_t CallCount;
    int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // We add a constructor here to allow us to in-place construct through
    // Array<...>'s AppendEmplace.
    Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
         int32_t F)
        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
          FId(F) {}

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;

    // We add a constructor here to allow us to in-place construct through
    // Array<...>'s AppendEmplace.
    ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() {}
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    Allocators(Allocators &&O)
        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
          ShadowStackAllocator(O.ShadowStackAllocator),
          NodeIdPairAllocator(O.NodeIdPairAllocator) {
      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) {
      {
        auto Tmp = O.NodeAllocator;
        O.NodeAllocator = this->NodeAllocator;
        this->NodeAllocator = Tmp;
      }
      {
        auto Tmp = O.RootAllocator;
        O.RootAllocator = this->RootAllocator;
        this->RootAllocator = Tmp;
      }
      {
        auto Tmp = O.ShadowStackAllocator;
        O.ShadowStackAllocator = this->ShadowStackAllocator;
        this->ShadowStackAllocator = Tmp;
      }
      {
        auto Tmp = O.NodeIdPairAllocator;
        O.NodeIdPairAllocator = this->NodeIdPairAllocator;
        this->NodeIdPairAllocator = Tmp;
      }
      return *this;
    }

    ~Allocators() {
      // Note that we cannot use delete on these pointers, as they need to be
      // returned to the sanitizer_common library's internal memory tracking
      // system.
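      // The pattern for each member below mirrors the allocation in
      // InitAllocatorsCustom(): explicitly invoke the destructor, then hand
      // the raw memory back through InternalFree().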
      if (NodeAllocator != nullptr) {
        NodeAllocator->~NodeAllocatorType();
        InternalFree(NodeAllocator);
        NodeAllocator = nullptr;
      }
      if (RootAllocator != nullptr) {
        RootAllocator->~RootAllocatorType();
        InternalFree(RootAllocator);
        RootAllocator = nullptr;
      }
      if (ShadowStackAllocator != nullptr) {
        ShadowStackAllocator->~ShadowStackAllocatorType();
        InternalFree(ShadowStackAllocator);
        ShadowStackAllocator = nullptr;
      }
      if (NodeIdPairAllocator != nullptr) {
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
        InternalFree(NodeIdPairAllocator);
        NodeIdPairAllocator = nullptr;
      }
    }
  };

  // TODO: Support configuration of options through the arguments.
  static Allocators InitAllocators() {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) {
    Allocators A;
    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
    new (NodeAllocator) Allocators::NodeAllocatorType(Max);
    A.NodeAllocator = NodeAllocator;

    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
    new (RootAllocator) Allocators::RootAllocatorType(Max);
    A.RootAllocator = RootAllocator;

    auto ShadowStackAllocator =
        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
    A.ShadowStackAllocator = ShadowStackAllocator;

    auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
        InternalAlloc(sizeof(NodeIdPairAllocatorType)));
    new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
    A.NodeIdPairAllocator = NodeIdPairAllocator;

    return A;
  }

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

public:
  explicit FunctionCallTrie(const Allocators &A)
      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator) {}

  void enterFunction(const int32_t FId, uint64_t TSC) {
    DCHECK_NE(FId, 0);
    // This function primarily deals with ensuring that the ShadowStack is
    // consistent and ready for when an exit event is encountered.
    if (UNLIKELY(ShadowStack.empty())) {
      auto NewRoot =
          Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      Roots.Append(NewRoot);
      ShadowStack.AppendEmplace(TSC, NewRoot);
      return;
    }

    auto &Top = ShadowStack.back();
    auto TopNode = Top.NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we just access that node and
    // place it on the top of the stack.
    auto Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto NewNode =
        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    ShadowStack.AppendEmplace(TSC, NewNode);
    DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC) {
    // When we exit a function, we look up the ShadowStack to see whether
    // we've entered this function before. We do as little processing here as
    // we can, since most of the hard work would have already been done at
    // function entry.
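    // We pop entries off the ShadowStack until we find the frame matching
    // FId, so that deeper frames missing their own exit events still get
    // accounted for. CumulativeTreeTime accumulates the elapsed time of the
    // frames popped so far, so each ancestor is charged its elapsed time
    // minus the time already attributed to the frames popped beneath it.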
    uint64_t CumulativeTreeTime = 0;
    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);
      auto LocalTime = TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, recreating each node it visits in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const {
    DCHECK(O.getRoots().empty());

    // We then push the root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, *O.NodeIdPairAllocator, Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      O.Roots.Append(NewRoot);

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      DFSStack.AppendEmplace(Root, NewRoot);
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
              Callee.NodePtr->CumulativeLocalTime, Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
          DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If a node doesn't exist
  // in the provided trie, we add a new one in the right position, inheriting
  // the data from the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
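  //
  // An illustrative sketch of the intended use, collapsing per-thread tries
  // into a single aggregate (`ThreadTries` here is a hypothetical container,
  // not part of this header):
  //
  //   auto Allocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Aggregate(Allocators);
  //   for (const auto &T : ThreadTries)
  //     T.mergeInto(Aggregate);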
  void mergeInto(FunctionCallTrie &O) const {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
                                           0, Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.Append(NodeAndTarget{Root, TargetRoot});
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);

        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee = NT.TargetNode->Callees.AppendEmplace(NewTargetNode,
                                                                Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H