From 8790f0a7f39d61a806c7339abd45bb83c15ee042 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov
Date: Fri, 31 Aug 2012 11:12:10 +0000
Subject: [compiler-rt] Move draft code for llvm-symbolizer to compiler-rt/utils/llvm-symbolizer after chandlerc's suggestion

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@162988 91177308-0d34-0410-b5e6-96231b3b80d8
---
 utils/llvm-symbolizer/CMakeLists.txt     |  67 +++++++++
 utils/llvm-symbolizer/llvm-symbolizer.cc | 237 +++++++++++++++++++++++++++++++
 2 files changed, 304 insertions(+)
 create mode 100644 utils/llvm-symbolizer/CMakeLists.txt
 create mode 100644 utils/llvm-symbolizer/llvm-symbolizer.cc

(limited to 'utils')

diff --git a/utils/llvm-symbolizer/CMakeLists.txt b/utils/llvm-symbolizer/CMakeLists.txt
new file mode 100644
index 000000000..d1f0acd88
--- /dev/null
+++ b/utils/llvm-symbolizer/CMakeLists.txt
@@ -0,0 +1,67 @@
+# Build llvm-symbolizer binary, which will be used as an external symbolizer
+# by sanitizer tools. Later it can probably become another LLVM tool.
+
+set(SYMBOLIZER_SOURCES
+  llvm-symbolizer.cc
+  )
+
+# Append sources of LLVM libs we use.
+append_llvm_sources(SYMBOLIZER_SOURCES
+  LLVMDebugInfo
+  LLVMSupport
+  LLVMObject
+  )
+
+set(SYMBOLIZER_CFLAGS
+  -I${LLVM_MAIN_SRC_DIR}/include
+  -I${LLVM_BINARY_DIR}/include
+  -fPIC
+  -fomit-frame-pointer
+  -O3
+  )
+
+set(SYMBOLIZER_DEFINITIONS
+#  __STDC_CONSTANT_MACROS=1
+#  __STDC_LIMIT_MACROS=1
+  )
+
+# FIXME: Generalize the following ugly piece in sanitizer_common, symbolizer
+# and asan folders.
+set(SYMBOLIZER_BINARIES)
+if (APPLE)
+  # Build universal binary on Apple.
+  add_executable(llvm-symbolizer.osx ${SYMBOLIZER_SOURCES})
+  set_target_compile_flags(llvm-symbolizer.osx ${SYMBOLIZER_CFLAGS})
+  filter_available_targets(SYMBOLIZER_TARGETS x86_64 i386)
+  set_target_properties(llvm-symbolizer.osx PROPERTIES
+    OSX_ARCHITECTURES "${SYMBOLIZER_TARGETS}")
+  list(APPEND SYMBOLIZER_BINARIES llvm-symbolizer.osx)
+elseif(UNIX)
+  # Assume Linux
+  if(CAN_TARGET_X86_64)
+    add_executable(llvm-symbolizer-x86_64 ${SYMBOLIZER_SOURCES})
+    set_target_compile_flags(llvm-symbolizer-x86_64
+      ${SYMBOLIZER_CFLAGS} ${TARGET_X86_64_CFLAGS})
+    set_property(TARGET llvm-symbolizer-x86_64 APPEND_STRING PROPERTY
+      LINK_FLAGS " ${TARGET_X86_64_CFLAGS}")
+    list(APPEND SYMBOLIZER_BINARIES llvm-symbolizer-x86_64)
+  endif()
+  if(CAN_TARGET_I386)
+    add_executable(llvm-symbolizer-i386 ${SYMBOLIZER_SOURCES})
+    set_target_compile_flags(llvm-symbolizer-i386
+      ${SYMBOLIZER_CFLAGS} ${TARGET_I386_CFLAGS})
+    set_property(TARGET llvm-symbolizer-i386 APPEND_STRING PROPERTY
+      LINK_FLAGS " ${TARGET_I386_CFLAGS}")
+    list(APPEND SYMBOLIZER_BINARIES llvm-symbolizer-i386)
+  endif()
+  # Linux-specific linker flags.
+  set_property(TARGET ${SYMBOLIZER_BINARIES} APPEND_STRING PROPERTY
+    LINK_FLAGS " -lpthread -ldl")
+endif()
+
+set_property(TARGET ${SYMBOLIZER_BINARIES} APPEND PROPERTY
+  COMPILE_DEFINITIONS ${SYMBOLIZER_DEFINITIONS}
+  )
+# FIXME: Setup sane output directories for ${SYMBOLIZER_BINARIES}
+set_target_properties(${SYMBOLIZER_BINARIES} PROPERTIES
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
diff --git a/utils/llvm-symbolizer/llvm-symbolizer.cc b/utils/llvm-symbolizer/llvm-symbolizer.cc
new file mode 100644
index 000000000..d9bcf2a68
--- /dev/null
+++ b/utils/llvm-symbolizer/llvm-symbolizer.cc
@@ -0,0 +1,237 @@
+//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility works much like "addr2line". It is capable of transforming
+// tuples (module name, module offset) to code locations (function name,
+// file, line number, column number). It is targeted for compiler-rt tools
+// (especially AddressSanitizer and ThreadSanitizer) that can use it
+// to symbolize stack traces in their error reports.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cassert>
+#include <cstring>
+#include <iostream>  // NOLINT
+#include <map>
+#include <string>
+
+using namespace llvm;
+using namespace object;
+using std::string;
+
+// -use-symbol-table: when set (the default), a function name found in the
+// object's symbol table replaces the one reported by debug info.
+static cl::opt<bool>
+UseSymbolTable("use-symbol-table", cl::init(true),
+               cl::desc("Prefer names in symbol table to names "
+                        "in debug info"));
+
+// -functions: when set (the default), a function name line is printed before
+// each "file:line:column" line.
+static cl::opt<bool>
+PrintFunctions("functions", cl::init(true),
+               cl::desc("Print function names as well as line "
+                        "information for a given address"));
+
+// argv[0] of this process; assigned in main() and used as the prefix of
+// diagnostics printed by error().
+static StringRef ToolInvocationPath;
+
+// Reports |ec| on stderr if it denotes a failure.
+// Returns true if an error was reported, false if |ec| signals success.
+static bool error(error_code ec) {
+  if (!ec) return false;
+  errs() << ToolInvocationPath << ": error reading file: "
+         << ec.message() << ".\n";
+  return true;
+}
+
+namespace {
+// Bundles one object file with the debug info context created for it and
+// answers offset-to-location queries against that module. Both pointers
+// handed to the constructor are stored in OwningPtr members, i.e. ModuleInfo
+// takes ownership of them. |di_ctx| may be null; |obj| must not be.
+class ModuleInfo {
+  OwningPtr<ObjectFile> Module;
+  OwningPtr<DIContext> DebugInfoContext;
+ public:
+  ModuleInfo(ObjectFile *obj, DIContext *di_ctx)
+      : Module(obj), DebugInfoContext(di_ctx) {}
+
+  // Returns the code location for |module_offset|.
+  // File, line and column come from the debug info context (if there is
+  // one); the function name is requested from it only when PrintFunctions
+  // is set. If both PrintFunctions and UseSymbolTable are set and a symbol
+  // covering the offset exists, its name overrides the function name.
+  // Without a debug info context the result is a default-constructed
+  // DILineInfo, possibly with the function name filled in from the symbol
+  // table.
+  DILineInfo symbolizeCode(uint64_t module_offset) const {
+    DILineInfo dli;
+    if (DebugInfoContext) {
+      uint32_t flags = llvm::DILineInfoSpecifier::FileLineInfo |
+                       llvm::DILineInfoSpecifier::AbsoluteFilePath;
+      if (PrintFunctions)
+        flags |= llvm::DILineInfoSpecifier::FunctionName;
+      dli = DebugInfoContext->getLineInfoForAddress(
+          module_offset, flags);
+    }
+    // Override function name from symbol table if necessary.
+    if (PrintFunctions && UseSymbolTable) {
+      // Copy the strings first: |dli| is about to be overwritten.
+      string filename = dli.getFileName();
+      string function = dli.getFunctionName();
+      if (getFunctionNameFromSymbolTable(module_offset, function)) {
+        dli = DILineInfo(StringRef(filename), StringRef(function),
+                         dli.getLine(), dli.getColumn());
+      }
+    }
+    return dli;
+  }
+ private:
+  // Scans the symbols of the module in order and stores in |function_name|
+  // the name of the first symbol whose range [Address, Address + Size)
+  // contains |address|. Symbols whose address, size or name cannot be read
+  // are skipped (the failure is reported through error()).
+  // Returns true if a name was stored; false if no symbol matched or the
+  // symbol iteration itself failed, in which case |function_name| is left
+  // untouched.
+  // |address| is 64 bits wide, like the offset passed to symbolizeCode(),
+  // so that it is not truncated when the tool is built for a 32-bit host.
+  bool getFunctionNameFromSymbolTable(uint64_t address,
+                                      string &function_name) const {
+    assert(Module);
+    error_code ec;
+    for (symbol_iterator si = Module->begin_symbols(),
+                         se = Module->end_symbols();
+                         si != se; si.increment(ec)) {
+      if (error(ec)) return false;
+      uint64_t Address;
+      uint64_t Size;
+      if (error(si->getAddress(Address))) continue;
+      if (error(si->getSize(Size))) continue;
+      // FIXME: If a function has alias, there are two entries in symbol table
+      // with same address size. Make sure we choose the correct one.
+      if (Address <= address && address < Address + Size) {
+        StringRef Name;
+        if (error(si->getName(Name))) continue;
+        function_name = Name.str();
+        return true;
+      }
+    }
+    return false;
+  }
+};
+
+// Maps a module name, exactly as it was given on input, to its ModuleInfo.
+typedef std::map<string, ModuleInfo*> ModuleMapTy;
+typedef ModuleMapTy::iterator ModuleMapIter;
+typedef ModuleMapTy::const_iterator ModuleMapConstIter;
+}  // namespace
+
+// Every module requested so far; entries are never removed.
+static ModuleMapTy modules;
+
+// Returns true if |full_name| denotes the DWARF section |short_name|.
+// An optional "__DWARF," prefix and then any leading '.' and '_' characters
+// are dropped from |full_name| before the comparison, so for example both
+// ".debug_info" and "__DWARF,__debug_info" match "debug_info".
+static bool isFullNameOfDwarfSection(const StringRef &full_name,
+                                     const StringRef &short_name) {
+  static const char kDwarfPrefix[] = "__DWARF,";
+  StringRef name = full_name;
+  // Skip "__DWARF," prefix.
+  if (name.startswith(kDwarfPrefix))
+    name = name.substr(strlen(kDwarfPrefix));
+  // Skip . and _ prefixes.
+  name = name.substr(name.find_first_not_of("._"));
+  return (name == short_name);
+}
+
+// Returns true if the object endianness is known.
+static bool getObjectEndianness(const ObjectFile *obj,
+                                bool &is_little_endian) {
+  // FIXME: Implement this when libLLVMObject allows to do it easily.
+  // Until then |obj| is not inspected: every object is reported as
+  // little-endian and the endianness is always claimed to be known.
+  is_little_endian = true;
+  return true;
+}
+
+// Returns the ModuleInfo for |module_name|, loading the file on first use.
+// The result (including a failed load) is remembered in |modules|, so each
+// distinct name is opened at most once.
+// Returns null if the file cannot be read or is not an object file that
+// ObjectFile::createObjectFile() accepts; a read failure is reported on
+// stderr through error().
+static ModuleInfo *getOrCreateModuleInfo(const string &module_name) {
+  ModuleMapIter I = modules.find(module_name);
+  if (I != modules.end())
+    return I->second;
+
+  OwningPtr<MemoryBuffer> Buff;
+  ObjectFile *obj = 0;
+  if (!error(MemoryBuffer::getFile(module_name, Buff)))
+    obj = ObjectFile::createObjectFile(Buff.take());
+  if (obj == 0) {
+    // Cache the failure as a null entry instead of crashing; callers must
+    // check the returned pointer.
+    modules.insert(make_pair(module_name, static_cast<ModuleInfo*>(0)));
+    return 0;
+  }
+
+  DIContext *di_context = 0;
+  bool IsLittleEndian;
+  if (getObjectEndianness(obj, IsLittleEndian)) {
+    // Collect the contents of the DWARF sections the context is built from.
+    // A section that is absent from the object leaves its StringRef empty.
+    StringRef DebugInfoSection;
+    StringRef DebugAbbrevSection;
+    StringRef DebugLineSection;
+    StringRef DebugArangesSection;
+    StringRef DebugStringSection;
+    error_code ec;
+    for (section_iterator i = obj->begin_sections(),
+                          e = obj->end_sections();
+                          i != e; i.increment(ec)) {
+      if (error(ec)) break;
+      // Sections whose name or contents cannot be read are skipped.
+      StringRef name;
+      if (error(i->getName(name))) continue;
+      StringRef data;
+      if (error(i->getContents(data))) continue;
+      if (isFullNameOfDwarfSection(name, "debug_info"))
+        DebugInfoSection = data;
+      else if (isFullNameOfDwarfSection(name, "debug_abbrev"))
+        DebugAbbrevSection = data;
+      else if (isFullNameOfDwarfSection(name, "debug_line"))
+        DebugLineSection = data;
+      // Don't use debug_aranges for now, as address ranges contained
+      // there may not cover all instructions in the module
+      // else if (isFullNameOfDwarfSection(name, "debug_aranges"))
+      //   DebugArangesSection = data;
+      else if (isFullNameOfDwarfSection(name, "debug_str"))
+        DebugStringSection = data;
+    }
+
+    di_context = DIContext::getDWARFContext(
+        IsLittleEndian, DebugInfoSection, DebugAbbrevSection,
+        DebugArangesSection, DebugLineSection, DebugStringSection);
+    assert(di_context);
+  }
+
+  // ModuleInfo takes ownership of |obj| and |di_context|.
+  ModuleInfo *module_info = new ModuleInfo(obj, di_context);
+  modules.insert(make_pair(module_name, module_info));
+  return module_info;
+}
+
+// Symbolizes one (module name, module offset) request and prints the answer
+// to stdout:
+//   <function name>          (only if PrintFunctions is set)
+//   <file>:<line>:<column>
+//   <empty line>
+// |module_offset_str| is parsed with radix auto-detection (radix 0). If the
+// module cannot be loaded or the offset cannot be parsed, a
+// default-constructed DILineInfo is printed. stdout is flushed after every
+// answer.
+static void symbolize(const string &module_name,
+                      const string &module_offset_str) {
+  ModuleInfo *module_info = getOrCreateModuleInfo(module_name);
+  DILineInfo line_info;
+  uint64_t module_offset;
+  // |module_info| is null when |module_name| does not point to a valid
+  // object file. getAsInteger() is treated as successful when it returns
+  // false.
+  if (module_info != 0 &&
+      !StringRef(module_offset_str).getAsInteger(0, module_offset)) {
+    line_info = module_info->symbolizeCode(module_offset);
+  }
+  // By default, DILineInfo contains "<invalid>" for function/filename it
+  // cannot fetch. We replace it to "??" to make our output closer to addr2line.
+  static const string kDILineInfoBadString = "<invalid>";
+  static const string kSymbolizerBadString = "??";
+
+  if (PrintFunctions) {
+    string function_name = line_info.getFunctionName();
+    if (function_name == kDILineInfoBadString)
+      function_name = kSymbolizerBadString;
+    outs() << function_name << "\n";
+  }
+  string filename = line_info.getFileName();
+  if (filename == kDILineInfoBadString)
+    filename = kSymbolizerBadString;
+  outs() << filename <<
+         ":" << line_info.getLine() <<
+         ":" << line_info.getColumn() <<
+         "\n\n";  // Print extra empty line to mark the end of output.
+  outs().flush();
+}
+
+// Reads whitespace-separated "<module name> <module offset>" pairs from
+// stdin until input ends or a pair cannot be read, and symbolizes each pair
+// as soon as it has been read.
+int main(int argc, char **argv) {
+  // Print stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n");
+  ToolInvocationPath = argv[0];
+
+  string module_name;
+  string module_offset_str;
+  while (std::cin >> module_name >> module_offset_str) {
+    symbolize(module_name, module_offset_str);
+  }
+  return 0;
+}