scripts/symbolize.py: add support for dynamically linked TAs

Dynamically linked TAs contain sections mapped from several ELF files. Update symbolize.py accordingly. As a result, the call stack will show the expected debug information instead of "?? ??:0" for stack frames that belong to a dynamic library. Also, the memory regions used to map libraries will be annotated with section names like for the main executable. Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org> Reviewed-by: Joakim Bech <joakim.bech@linaro.org>
author: Jerome Forissier <jerome.forissier@linaro.org> 2018-05-25 15:07:28 +0200
committer: Jérôme Forissier <jerome.forissier@linaro.org> 2018-05-29 17:52:25 +0200
commit: ae252462a0b6e505e7d8f143347daaea20fca88f (patch)
tree: 808c1c8957ea13a1bf89aa741ce9686a3a86da6b /scripts
parent: 01b8b5ce011dd291c22133d7bb979984e626f913 (diff)
1 files changed, 143 insertions, 59 deletions
diff --git a/scripts/symbolize.py b/scripts/symbolize.py
index 68d98940..ac476e5d 100755
--- a/scripts/symbolize.py
+++ b/scripts/symbolize.py
@@ -12,17 +12,16 @@ import re
 import subprocess
 import sys
 
-TA_UUID_RE = re.compile(r'Status of TA (?P<uuid>[0-9a-f\-]+)')
-TA_INFO_RE = re.compile('  arch: (?P<arch>\w+)  '
-                        'load address: (?P<load_addr>0x[0-9a-f]+)')
 CALL_STACK_RE = re.compile('Call stack:')
-
 # This gets the address from lines looking like this:
 # E/TC:0  0x001044a8
 STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
 ABORT_ADDR_RE = re.compile('-abort at address (?P<addr>0x[0-9a-f]+)')
 REGION_RE = re.compile('region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
-                       'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)')
+                       'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
+                       '( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
+ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
+                         ' @ (?P<load_addr>0x[0-9a-f\-]+)')
 
 epilog = '''
 This scripts reads an OP-TEE abort or panic message from stdin and adds debug
@@ -54,6 +53,7 @@ Sample usage:
   ^D
 '''
 
+
 def get_args():
     parser = argparse.ArgumentParser(
                 formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -71,13 +71,13 @@ def get_args():
 
     return parser.parse_args()
 
+
 class Symbolizer(object):
     def __init__(self, out, dirs, strip_path):
         self._out = out
         self._dirs = dirs
         self._strip_path = strip_path
         self._addr2line = None
-        self._bin = 'tee.elf'
         self.reset()
 
     def get_elf(self, elf_or_uuid):
@@ -93,41 +93,84 @@ class Symbolizer(object):
     def set_arch(self):
         if self._arch:
             return
-        if self._bin:
-            p = subprocess.Popen([ 'file', self.get_elf(self._bin) ],
-                                 stdout=subprocess.PIPE)
-            output = p.stdout.readlines()
-            p.terminate()
-            if 'ARM aarch64,' in output[0]:
-                self._arch = 'aarch64-linux-gnu-'
-            elif 'ARM,' in output[0]:
-                self._arch = 'arm-linux-gnueabihf-'
+        elf = self.get_elf(self._elfs[0][0])
+        if elf is None:
+            return
+        p = subprocess.Popen(['file', self.get_elf(self._elfs[0][0])],
+                             stdout=subprocess.PIPE)
+        output = p.stdout.readlines()
+        p.terminate()
+        if 'ARM aarch64,' in output[0]:
+            self._arch = 'aarch64-linux-gnu-'
+        elif 'ARM,' in output[0]:
+            self._arch = 'arm-linux-gnueabihf-'
 
     def arch_prefix(self, cmd):
         self.set_arch()
+        if self._arch is None:
+            return ''
         return self._arch + cmd
 
-    def spawn_addr2line(self):
-        if not self._addr2line:
-            elf = self.get_elf(self._bin)
-            if not elf:
-                return
-            cmd = self.arch_prefix('addr2line')
-            if not cmd:
-                return
-            self._addr2line = subprocess.Popen([cmd, '-f', '-p', '-e', elf],
-                                                stdin = subprocess.PIPE,
-                                                stdout = subprocess.PIPE)
+    def spawn_addr2line(self, elf_name):
+        if elf_name is None:
+            return
+        if self._addr2line_elf_name is elf_name:
+            return
+        if self._addr2line:
+            self._addr2line.terminate
+            self._addr2line = None
+        elf = self.get_elf(elf_name)
+        if not elf:
+            return
+        cmd = self.arch_prefix('addr2line')
+        if not cmd:
+            return
+        self._addr2line = subprocess.Popen([cmd, '-f', '-p', '-e', elf],
+                                           stdin=subprocess.PIPE,
+                                           stdout=subprocess.PIPE)
+        self._addr2line_elf_name = elf_name
+
+    # If addr falls into a region that maps a TA ELF file, return the load
+    # address of that file.
+    def elf_load_addr(self, addr):
+        if self._regions:
+            for r in self._regions:
+                r_addr = int(r[0], 16)
+                r_size = int(r[1], 16)
+                i_addr = int(addr, 16)
+                if (i_addr >= r_addr and i_addr < (r_addr + r_size)):
+                    # Found region
+                    elf_idx = r[2]
+                    if elf_idx is not None:
+                        return self._elfs[int(elf_idx)][1]
+            return None
+        else:
+            # tee.elf
+            return '0x0'
+
+    def elf_for_addr(self, addr):
+        l_addr = self.elf_load_addr(addr)
+        if l_addr is None:
+            return None
+        if l_addr is '0x0':
+            return 'tee.elf'
+        for k in self._elfs:
+            e = self._elfs[k]
+            if int(e[1], 16) == int(l_addr, 16):
+                return e[0]
+        return None
 
     def subtract_load_addr(self, addr):
-        offs = self._load_addr
-        if int(offs, 16) > int(addr, 16):
+        l_addr = self.elf_load_addr(addr)
+        if l_addr is None:
+            return None
+        if int(l_addr, 16) > int(addr, 16):
             return ''
-        return '0x{:x}'.format(int(addr, 16) - int(offs, 16))
+        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))
 
     def resolve(self, addr):
         reladdr = self.subtract_load_addr(addr)
-        self.spawn_addr2line()
+        self.spawn_addr2line(self.elf_for_addr(addr))
         if not reladdr or not self._addr2line:
             return '???'
         try:
@@ -141,14 +184,17 @@ class Symbolizer(object):
         ret = ''
         prevsize = 0
         reladdr = self.subtract_load_addr(addr)
-        elf = self.get_elf(self._bin)
+        elf_name = self.elf_for_addr(addr)
+        if elf_name is None:
+            return ''
+        elf = self.get_elf(elf_name)
         cmd = self.arch_prefix('nm')
         if not reladdr or not elf or not cmd:
             return ''
         ireladdr = int(reladdr, 16)
         nm = subprocess.Popen([cmd, '--numeric-sort', '--print-size', elf],
-                               stdin = subprocess.PIPE,
-                               stdout = subprocess.PIPE)
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE)
         for line in iter(nm.stdout.readline, ''):
             try:
                 addr, size, _, name = line.split()
@@ -177,19 +223,22 @@ class Symbolizer(object):
     def section_plus_offset(self, addr):
         ret = ''
         reladdr = self.subtract_load_addr(addr)
-        elf = self.get_elf(self._bin)
+        elf_name = self.elf_for_addr(addr)
+        if elf_name is None:
+            return ''
+        elf = self.get_elf(elf_name)
         cmd = self.arch_prefix('objdump')
         if not reladdr or not elf or not cmd:
             return ''
         iaddr = int(reladdr, 16)
         objdump = subprocess.Popen([cmd, '--section-headers', elf],
-                                    stdin = subprocess.PIPE,
-                                    stdout = subprocess.PIPE)
+                                   stdin=subprocess.PIPE,
+                                   stdout=subprocess.PIPE)
         for line in iter(objdump.stdout.readline, ''):
             try:
                 idx, name, size, vma, lma, offs, algn = line.split()
             except:
-                continue;
+                continue
             ivma = int(vma, 16)
             isize = int(size, 16)
             if ivma == iaddr:
@@ -221,53 +270,64 @@ class Symbolizer(object):
         return ret
 
     # Return all ELF sections with the ALLOC flag
-    def read_sections(self):
-        if self._sections:
+    def read_sections(self, elf_name):
+        if elf_name is None:
+            return
+        if elf_name in self._sections:
             return
-        elf = self.get_elf(self._bin)
+        elf = self.get_elf(elf_name)
         cmd = self.arch_prefix('objdump')
         if not elf or not cmd:
             return
+        self._sections[elf_name] = []
         objdump = subprocess.Popen([cmd, '--section-headers', elf],
-                                    stdin = subprocess.PIPE,
-                                    stdout = subprocess.PIPE)
+                                   stdin=subprocess.PIPE,
+                                   stdout=subprocess.PIPE)
         for line in iter(objdump.stdout.readline, ''):
             try:
                 _, name, size, vma, _, _, _ = line.split()
             except:
                 if 'ALLOC' in line:
-                    self._sections.append([name, int(vma, 16), int(size, 16)])
+                    self._sections[elf_name].append([name, int(vma, 16),
+                                                     int(size, 16)])
 
     def overlaps(self, section, addr, size):
         sec_addr = section[1]
         sec_size = section[2]
         if not size or not sec_size:
             return False
-        return (addr <= (sec_addr + sec_size - 1)) and ((addr + size - 1) >= sec_addr)
+        return ((addr <= (sec_addr + sec_size - 1)) and
+                ((addr + size - 1) >= sec_addr))
 
-    def sections_in_region(self, addr, size):
+    def sections_in_region(self, addr, size, elf_idx):
         ret = ''
         addr = self.subtract_load_addr(addr)
         if not addr:
             return ''
         iaddr = int(addr, 16)
         isize = int(size, 16)
-        self.read_sections()
-        for s in self._sections:
+        elf = self._elfs[int(elf_idx)][0]
+        if elf is None:
+            return ''
+        self.read_sections(elf)
+        if elf not in self._sections:
+            return ''
+        for s in self._sections[elf]:
             if self.overlaps(s, iaddr, isize):
                 ret += ' ' + s[0]
         return ret
 
     def reset(self):
         self._call_stack_found = False
-        self._load_addr = '0'
         if self._addr2line:
             self._addr2line.terminate()
             self._addr2line = None
+        self._addr2line_elf_name = None
         self._arch = None
         self._saved_abort_line = ''
-        self._sections = []
-        self._bin = "tee.elf"
+        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
+        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
+        self._elfs = {0: ["tee.elf", 0]}  # {idx: [uuid, load_addr], ...}
 
     def write(self, line):
             if self._call_stack_found:
@@ -281,7 +341,7 @@ class Symbolizer(object):
                     res = self.resolve(addr)
                     if self._strip_path:
                         res = re.sub(re.escape(self._strip_path) + '/*', '',
-                              res)
+                                     res)
                     self._out.write(' ' + res)
                     self._out.write(line[post:])
                     return
@@ -289,24 +349,47 @@ class Symbolizer(object):
                     self.reset()
             match = re.search(REGION_RE, line)
             if match:
+                # Region table: save info for later processing once
+                # we know which UUID corresponds to which ELF index
                 addr = match.group('addr')
                 size = match.group('size')
-                self._out.write(line.strip() +
-                                self.sections_in_region(addr, size) + '\n');
+                elf_idx = match.group('elf_idx')
+                self._regions.append([addr, size, elf_idx, line])
+                return
+            match = re.search(ELF_LIST_RE, line)
+            if match:
+                # ELF list: save info for later. Region table and ELF list
+                # will be displayed when the call stack is reached
+                i = int(match.group('idx'))
+                self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
+                                 line]
                 return
             match = re.search(CALL_STACK_RE, line)
             if match:
                 self._call_stack_found = True
+                if self._regions:
+                    for r in self._regions:
+                        r_addr = r[0]
+                        r_size = r[1]
+                        elf_idx = r[2]
+                        saved_line = r[3]
+                        if elf_idx is None:
+                            self._out.write(saved_line)
+                        else:
+                            self._out.write(saved_line.strip() +
+                                            self.sections_in_region(r_addr,
+                                                                    r_size,
+                                                                    elf_idx) +
+                                            '\n')
+                if self._elfs:
+                    for k in self._elfs:
+                        e = self._elfs[k]
+                        if (len(e) >= 3):
+                            self._out.write(e[2])
                 # Here is a good place to resolve the abort address because we
                 # have all the information we need
                 if self._saved_abort_line:
                     self._out.write(self.process_abort(self._saved_abort_line))
-            match = re.search(TA_UUID_RE, line)
-            if match:
-                self._bin = match.group('uuid')
-            match = re.search(TA_INFO_RE, line)
-            if match:
-                self._load_addr = match.group('load_addr')
             match = re.search(ABORT_ADDR_RE, line)
             if match:
                 self.reset()
@@ -318,6 +401,7 @@ class Symbolizer(object):
     def flush(self):
         self._out.flush()
 
+
 def main():
     args = get_args()
     if args.dir:
author	Jerome Forissier <jerome.forissier@linaro.org>	2018-05-25 15:07:28 +0200
committer	Jérôme Forissier <jerome.forissier@linaro.org>	2018-05-29 17:52:25 +0200
commit	ae252462a0b6e505e7d8f143347daaea20fca88f (patch)
tree	808c1c8957ea13a1bf89aa741ce9686a3a86da6b /scripts
parent	01b8b5ce011dd291c22133d7bb979984e626f913 (diff)