summaryrefslogtreecommitdiff
path: root/conform/linknamespace.py
diff options
context:
space:
mode:
Diffstat (limited to 'conform/linknamespace.py')
-rw-r--r--conform/linknamespace.py217
1 files changed, 217 insertions, 0 deletions
diff --git a/conform/linknamespace.py b/conform/linknamespace.py
new file mode 100644
index 0000000000..07a775499d
--- /dev/null
+++ b/conform/linknamespace.py
@@ -0,0 +1,217 @@
+#!/usr/bin/python
+# Check that use of symbols declared in a given header does not result
+# in any symbols being brought in that are not reserved with external
+# linkage for the given standard.
+# Copyright (C) 2014-2018 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+import argparse
+from collections import defaultdict
+import os.path
+import re
+import subprocess
+import sys
+import tempfile
+
+import glibcconform
+
# Symbols that no standard reserves with external linkage but that this
# check tolerates for now:
#
# * stdin, stdout, stderr (bug 17576): only reserved with external
#   linkage when stdio.h is included (and possibly not then), not
#   generally.
#
# * re_syntax_options (bug 18442): wrongly brought in by regcomp and
#   used by re_comp.
#
WHITELIST = {
    'stdin',
    'stdout',
    'stderr',
    're_syntax_options',
}
+
+
def list_syms(filename):
    """Collect the GLOBAL and WEAK symbols from saved readelf -s output.

    filename names a text file holding the readelf output.  The result
    is a list of (object, symbol, binding, defined) tuples in input
    order.  object is the last path component of the most recent
    'File: ' header, or filename itself before any such header; binding
    is 'GLOBAL' or 'WEAK'; defined is False exactly when the section
    index column reads 'UND'.
    """
    file_prefix = 'File: '
    wanted_bindings = ('GLOBAL', 'WEAK')
    symbols = []
    # Until a 'File: ' header is seen, symbols are attributed to the
    # input file itself.
    obj_name = filename
    with open(filename, 'r') as stream:
        for raw in stream:
            text = raw.rstrip()
            if text.startswith(file_prefix):
                # Keep only the part after the last '/'.
                obj_name = text[len(file_prefix):].rsplit('/', 1)[-1]
                continue
            # Architecture-specific st_other bits are printed inside []
            # and would shift the columns, so drop them before splitting.
            columns = re.sub(r'\[.*?\]', '', text).split()
            if len(columns) < 8:
                continue
            binding, section, symbol = columns[4], columns[6], columns[7]
            # Only plain identifiers with GLOBAL or WEAK binding are kept.
            if (binding in wanted_bindings
                    and re.fullmatch('[A-Za-z0-9_]+', symbol)):
                symbols.append((obj_name, symbol, binding, section != 'UND'))
    return symbols
+
+
def main():
    """Check the link-time namespace of one header for one standard.

    The command line names the header, the standard, the C compiler
    and its flags, a file listing the symbols that are acceptable, a
    file of readelf -s output for the libraries, and the readelf
    program.  A test file taking the address of every function the
    header exports is compiled, and the objects that define its
    undefined symbols are followed transitively.  Each symbol reached
    this way that neither starts with '_' nor is acceptable is
    reported by printing the chain of references that led to it.

    Exits with status 1 if anything was reported and 0 otherwise.  A
    failing compiler or readelf command raises
    subprocess.CalledProcessError.
    """
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    # Every option except --flags is used unconditionally below, so a
    # missing one is reported as a usage error here rather than as an
    # obscure failure later on.
    parser.add_argument('--header', metavar='HEADER', required=True,
                        help='name of header')
    parser.add_argument('--standard', metavar='STD', required=True,
                        help='standard to use when processing header')
    parser.add_argument('--cc', metavar='CC', required=True,
                        help='C compiler to use')
    # An omitted --flags means "no extra flags" instead of the text
    # 'None' being pasted into the compiler command line.
    parser.add_argument('--flags', metavar='CFLAGS', default='',
                        help='Compiler flags to use with CC')
    parser.add_argument('--stdsyms', metavar='FILE', required=True,
                        help='File with list of standard symbols')
    parser.add_argument('--libsyms', metavar='FILE', required=True,
                        help='File with symbol information from libraries')
    parser.add_argument('--readelf', metavar='READELF', required=True,
                        help='readelf program to use')
    args = parser.parse_args()

    # Load the list of symbols that are OK.
    with open(args.stdsyms, 'r') as stdsyms_file:
        stdsyms = {line.rstrip() for line in stdsyms_file}
    stdsyms |= WHITELIST

    # Load information about GLOBAL and WEAK symbols defined or used
    # in the standard libraries.
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for obj, name, bind, defined in list_syms(args.libsyms):
        if defined:
            sym_objs[name].append(obj)
        if bind == 'GLOBAL' or not defined:
            seen_syms[obj].append(name)
        if bind == 'GLOBAL' and not defined:
            strong_undef_syms[obj].append(name)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    #   possible that (a) any standard library definition is weak, so
    #   can be overridden by the user's definition, and (b) the symbol
    #   is only used conditionally and not if the program is limited to
    #   standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    #   data symbol rather than a function from the standard library,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    #   in a standard macro being compiled to call foo, this will not be
    #   detected.
    #
    # * Header inclusions should be compiled several times with
    #   different options such as -O2, -D_FORTIFY_SOURCE and
    #   -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    #   from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    #   the basis that if a macro calls an internal function, that
    #   function must also be declared in the header.  However, the
    #   header might also declare implementation-namespace functions
    #   that are not called by any standard macro in the header,
    #   resulting in false positives for any symbols brought in only
    #   through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    #   static linking, when a call is from one shared library to
    #   another or uses a PLT entry for a call within a shared library;
    #   such issues are only detected by this script if the same
    #   namespace issue applies for static linking.

    # Symbol -> text describing the first reference chain that reached it.
    seen_where = {}
    # Objects whose symbols have already been followed.
    files_seen = set()
    # Every undefined symbol found so far, and the ones still to follow.
    all_undef = {}
    current_undef = {}
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    with tempfile.TemporaryDirectory() as temp_dir:
        cincfile_name = os.path.join(temp_dir, 'undef.c')
        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
        # One pointer variable per exported function, so that the object
        # file ends up with an undefined reference to each of them.
        cincfile_text = ('#include <%s>\n%s\n'
                         % (args.header,
                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
                                      % (sym, sym) for sym in sorted(c_syms))))
        with open(cincfile_name, 'w') as cincfile:
            cincfile.write(cincfile_text)
        # Both commands are assembled as single strings and run through
        # the shell, which splits CC and CFLAGS into words and performs
        # the output redirection.
        # NOTE(review): this presumes the option values are trusted
        # build-system inputs - confirm before reusing elsewhere.
        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
                  cincfile_name, cincfile_o_name))
        subprocess.check_call(cmd, shell=True)
        cmd = ('LC_ALL=C %s -W -s %s > %s'
               % (args.readelf, cincfile_o_name, cincfile_sym_name))
        subprocess.check_call(cmd, shell=True)
        for _, name, bind, defined in list_syms(cincfile_sym_name):
            if bind == 'GLOBAL' and not defined:
                sym_text = '[initial] %s' % name
                seen_where[name] = sym_text
                all_undef[name] = sym_text
                current_undef[name] = sym_text

    # Follow the references one round at a time: each round visits the
    # not-yet-seen objects that define the current undefined symbols and
    # gathers the new strong undefined symbols for the next round.
    while current_undef:
        new_undef = {}
        for sym, cu_sym in sorted(current_undef.items()):
            for obj in sym_objs[sym]:
                if obj in files_seen:
                    continue
                files_seen.add(obj)
                for ssym in seen_syms[obj]:
                    if ssym not in seen_where:
                        seen_where[ssym] = ('%s -> [%s] %s'
                                            % (cu_sym, obj, ssym))
                for usym in strong_undef_syms[obj]:
                    if usym not in all_undef:
                        usym_text = '%s -> [%s] %s' % (cu_sym, obj, usym)
                        all_undef[usym] = usym_text
                        new_undef[usym] = usym_text
        current_undef = new_undef

    # Report every reached symbol that neither starts with an underscore
    # nor appears in the acceptable set.
    ret = 0
    for sym in sorted(seen_where):
        if sym.startswith('_'):
            continue
        if sym in stdsyms:
            continue
        print(seen_where[sym])
        ret = 1
    sys.exit(ret)
+
+
# Run the check only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()