DWARF-5: .debug_names index producer

This adds a new "-dwarf-5" switch to "save gdb-index" that makes it generate index files with DWARF-5 .debug_names/.debug_str sections instead of GDB's own .gdb_index. We should probably add a command line option to contrib/gdb-add-index.sh (incl. cc-with-tweaks.sh) for the new -dwarf-5 GDB option, and a new target board to make it more convenient to exercise this. To be done later. gdb/ChangeLog 2017-12-08 Jan Kratochvil <jan.kratochvil@redhat.com> Pedro Alves <palves@redhat.com> * contrib/gdb-add-index.sh (index): Rename to ... (index4): ... here. (index5, debugstr, debugstrmerge, debugstrerr): New variables. Support also .debug_names and .debug_str. * dwarf2read.c: Include cmath, set, list. (INDEX_SUFFIX): Rename to ... (INDEX4_SUFFIX): ... here. (INDEX5_SUFFIX, DEBUG_STR_SUFFIX): New. (file_write(FILE *, const void *, size_t)): New. (file_write(FILE *, const std::vector<Elem, Alloc> &)): New. (data_buf::append_unsigned_leb128, data_buf::empty): New. (data_buf::file_write): Use ::file_write. (data_buf::c_str, dwarf5_djb_hash, debug_names) (check_dwarf64_offsets): New. (psyms_seen_size, write_gdbindex): New from write_psymtabs_to_index code. (dwarf5_gdb_augmentation, write_debug_names, assert_file_size) (enum dw_index_kind): New. (write_psymtabs_to_index): New parameter index_kind. Support filename_str and out_file_str. Move code to write_gdbindex, possibly call write_debug_names. (save_gdb_index_command): New parameter -dwarf-5. (_initialize_dwarf2_read): Document the new parameter -dwarf-5. gdb/doc/ChangeLog 2017-12-08 Jan Kratochvil <jan.kratochvil@redhat.com> * gdb.texinfo (Index Files): Document .debug_names and -dwarf-5. -- gdb/contrib/gdb-add-index.sh | 53 ++ gdb/doc/gdb.texinfo | 24 + gdb/dwarf2read.c | 919 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 935 insertions(+), 61 deletions(-)
author: Jan Kratochvil <jan.kratochvil@redhat.com> 2017-12-08 22:44:10 +0000
committer: Pedro Alves <palves@redhat.com> 2017-12-08 23:33:27 +0000
commit: 437afbb81e3a04cbd933fc534a50c686eaa63b19 (patch)
tree: 9389ec170eff92880790a69075e48eccb3afdccc /gdb
parent: 6432ec65a8822246db5bcede0c6bd3ed0e464a0b (diff)
5 files changed, 964 insertions, 60 deletions
diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index e90a3f01b3..0f26785f16 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,6 +1,33 @@
 2017-12-08  Jan Kratochvil  <jan.kratochvil@redhat.com>
 	    Pedro Alves  <palves@redhat.com>
 
+	* contrib/gdb-add-index.sh (index): Rename to ...
+	(index4): ... here.
+	(index5, debugstr, debugstrmerge, debugstrerr): New variables.
+	Support also .debug_names and .debug_str.
+	* dwarf2read.c: Include cmath, set, list.
+	(INDEX_SUFFIX): Rename to ...
+	(INDEX4_SUFFIX): ... here.
+	(INDEX5_SUFFIX, DEBUG_STR_SUFFIX): New.
+	(file_write(FILE *, const void *, size_t)): New.
+	(file_write(FILE *, const std::vector<Elem, Alloc> &)): New.
+	(data_buf::append_unsigned_leb128, data_buf::empty): New.
+	(data_buf::file_write): Use ::file_write.
+	(data_buf::c_str, dwarf5_djb_hash, debug_names)
+	(check_dwarf64_offsets): New.
+	(psyms_seen_size, write_gdbindex): New from
+	write_psymtabs_to_index code.
+	(dwarf5_gdb_augmentation, write_debug_names, assert_file_size)
+	(enum dw_index_kind): New.
+	(write_psymtabs_to_index): New parameter index_kind.  Support
+	filename_str and out_file_str.  Move code to write_gdbindex,
+	possibly call write_debug_names.
+	(save_gdb_index_command): New parameter -dwarf-5.
+	(_initialize_dwarf2_read): Document the new parameter -dwarf-5.
+
+2017-12-08  Jan Kratochvil  <jan.kratochvil@redhat.com>
+	    Pedro Alves  <palves@redhat.com>
+
 	* contrib/cc-with-tweaks.sh: Change interpreter to bash, incl. initial
 	comment.
 	(GDB_ADD_INDEX): New variable.
diff --git a/gdb/contrib/gdb-add-index.sh b/gdb/contrib/gdb-add-index.sh
index 0cd4ce3c9d..02b17f0fc0 100755
--- a/gdb/contrib/gdb-add-index.sh
+++ b/gdb/contrib/gdb-add-index.sh
@@ -37,11 +37,15 @@ fi
 
 dir="${file%/*}"
 test "$dir" = "$file" && dir="."
-index="${file}.gdb-index"
+index4="${file}.gdb-index"
+index5="${file}.debug_names"
+debugstr="${file}.debug_str"
+debugstrmerge="${file}.debug_str.merge"
+debugstrerr="${file}.debug_str.err"
 
-rm -f $index
+rm -f $index4 $index5 $debugstr $debugstrmerge $debugstrerr
 # Ensure intermediate index file is removed when we exit.
-trap "rm -f $index" 0
+trap "rm -f $index4 $index5 $debugstr $debugstrmerge $debugstrerr" 0
 
 $GDB --batch -nx -iex 'set auto-load no' \
     -ex "file $file" -ex "save gdb-index $dir" || {
@@ -57,9 +61,46 @@ $GDB --batch -nx -iex 'set auto-load no' \
 # already stripped binary, it's a no-op.
 status=0
 
-if test -f "$index"; then
-    $OBJCOPY --add-section .gdb_index="$index" \
-	--set-section-flags .gdb_index=readonly "$file" "$file"
+if test -f "$index4" -a -f "$index5"; then
+    echo "$myname: Both index types were created for $file" 1>&2
+    status=1
+elif test -f "$index4" -o -f "$index5"; then
+    if test -f "$index4"; then
+	index="$index4"
+	section=".gdb_index"
+    else
+	index="$index5"
+	section=".debug_names"
+    fi
+    debugstradd=false
+    debugstrupdate=false
+    if test -s "$debugstr"; then
+	if ! $OBJCOPY --dump-section .debug_str="$debugstrmerge" "$file" /dev/null \
+		 2>$debugstrerr; then
+	    cat >&2 $debugstrerr
+	    exit 1
+	fi
+	if grep -q "can't dump section '.debug_str' - it does not exist" \
+		  $debugstrerr; then
+	    debugstradd=true
+	else
+	    debugstrupdate=true
+	    cat >&2 $debugstrerr
+	fi
+	cat "$debugstr" >>"$debugstrmerge"
+    fi
+
+    $OBJCOPY --add-section $section="$index" \
+	--set-section-flags $section=readonly \
+	$(if $debugstradd; then \
+	      echo --add-section .debug_str="$debugstrmerge"; \
+	      echo --set-section-flags .debug_str=readonly; \
+	  fi; \
+	  if $debugstrupdate; then \
+	      echo --update-section .debug_str="$debugstrmerge"; \
+	  fi) \
+	"$file" "$file"
+
     status=$?
 else
     echo "$myname: No index was created for $file" 1>&2
diff --git a/gdb/doc/ChangeLog b/gdb/doc/ChangeLog
index df294d6874..771064041c 100644
--- a/gdb/doc/ChangeLog
+++ b/gdb/doc/ChangeLog
@@ -1,3 +1,7 @@
+2017-12-08  Jan Kratochvil  <jan.kratochvil@redhat.com>
+
+	* gdb.texinfo (Index Files): Document .debug_names and -dwarf-5.
+
 2017-12-07  Keith Seitz  <keiths@redhat.com>
 
 	* gdb.texinfo (Symbols): Document "set print type nested-type-limit"
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 1e4ef98738..062f01d5ff 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -19565,12 +19565,16 @@ using @command{objcopy}.
 To create an index file, use the @code{save gdb-index} command:
 
 @table @code
-@item save gdb-index @var{directory}
+@item save gdb-index [-dwarf-5] @var{directory}
 @kindex save gdb-index
-Create an index file for each symbol file currently known by
-@value{GDBN}.  Each file is named after its corresponding symbol file,
-with @samp{.gdb-index} appended, and is written into the given
-@var{directory}.
+Create index files for all symbol files currently known by
+@value{GDBN}.  For each known @var{symbol-file}, this command by
+default creates it produces a single file
+@file{@var{symbol-file}.gdb-index}.  If you invoke this command with
+the @option{-dwarf-5} option, it produces 2 files:
+@file{@var{symbol-file}.debug_names} and
+@file{@var{symbol-file}.debug_str}.  The files are created in the
+given @var{directory}.
 @end table
 
 Once you have created an index file you can merge it into your symbol
@@ -19581,6 +19585,16 @@ $ objcopy --add-section .gdb_index=symfile.gdb-index \
     --set-section-flags .gdb_index=readonly symfile symfile
 @end smallexample
 
+Or for @code{-dwarf-5}:
+
+@smallexample
+$ objcopy --dump-section .debug_str=symfile.debug_str.new symfile
+$ cat symfile.debug_str >>symfile.debug_str.new
+$ objcopy --add-section .debug_names=symfile.gdb-index \
+    --set-section-flags .debug_names=readonly \
+    --update-section .debug_str=symfile.debug_str.new symfile symfile
+@end smallexample
+
 @value{GDBN} will normally ignore older versions of @file{.gdb_index}
 sections that have been deprecated.  Usually they are deprecated because
 they are missing a new feature or have performance issues.
diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index 68ddbb1dbd..a9daf1f3bc 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -82,6 +82,12 @@
 #include <unordered_set>
 #include <unordered_map>
 #include "selftest.h"
+#include <cmath>
+#include <set>
+#include <forward_list>
+
+typedef struct symbol *symbolp;
+DEF_VEC_P (symbolp);
 
 /* When == 1, print basic high level tracing messages.
    When > 1, be more verbose.
@@ -2267,7 +2273,9 @@ attr_value_as_address (struct attribute *attr)
 }
 
 /* The suffix for an index file.  */
-#define INDEX_SUFFIX ".gdb-index"
+#define INDEX4_SUFFIX ".gdb-index"
+#define INDEX5_SUFFIX ".debug_names"
+#define DEBUG_STR_SUFFIX ".debug_str"
 
 /* See declaration.  */
 
@@ -24232,6 +24240,25 @@ dwarf2_per_objfile_free (struct objfile *objfile, void *d)
 
 /* The "save gdb-index" command.  */
 
+/* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
+   error checking.  */
+
+static void
+file_write (FILE *file, const void *data, size_t size)
+{
+  if (fwrite (data, 1, size, file) != size)
+    error (_("couldn't data write to file"));
+}
+
+/* Write the contents of VEC to FILE, with error checking.  */
+
+template<typename Elem, typename Alloc>
+static void
+file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
+{
+  file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
+}
+
 /* In-memory buffer to prepare data to be written later to a file.  */
 class data_buf
 {
@@ -24253,6 +24280,21 @@ public:
     std::copy (cstr, cstr + size, grow (size));
   }
 
+  /* Store INPUT as ULEB128 to the end of buffer.  */
+  void append_unsigned_leb128 (ULONGEST input)
+  {
+    for (;;)
+      {
+	gdb_byte output = input & 0x7f;
+	input >>= 7;
+	if (input)
+	  output |= 0x80;
+	append_data (output);
+	if (input == 0)
+	  break;
+      }
+  }
+
   /* Accept a host-format integer in VAL and append it to the buffer
      as a target-format integer which is LEN bytes long.  */
   void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
@@ -24266,11 +24308,16 @@ public:
     return m_vec.size ();
   }
 
+  /* Return true iff the buffer is empty.  */
+  bool empty () const
+  {
+    return m_vec.empty ();
+  }
+
   /* Write the buffer to FILE.  */
   void file_write (FILE *file) const
   {
-    if (::fwrite (m_vec.data (), 1, m_vec.size (), file) != m_vec.size ())
-      error (_("couldn't write data to file"));
+    ::file_write (file, m_vec);
   }
 
 private:
@@ -24419,6 +24466,13 @@ public:
     return strcmp (m_cstr, other.m_cstr) == 0;
   }
 
+  /* Return the underlying C string.  Note, the returned string is
+     only a reference with lifetime of this object.  */
+  const char *c_str () const
+  {
+    return m_cstr;
+  }
+
 private:
   friend class c_str_view_hasher;
   const char *const m_cstr;
@@ -24771,57 +24825,575 @@ recursively_write_psymbols (struct objfile *objfile,
 		  1);
 }
 
-/* Create an index file for OBJFILE in the directory DIR.  */
+/* Symbol name hashing function as specified by DWARF-5.  */
 
-static void
-write_psymtabs_to_index (struct objfile *objfile, const char *dir)
+static uint32_t
+dwarf5_djb_hash (const char *str_)
 {
-  if (dwarf2_per_objfile->using_index)
-    error (_("Cannot use an index to create the index"));
+  const unsigned char *str = (const unsigned char *) str_;
 
-  if (VEC_length (dwarf2_section_info_def, dwarf2_per_objfile->types) > 1)
-    error (_("Cannot make an index when the file has multiple .debug_types sections"));
+  /* Note: tolower here ignores UTF-8, which isn't fully compliant.
+     See http://dwarfstd.org/ShowIssue.php?issue=161027.1.  */
 
-  if (!objfile->psymtabs || !objfile->psymtabs_addrmap)
-    return;
+  uint32_t hash = 5381;
+  while (int c = *str++)
+    hash = hash * 33 + tolower (c);
+  return hash;
+}
 
-  struct stat st;
-  if (stat (objfile_name (objfile), &st) < 0)
-    perror_with_name (objfile_name (objfile));
+/* DWARF-5 .debug_names builder.  */
+class debug_names
+{
+public:
+  debug_names (bool is_dwarf64, bfd_endian dwarf5_byte_order)
+    : m_dwarf5_byte_order (dwarf5_byte_order),
+      m_dwarf32 (dwarf5_byte_order),
+      m_dwarf64 (dwarf5_byte_order),
+      m_dwarf (is_dwarf64
+	       ? static_cast<dwarf &> (m_dwarf64)
+	       : static_cast<dwarf &> (m_dwarf32)),
+      m_name_table_string_offs (m_dwarf.name_table_string_offs),
+      m_name_table_entry_offs (m_dwarf.name_table_entry_offs)
+  {}
 
-  std::string filename (std::string (dir) + SLASH_STRING
-			+ lbasename (objfile_name (objfile)) + INDEX_SUFFIX);
+  /* Insert one symbol.  */
+  void insert (const partial_symbol *psym, int cu_index, bool is_static)
+  {
+    const int dwarf_tag = psymbol_tag (psym);
+    if (dwarf_tag == 0)
+      return;
+    const char *const name = SYMBOL_SEARCH_NAME (psym);
+    const auto insertpair
+      = m_name_to_value_set.emplace (c_str_view (name),
+				     std::set<symbol_value> ());
+    std::set<symbol_value> &value_set = insertpair.first->second;
+    value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static));
+  }
 
-  FILE *out_file = gdb_fopen_cloexec (filename.c_str (), "wb").release ();
-  if (!out_file)
-    error (_("Can't open `%s' for writing"), filename.c_str ());
+  /* Build all the tables.  All symbols must be already inserted.
+     This function does not call file_write, caller has to do it
+     afterwards.  */
+  void build ()
+  {
+    /* Verify the build method has not be called twice.  */
+    gdb_assert (m_abbrev_table.empty ());
+    const size_t name_count = m_name_to_value_set.size ();
+    m_bucket_table.resize
+      (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
+    m_hash_table.reserve (name_count);
+    m_name_table_string_offs.reserve (name_count);
+    m_name_table_entry_offs.reserve (name_count);
+
+    /* Map each hash of symbol to its name and value.  */
+    struct hash_it_pair
+    {
+      uint32_t hash;
+      decltype (m_name_to_value_set)::const_iterator it;
+    };
+    std::vector<std::forward_list<hash_it_pair>> bucket_hash;
+    bucket_hash.resize (m_bucket_table.size ());
+    for (decltype (m_name_to_value_set)::const_iterator it
+	   = m_name_to_value_set.cbegin ();
+	 it != m_name_to_value_set.cend ();
+	 ++it)
+      {
+	const char *const name = it->first.c_str ();
+	const uint32_t hash = dwarf5_djb_hash (name);
+	hash_it_pair hashitpair;
+	hashitpair.hash = hash;
+	hashitpair.it = it;
+	auto &slot = bucket_hash[hash % bucket_hash.size()];
+	slot.push_front (std::move (hashitpair));
+      }
+    for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
+      {
+	const std::forward_list<hash_it_pair> &hashitlist
+	  = bucket_hash[bucket_ix];
+	if (hashitlist.empty ())
+	  continue;
+	uint32_t &bucket_slot = m_bucket_table[bucket_ix];
+	/* The hashes array is indexed starting at 1.  */
+	store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
+				sizeof (bucket_slot), m_dwarf5_byte_order,
+				m_hash_table.size () + 1);
+	for (const hash_it_pair &hashitpair : hashitlist)
+	  {
+	    m_hash_table.push_back (0);
+	    store_unsigned_integer (reinterpret_cast<gdb_byte *>
+							(&m_hash_table.back ()),
+				    sizeof (m_hash_table.back ()),
+				    m_dwarf5_byte_order, hashitpair.hash);
+	    const c_str_view &name = hashitpair.it->first;
+	    const std::set<symbol_value> &value_set = hashitpair.it->second;
+	    m_name_table_string_offs.push_back_reorder
+	      (m_debugstrlookup.lookup (name.c_str ()));
+	    m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
+	    gdb_assert (!value_set.empty ());
+	    for (const symbol_value &value : value_set)
+	      {
+		int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
+							value.is_static)];
+		if (idx == 0)
+		  {
+		    idx = m_idx_next++;
+		    m_abbrev_table.append_unsigned_leb128 (idx);
+		    m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
+		    m_abbrev_table.append_unsigned_leb128 (DW_IDX_compile_unit);
+		    m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
+		    m_abbrev_table.append_unsigned_leb128 (value.is_static
+							   ? DW_IDX_GNU_internal
+							   : DW_IDX_GNU_external);
+		    m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
+
+		    /* Terminate attributes list.  */
+		    m_abbrev_table.append_unsigned_leb128 (0);
+		    m_abbrev_table.append_unsigned_leb128 (0);
+		  }
 
-  /* Order matters here; we want FILE to be closed before FILENAME is
-     unlinked, because on MS-Windows one cannot delete a file that is
-     still open.  (Don't call anything here that might throw until
-     file_closer is created.)  */
-  gdb::unlinker unlink_file (filename.c_str ());
-  gdb_file_up close_out_file (out_file);
+		m_entry_pool.append_unsigned_leb128 (idx);
+		m_entry_pool.append_unsigned_leb128 (value.cu_index);
+	      }
 
-  mapped_symtab symtab;
-  data_buf cu_list;
+	    /* Terminate the list of CUs.  */
+	    m_entry_pool.append_unsigned_leb128 (0);
+	  }
+      }
+    gdb_assert (m_hash_table.size () == name_count);
 
-  /* While we're scanning CU's create a table that maps a psymtab pointer
-     (which is what addrmap records) to its index (which is what is recorded
-     in the index file).  This will later be needed to write the address
-     table.  */
-  psym_index_map cu_index_htab;
-  cu_index_htab.reserve (dwarf2_per_objfile->n_comp_units);
+    /* Terminate tags list.  */
+    m_abbrev_table.append_unsigned_leb128 (0);
+  }
 
-  /* The CU list is already sorted, so we don't need to do additional
-     work here.  Also, the debug_types entries do not appear in
-     all_comp_units, but only in their own hash table.  */
+  /* Return .debug_names bucket count.  This must be called only after
+     calling the build method.  */
+  uint32_t bucket_count () const
+  {
+    /* Verify the build method has been already called.  */
+    gdb_assert (!m_abbrev_table.empty ());
+    const uint32_t retval = m_bucket_table.size ();
+
+    /* Check for overflow.  */
+    gdb_assert (retval == m_bucket_table.size ());
+    return retval;
+  }
+
+  /* Return .debug_names names count.  This must be called only after
+     calling the build method.  */
+  uint32_t name_count () const
+  {
+    /* Verify the build method has been already called.  */
+    gdb_assert (!m_abbrev_table.empty ());
+    const uint32_t retval = m_hash_table.size ();
+
+    /* Check for overflow.  */
+    gdb_assert (retval == m_hash_table.size ());
+    return retval;
+  }
+
+  /* Return number of bytes of .debug_names abbreviation table.  This
+     must be called only after calling the build method.  */
+  uint32_t abbrev_table_bytes () const
+  {
+    gdb_assert (!m_abbrev_table.empty ());
+    return m_abbrev_table.size ();
+  }
+
+  /* Recurse into all "included" dependencies and store their symbols
+     as if they appeared in this psymtab.  */
+  void recursively_write_psymbols
+    (struct objfile *objfile,
+     struct partial_symtab *psymtab,
+     std::unordered_set<partial_symbol *> &psyms_seen,
+     int cu_index)
+  {
+    for (int i = 0; i < psymtab->number_of_dependencies; ++i)
+      if (psymtab->dependencies[i]->user != NULL)
+	recursively_write_psymbols (objfile, psymtab->dependencies[i],
+				    psyms_seen, cu_index);
+
+    write_psymbols (psyms_seen,
+		    &objfile->global_psymbols[psymtab->globals_offset],
+		    psymtab->n_global_syms, cu_index, false);
+    write_psymbols (psyms_seen,
+		    &objfile->static_psymbols[psymtab->statics_offset],
+		    psymtab->n_static_syms, cu_index, true);
+  }
+
+  /* Return number of bytes the .debug_names section will have.  This
+     must be called only after calling the build method.  */
+  size_t bytes () const
+  {
+    /* Verify the build method has been already called.  */
+    gdb_assert (!m_abbrev_table.empty ());
+    size_t expected_bytes = 0;
+    expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
+    expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
+    expected_bytes += m_name_table_string_offs.bytes ();
+    expected_bytes += m_name_table_entry_offs.bytes ();
+    expected_bytes += m_abbrev_table.size ();
+    expected_bytes += m_entry_pool.size ();
+    return expected_bytes;
+  }
+
+  /* Write .debug_names to FILE_NAMES and .debug_str addition to
+     FILE_STR.  This must be called only after calling the build
+     method.  */
+  void file_write (FILE *file_names, FILE *file_str) const
+  {
+    /* Verify the build method has been already called.  */
+    gdb_assert (!m_abbrev_table.empty ());
+    ::file_write (file_names, m_bucket_table);
+    ::file_write (file_names, m_hash_table);
+    m_name_table_string_offs.file_write (file_names);
+    m_name_table_entry_offs.file_write (file_names);
+    m_abbrev_table.file_write (file_names);
+    m_entry_pool.file_write (file_names);
+    m_debugstrlookup.file_write (file_str);
+  }
+
+private:
+
+  /* Storage for symbol names mapping them to their .debug_str section
+     offsets.  */
+  class debug_str_lookup
+  {
+  public:
+
+    /* Object costructor to be called for current DWARF2_PER_OBJFILE.
+       All .debug_str section strings are automatically stored.  */
+    debug_str_lookup ()
+      : m_abfd (dwarf2_per_objfile->objfile->obfd)
+    {
+      dwarf2_read_section (dwarf2_per_objfile->objfile,
+			   &dwarf2_per_objfile->str);
+      if (dwarf2_per_objfile->str.buffer == NULL)
+	return;
+      for (const gdb_byte *data = dwarf2_per_objfile->str.buffer;
+	   data < (dwarf2_per_objfile->str.buffer
+		   + dwarf2_per_objfile->str.size);)
+	{
+	  const char *const s = reinterpret_cast<const char *> (data);
+	  const auto insertpair
+	    = m_str_table.emplace (c_str_view (s),
+				   data - dwarf2_per_objfile->str.buffer);
+	  if (!insertpair.second)
+	    complaint (&symfile_complaints,
+		       _("Duplicate string \"%s\" in "
+			 ".debug_str section [in module %s]"),
+		       s, bfd_get_filename (m_abfd));
+	  data += strlen (s) + 1;
+	}
+    }
+
+    /* Return offset of symbol name S in the .debug_str section.  Add
+       such symbol to the section's end if it does not exist there
+       yet.  */
+    size_t lookup (const char *s)
+    {
+      const auto it = m_str_table.find (c_str_view (s));
+      if (it != m_str_table.end ())
+	return it->second;
+      const size_t offset = (dwarf2_per_objfile->str.size
+			     + m_str_add_buf.size ());
+      m_str_table.emplace (c_str_view (s), offset);
+      m_str_add_buf.append_cstr0 (s);
+      return offset;
+    }
+
+    /* Append the end of the .debug_str section to FILE.  */
+    void file_write (FILE *file) const
+    {
+      m_str_add_buf.file_write (file);
+    }
+
+  private:
+    std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
+    bfd *const m_abfd;
+
+    /* Data to add at the end of .debug_str for new needed symbol names.  */
+    data_buf m_str_add_buf;
+  };
+
+  /* Container to map used DWARF tags to their .debug_names abbreviation
+     tags.  */
+  class index_key
+  {
+  public:
+    index_key (int dwarf_tag_, bool is_static_)
+      : dwarf_tag (dwarf_tag_), is_static (is_static_)
+    {
+    }
+
+    bool
+    operator== (const index_key &other) const
+    {
+      return dwarf_tag == other.dwarf_tag && is_static == other.is_static;
+    }
+
+    const int dwarf_tag;
+    const bool is_static;
+  };
+
+  /* Provide std::unordered_map::hasher for index_key.  */
+  class index_key_hasher
+  {
+  public:
+    size_t
+    operator () (const index_key &key) const
+    {
+      return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
+    }
+  };
+
+  /* Parameters of one symbol entry.  */
+  class symbol_value
+  {
+  public:
+    const int dwarf_tag, cu_index;
+    const bool is_static;
+
+    symbol_value (int dwarf_tag_, int cu_index_, bool is_static_)
+      : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_)
+    {}
+
+    bool
+    operator< (const symbol_value &other) const
+    {
+#define X(n) \
+  do \
+    { \
+      if (n < other.n) \
+	return true; \
+      if (n > other.n) \
+	return false; \
+    } \
+  while (0)
+      X (dwarf_tag);
+      X (is_static);
+      X (cu_index);
+#undef X
+      return false;
+    }
+  };
+
+  /* Abstract base class to unify DWARF-32 and DWARF-64 name table
+     output.  */
+  class offset_vec
+  {
+  protected:
+    const bfd_endian dwarf5_byte_order;
+  public:
+    explicit offset_vec (bfd_endian dwarf5_byte_order_)
+      : dwarf5_byte_order (dwarf5_byte_order_)
+    {}
+
+    /* Call std::vector::reserve for NELEM elements.  */
+    virtual void reserve (size_t nelem) = 0;
+
+    /* Call std::vector::push_back with store_unsigned_integer byte
+       reordering for ELEM.  */
+    virtual void push_back_reorder (size_t elem) = 0;
+
+    /* Return expected output size in bytes.  */
+    virtual size_t bytes () const = 0;
+
+    /* Write name table to FILE.  */
+    virtual void file_write (FILE *file) const = 0;
+  };
+
+  /* Template to unify DWARF-32 and DWARF-64 output.  */
+  template<typename OffsetSize>
+  class offset_vec_tmpl : public offset_vec
+  {
+  public:
+    explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
+      : offset_vec (dwarf5_byte_order_)
+    {}
+
+    /* Implement offset_vec::reserve.  */
+    void reserve (size_t nelem) override
+    {
+      m_vec.reserve (nelem);
+    }
+
+    /* Implement offset_vec::push_back_reorder.  */
+    void push_back_reorder (size_t elem) override
+    {
+      m_vec.push_back (elem);
+      /* Check for overflow.  */
+      gdb_assert (m_vec.back () == elem);
+      store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
+			      sizeof (m_vec.back ()), dwarf5_byte_order, elem);
+    }
+
+    /* Implement offset_vec::bytes.  */
+    size_t bytes () const override
+    {
+      return m_vec.size () * sizeof (m_vec[0]);
+    }
+
+    /* Implement offset_vec::file_write.  */
+    void file_write (FILE *file) const override
+    {
+      ::file_write (file, m_vec);
+    }
+
+  private:
+    std::vector<OffsetSize> m_vec;
+  };
+
+  /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
+     respecting name table width.  */
+  class dwarf
+  {
+  public:
+    offset_vec &name_table_string_offs, &name_table_entry_offs;
+
+    dwarf (offset_vec &name_table_string_offs_,
+	   offset_vec &name_table_entry_offs_)
+      : name_table_string_offs (name_table_string_offs_),
+	name_table_entry_offs (name_table_entry_offs_)
+    {
+    }
+  };
 
-  /* The psyms_seen set is potentially going to be largish (~40k
-     elements when indexing a -g3 build of GDB itself).  Estimate the
-     number of elements in order to avoid too many rehashes, which
-     require rebuilding buckets and thus many trips to
-     malloc/free.  */
+  /* Template to unify DWARF-32 and DWARF-64 .debug_names output
+     respecting name table width.  */
+  template<typename OffsetSize>
+  class dwarf_tmpl : public dwarf
+  {
+  public:
+    explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
+      : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
+	m_name_table_string_offs (dwarf5_byte_order_),
+	m_name_table_entry_offs (dwarf5_byte_order_)
+    {}
+
+  private:
+    offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
+    offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
+  };
+
+  /* Try to reconstruct original DWARF tag for given partial_symbol.
+     This function is not DWARF-5 compliant but it is sufficient for
+     GDB as a DWARF-5 index consumer.  */
+  static int psymbol_tag (const struct partial_symbol *psym)
+  {
+    domain_enum domain = PSYMBOL_DOMAIN (psym);
+    enum address_class aclass = PSYMBOL_CLASS (psym);
+
+    switch (domain)
+      {
+      case VAR_DOMAIN:
+	switch (aclass)
+	  {
+	  case LOC_BLOCK:
+	    return DW_TAG_subprogram;
+	  case LOC_TYPEDEF:
+	    return DW_TAG_typedef;
+	  case LOC_COMPUTED:
+	  case LOC_CONST_BYTES:
+	  case LOC_OPTIMIZED_OUT:
+	  case LOC_STATIC:
+	    return DW_TAG_variable;
+	  case LOC_CONST:
+	    /* Note: It's currently impossible to recognize psyms as enum values
+	       short of reading the type info.  For now punt.  */
+	    return DW_TAG_variable;
+	  default:
+	    /* There are other LOC_FOO values that one might want to classify
+	       as variables, but dwarf2read.c doesn't currently use them.  */
+	    return DW_TAG_variable;
+	  }
+      case STRUCT_DOMAIN:
+	return DW_TAG_structure_type;
+      default:
+	return 0;
+      }
+  }
+
+  /* Call insert for all partial symbols and mark them in PSYMS_SEEN.  */
+  void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
+		       struct partial_symbol **psymp, int count, int cu_index,
+		       bool is_static)
+  {
+    for (; count-- > 0; ++psymp)
+      {
+	struct partial_symbol *psym = *psymp;
+
+	if (SYMBOL_LANGUAGE (psym) == language_ada)
+	  error (_("Ada is not currently supported by the index"));
+
+	/* Only add a given psymbol once.  */
+	if (psyms_seen.insert (psym).second)
+	  insert (psym, cu_index, is_static);
+      }
+  }
+
+  /* Store value of each symbol.  */
+  std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
+    m_name_to_value_set;
+
+  /* Tables of DWARF-5 .debug_names.  They are in object file byte
+     order.  */
+  std::vector<uint32_t> m_bucket_table;
+  std::vector<uint32_t> m_hash_table;
+
+  const bfd_endian m_dwarf5_byte_order;
+  dwarf_tmpl<uint32_t> m_dwarf32;
+  dwarf_tmpl<uint64_t> m_dwarf64;
+  dwarf &m_dwarf;
+  offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
+  debug_str_lookup m_debugstrlookup;
+
+  /* Map each used .debug_names abbreviation tag parameter to its
+     index value.  */
+  std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
+
+  /* Next unused .debug_names abbreviation tag for
+     m_indexkey_to_idx.  */
+  int m_idx_next = 1;
+
+  /* .debug_names abbreviation table.  */
+  data_buf m_abbrev_table;
+
+  /* .debug_names entry pool.  */
+  data_buf m_entry_pool;
+};
+
+/* Return iff any of the needed offsets does not fit into 32-bit
+   .debug_names section.  */
+
+static bool
+check_dwarf64_offsets ()
+{
+  for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
+    {
+      const dwarf2_per_cu_data &per_cu = *dwarf2_per_objfile->all_comp_units[i];
+
+      if (to_underlying (per_cu.sect_off) >= (static_cast<uint64_t> (1) << 32))
+	return true;
+    }
+  for (int i = 0; i < dwarf2_per_objfile->n_type_units; ++i)
+    {
+      const signatured_type &sigtype = *dwarf2_per_objfile->all_type_units[i];
+      const dwarf2_per_cu_data &per_cu = sigtype.per_cu;
+
+      if (to_underlying (per_cu.sect_off) >= (static_cast<uint64_t> (1) << 32))
+	return true;
+    }
+  return false;
+}
+
+/* The psyms_seen set is potentially going to be largish (~40k
+   elements when indexing a -g3 build of GDB itself).  Estimate the
+   number of elements in order to avoid too many rehashes, which
+   require rebuilding buckets and thus many trips to
+   malloc/free.  */
+
+static size_t
+psyms_seen_size ()
+{
   size_t psyms_count = 0;
   for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
     {
@@ -24834,7 +25406,30 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
     }
   /* Generating an index for gdb itself shows a ratio of
      TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5.  4 seems like a good bet.  */
-  std::unordered_set<partial_symbol *> psyms_seen (psyms_count / 4);
+  return psyms_count / 4;
+}
+
+/* Write new .gdb_index section for OBJFILE into OUT_FILE.
+   Return how many bytes were expected to be written into OUT_FILE.  */
+
+static size_t
+write_gdbindex (struct objfile *objfile, FILE *out_file)
+{
+  mapped_symtab symtab;
+  data_buf cu_list;
+
+  /* While we're scanning CU's create a table that maps a psymtab pointer
+     (which is what addrmap records) to its index (which is what is recorded
+     in the index file).  This will later be needed to write the address
+     table.  */
+  psym_index_map cu_index_htab;
+  cu_index_htab.reserve (dwarf2_per_objfile->n_comp_units);
+
+  /* The CU list is already sorted, so we don't need to do additional
+     work here.  Also, the debug_types entries do not appear in
+     all_comp_units, but only in their own hash table.  */
+
+  std::unordered_set<partial_symbol *> psyms_seen (psyms_seen_size ());
   for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
     {
       struct dwarf2_per_cu_data *per_cu
@@ -24920,22 +25515,241 @@ write_psymtabs_to_index (struct objfile *objfile, const char *dir)
   symtab_vec.file_write (out_file);
   constant_pool.file_write (out_file);
 
+  return total_len;
+}
+
+/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension.  */
+static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
+
+/* Write a new .debug_names section for OBJFILE into OUT_FILE, write
+   needed addition to .debug_str section to OUT_FILE_STR.  Return how
+   many bytes were expected to be written into OUT_FILE.  */
+
+static size_t
+write_debug_names (struct objfile *objfile, FILE *out_file, FILE *out_file_str)
+{
+  const bool dwarf5_is_dwarf64 = check_dwarf64_offsets ();
+  const int dwarf5_offset_size = dwarf5_is_dwarf64 ? 8 : 4;
+  const enum bfd_endian dwarf5_byte_order
+    = gdbarch_byte_order (get_objfile_arch (objfile));
+
+  /* The CU list is already sorted, so we don't need to do additional
+     work here.  Also, the debug_types entries do not appear in
+     all_comp_units, but only in their own hash table.  */
+  data_buf cu_list;
+  debug_names nametable (dwarf5_is_dwarf64, dwarf5_byte_order);
+  std::unordered_set<partial_symbol *> psyms_seen (psyms_seen_size ());
+  for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
+    {
+      const dwarf2_per_cu_data *per_cu = dwarf2_per_objfile->all_comp_units[i];
+      partial_symtab *psymtab = per_cu->v.psymtab;
+
+      /* CU of a shared file from 'dwz -m' may be unused by this main
+	 file.  It may be referenced from a local scope but in such
+	 case it does not need to be present in .debug_names.  */
+      if (psymtab == NULL)
+	continue;
+
+      if (psymtab->user == NULL)
+	nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen, i);
+
+      cu_list.append_uint (dwarf5_offset_size, dwarf5_byte_order,
+			   to_underlying (per_cu->sect_off));
+    }
+  nametable.build ();
+
+  /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC.  */
+
+  data_buf types_cu_list;
+  for (int i = 0; i < dwarf2_per_objfile->n_type_units; ++i)
+    {
+      const signatured_type &sigtype = *dwarf2_per_objfile->all_type_units[i];
+      const dwarf2_per_cu_data &per_cu = sigtype.per_cu;
+
+      types_cu_list.append_uint (dwarf5_offset_size, dwarf5_byte_order,
+				 to_underlying (per_cu.sect_off));
+    }
+
+  const offset_type bytes_of_header
+    = ((dwarf5_is_dwarf64 ? 12 : 4)
+       + 2 + 2 + 7 * 4
+       + sizeof (dwarf5_gdb_augmentation));
+  size_t expected_bytes = 0;
+  expected_bytes += bytes_of_header;
+  expected_bytes += cu_list.size ();
+  expected_bytes += types_cu_list.size ();
+  expected_bytes += nametable.bytes ();
+  data_buf header;
+
+  if (!dwarf5_is_dwarf64)
+    {
+      const uint64_t size64 = expected_bytes - 4;
+      gdb_assert (size64 < 0xfffffff0);
+      header.append_uint (4, dwarf5_byte_order, size64);
+    }
+  else
+    {
+      header.append_uint (4, dwarf5_byte_order, 0xffffffff);
+      header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
+    }
+
+  /* The version number.  */
+  header.append_uint (2, dwarf5_byte_order, 5);
+
+  /* Padding.  */
+  header.append_uint (2, dwarf5_byte_order, 0);
+
+  /* comp_unit_count - The number of CUs in the CU list.  */
+  header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_comp_units);
+
+  /* local_type_unit_count - The number of TUs in the local TU
+     list.  */
+  header.append_uint (4, dwarf5_byte_order, dwarf2_per_objfile->n_type_units);
+
+  /* foreign_type_unit_count - The number of TUs in the foreign TU
+     list.  */
+  header.append_uint (4, dwarf5_byte_order, 0);
+
+  /* bucket_count - The number of hash buckets in the hash lookup
+     table.  */
+  header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
+
+  /* name_count - The number of unique names in the index.  */
+  header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
+
+  /* abbrev_table_size - The size in bytes of the abbreviations
+     table.  */
+  header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
+
+  /* augmentation_string_size - The size in bytes of the augmentation
+     string.  This value is rounded up to a multiple of 4.  */
+  static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
+  header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
+  header.append_data (dwarf5_gdb_augmentation);
+
+  gdb_assert (header.size () == bytes_of_header);
+
+  header.file_write (out_file);
+  cu_list.file_write (out_file);
+  types_cu_list.file_write (out_file);
+  nametable.file_write (out_file, out_file_str);
+
+  return expected_bytes;
+}
+
+/* Assert that FILE's size is EXPECTED_SIZE.  Assumes file's seek
+   position is at the end of the file.  */
+
+static void
+assert_file_size (FILE *file, const char *filename, size_t expected_size)
+{
+  const auto file_size = ftell (file);
+  if (file_size == -1)
+    error (_("Can't get `%s' size"), filename);
+  gdb_assert (file_size == expected_size);
+}
+
+/* An index variant.  */
+enum dw_index_kind
+{
+  /* GDB's own .gdb_index format.   */
+  GDB_INDEX,
+
+  /* DWARF5 .debug_names.  */
+  DEBUG_NAMES,
+};
+
+/* Create an index file for OBJFILE in the directory DIR.  */
+
+static void
+write_psymtabs_to_index (struct objfile *objfile, const char *dir,
+			 dw_index_kind index_kind)
+{
+  if (dwarf2_per_objfile->using_index)
+    error (_("Cannot use an index to create the index"));
+
+  if (VEC_length (dwarf2_section_info_def, dwarf2_per_objfile->types) > 1)
+    error (_("Cannot make an index when the file has multiple .debug_types sections"));
+
+  if (!objfile->psymtabs || !objfile->psymtabs_addrmap)
+    return;
+
+  struct stat st;
+  if (stat (objfile_name (objfile), &st) < 0)
+    perror_with_name (objfile_name (objfile));
+
+  std::string filename (std::string (dir) + SLASH_STRING
+			+ lbasename (objfile_name (objfile))
+			+ (index_kind == dw_index_kind::DEBUG_NAMES
+			   ? INDEX5_SUFFIX : INDEX4_SUFFIX));
+
+  FILE *out_file = gdb_fopen_cloexec (filename.c_str (), "wb").release ();
+  if (!out_file)
+    error (_("Can't open `%s' for writing"), filename.c_str ());
+
+  /* Order matters here; we want FILE to be closed before FILENAME is
+     unlinked, because on MS-Windows one cannot delete a file that is
+     still open.  (Don't call anything here that might throw until
+     file_closer is created.)  */
+  gdb::unlinker unlink_file (filename.c_str ());
+  gdb_file_up close_out_file (out_file);
+
+  if (index_kind == dw_index_kind::DEBUG_NAMES)
+    {
+      std::string filename_str (std::string (dir) + SLASH_STRING
+				+ lbasename (objfile_name (objfile))
+				+ DEBUG_STR_SUFFIX);
+      FILE *out_file_str
+	= gdb_fopen_cloexec (filename_str.c_str (), "wb").release ();
+      if (!out_file_str)
+	error (_("Can't open `%s' for writing"), filename_str.c_str ());
+      gdb::unlinker unlink_file_str (filename_str.c_str ());
+      gdb_file_up close_out_file_str (out_file_str);
+
+      const size_t total_len
+	= write_debug_names (objfile, out_file, out_file_str);
+      assert_file_size (out_file, filename.c_str (), total_len);
+
+      /* We want to keep the file .debug_str file too.  */
+      unlink_file_str.keep ();
+    }
+  else
+    {
+      const size_t total_len
+	= write_gdbindex (objfile, out_file);
+      assert_file_size (out_file, filename.c_str (), total_len);
+    }
+
   /* We want to keep the file.  */
   unlink_file.keep ();
 }
 
 /* Implementation of the `save gdb-index' command.
    
-   Note that the file format used by this command is documented in the
-   GDB manual.  Any changes here must be documented there.  */
+   Note that the .gdb_index file format used by this command is
+   documented in the GDB manual.  Any changes here must be documented
+   there.  */
 
 static void
 save_gdb_index_command (const char *arg, int from_tty)
 {
   struct objfile *objfile;
+  const char dwarf5space[] = "-dwarf-5 ";
+  dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
+
+  if (!arg)
+    arg = "";
+
+  arg = skip_spaces (arg);
+  if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
+    {
+      index_kind = dw_index_kind::DEBUG_NAMES;
+      arg += strlen (dwarf5space);
+      arg = skip_spaces (arg);
+    }
 
-  if (!arg || !*arg)
-    error (_("usage: save gdb-index DIRECTORY"));
+  if (!*arg)
+    error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
 
   ALL_OBJFILES (objfile)
   {
@@ -24953,7 +25767,7 @@ save_gdb_index_command (const char *arg, int from_tty)
 
 	TRY
 	  {
-	    write_psymtabs_to_index (objfile, arg);
+	    write_psymtabs_to_index (objfile, arg, index_kind);
 	  }
 	CATCH (except, RETURN_MASK_ERROR)
 	  {
@@ -25085,7 +25899,11 @@ Warning: This option must be enabled before gdb reads the file."),
   c = add_cmd ("gdb-index", class_files, save_gdb_index_command,
 	       _("\
 Save a gdb-index file.\n\
-Usage: save gdb-index DIRECTORY"),
+Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
+\n\
+No options create one file with .gdb-index extension for pre-DWARF-5\n\
+compatible .gdb_index section.  With -dwarf-5 creates two files with\n\
+extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
 	       &save_cmdlist);
   set_cmd_completer (c, filename_completer);
author	Jan Kratochvil <jan.kratochvil@redhat.com>	2017-12-08 22:44:10 +0000
committer	Pedro Alves <palves@redhat.com>	2017-12-08 23:33:27 +0000
commit	437afbb81e3a04cbd933fc534a50c686eaa63b19 (patch)
tree	9389ec170eff92880790a69075e48eccb3afdccc /gdb
parent	6432ec65a8822246db5bcede0c6bd3ed0e464a0b (diff)