summaryrefslogtreecommitdiff
path: root/gcc/input.c
diff options
context:
space:
mode:
authorLewis Hyatt <lhyatt@gmail.com>2019-12-09 20:03:47 +0000
committerDavid Malcolm <dmalcolm@gcc.gnu.org>2019-12-09 20:03:47 +0000
commitee9256409f21eab5df5076e46d220d6a0b995f79 (patch)
tree68053762905d3e64e86dc0db19b7d1f3d65b5ba8 /gcc/input.c
parent763c9f4a8544318998c7adf04e4c92e9a4b85614 (diff)
Byte vs column awareness for diagnostic-show-locus.c (PR 49973)
contrib/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * unicode/from_glibc/unicode_utils.py: Support script from glibc (commit 464cd3) to extract character widths from Unicode data files. * unicode/from_glibc/utf8_gen.py: Likewise. * unicode/UnicodeData.txt: Unicode v. 12.1.0 data file. * unicode/EastAsianWidth.txt: Likewise. * unicode/PropList.txt: Likewise. * unicode/gen_wcwidth.py: New utility to generate libcpp/generated_cpp_wcwidth.h with help from the glibc support scripts and the Unicode data files. * unicode/unicode-license.txt: Added. * unicode/README: New explanatory file. libcpp/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * generated_cpp_wcwidth.h: New file generated by ../contrib/unicode/gen_wcwidth.py, supports new cpp_wcwidth function. * charset.c (compute_next_display_width): New function to help implement display columns. (cpp_byte_column_to_display_column): Likewise. (cpp_display_column_to_byte_column): Likewise. (cpp_wcwidth): Likewise. * include/cpplib.h (cpp_byte_column_to_display_column): Declare. (cpp_display_column_to_byte_column): Declare. (cpp_wcwidth): Declare. (cpp_display_width): New function. gcc/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * input.c (location_compute_display_column): New function to help with multibyte awareness in diagnostics. (test_cpp_utf8): New self-test. (input_c_tests): Call the new test. * input.h (location_compute_display_column): Declare. * diagnostic-show-locus.c: Pervasive changes to add multibyte awareness to all classes and functions. (enum column_unit): New enum. (class exploc_with_display_col): New class. (class layout_point): Convert m_column member to array m_columns[2]. (layout_range::contains_point): Add col_unit argument. (test_layout_range_for_single_point): Pass new argument. (test_layout_range_for_single_line): Likewise. (test_layout_range_for_multiple_lines): Likewise. 
(line_bounds::convert_to_display_cols): New function. (layout::get_state_at_point): Add col_unit argument. (make_range): Use empty filename rather than dummy filename. (get_line_width_without_trailing_whitespace): Rename to... (get_line_bytes_without_trailing_whitespace): ...this. (test_get_line_width_without_trailing_whitespace): Rename to... (test_get_line_bytes_without_trailing_whitespace): ...this. (class layout): m_exploc changed to exploc_with_display_col from plain expanded_location. (layout::get_linenum_width): New accessor member function. (layout::get_x_offset_display): Likewise. (layout::calculate_linenum_width): New subroutine for the constructor. (layout::calculate_x_offset_display): Likewise. (layout::layout): Use the new subroutines. Add multibyte awareness. (layout::print_source_line): Add multibyte awareness. (layout::print_line): Likewise. (layout::print_annotation_line): Likewise. (line_label::line_label): Likewise. (layout::print_any_labels): Likewise. (layout::annotation_line_showed_range_p): Likewise. (get_printed_columns): Likewise. (class line_label): Rename m_length to m_display_width. (get_affected_columns): Rename to... (get_affected_range): ...this; add col_unit argument and multibyte awareness. (class correction): Add m_affected_bytes and m_display_cols members. Rename m_len to m_byte_length for clarity. Add multibyte awareness throughout. (correction::insertion_p): Add multibyte awareness. (correction::compute_display_cols): New function. (correction::ensure_terminated): Use new member name m_byte_length. (line_corrections::add_hint): Add multibyte awareness. (layout::print_trailing_fixits): Likewise. (layout::get_x_bound_for_row): Likewise. (test_one_liner_simple_caret_utf8): New self-test analogous to the one with _utf8 suffix removed, testing multibyte awareness. (test_one_liner_caret_and_range_utf8): Likewise. (test_one_liner_multiple_carets_and_ranges_utf8): Likewise. (test_one_liner_fixit_insert_before_utf8): Likewise. 
(test_one_liner_fixit_insert_after_utf8): Likewise. (test_one_liner_fixit_remove_utf8): Likewise. (test_one_liner_fixit_replace_utf8): Likewise. (test_one_liner_fixit_replace_non_equal_range_utf8): Likewise. (test_one_liner_fixit_replace_equal_secondary_range_utf8): Likewise. (test_one_liner_fixit_validation_adhoc_locations_utf8): Likewise. (test_one_liner_many_fixits_1_utf8): Likewise. (test_one_liner_many_fixits_2_utf8): Likewise. (test_one_liner_labels_utf8): Likewise. (test_diagnostic_show_locus_one_liner_utf8): Likewise. (test_overlapped_fixit_printing_utf8): Likewise. (test_overlapped_fixit_printing): Adapt for changes to get_affected_columns, get_printed_columns and class corrections. (test_overlapped_fixit_printing_2): Likewise. (test_linenum_sep): New constant. (test_left_margin): Likewise. (test_offset_impl): Helper function for new test. (test_layout_x_offset_display_utf8): New test. (diagnostic_show_locus_c_tests): Call new tests. gcc/testsuite/ChangeLog: 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * gcc.dg/plugin/diagnostic_plugin_test_show_locus.c (test_show_locus): Tweak so that expected output is the same as before the diagnostic-show-locus.c changes. * gcc.dg/cpp/pr66415-1.c: Likewise. From-SVN: r279137
Diffstat (limited to 'gcc/input.c')
-rw-r--r--gcc/input.c105
1 files changed, 105 insertions, 0 deletions
diff --git a/gcc/input.c b/gcc/input.c
index 00301ef68dd..1dc6b339afe 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -908,6 +908,22 @@ make_location (location_t caret, source_range src_range)
return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
}
+/* An expanded_location stores the column in byte units. This function
+ converts that column to display units. That requires reading the associated
+ source line in order to calculate the display width. If that cannot be done
+ for any reason, then the byte column is returned as a fallback.
+
+ In particular, EXPLOC.column is returned unchanged, without attempting to
+ read any source, when EXPLOC.file is NULL or empty, or when EXPLOC.line or
+ EXPLOC.column is zero. */
+int
+location_compute_display_column (expanded_location exploc)
+{
+ if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
+ return exploc.column;
+ char_span line = location_get_source_line (exploc.file, exploc.line);
+ /* If line is NULL, cpp_byte_column_to_display_column returns the byte
+ column it was given (here exploc.column) unchanged, which is the
+ desired fallback, so no separate check is needed here. */
+ return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
+ exploc.column);
+}
+
/* Dump statistics to stderr about the memory usage of the line_table
set of line maps. This also displays some statistics about macro
expansion. */
@@ -3590,6 +3606,93 @@ test_line_offset_overflow ()
ASSERT_NE (ordmap_a, ordmap_b);
}
+void test_cpp_utf8 ()
+{
+ /* Selftest for the display-width helpers cpp_display_width,
+ cpp_byte_column_to_display_column and cpp_display_column_to_byte_column.
+
+ Verify that wcwidth of invalid UTF-8 or control bytes is 1. Each of the
+ 8 (resp. 6) bytes below is expected to occupy exactly one column. */
+ {
+ int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8);
+ ASSERT_EQ (8, w_bad);
+ int w_ctrl = cpp_display_width ("\r\t\n\v\0\1", 6);
+ ASSERT_EQ (6, w_ctrl);
+ }
+
+ /* Verify that wcwidth of valid UTF-8 is as expected: the emoji and the
+ Han character are expected to take 2 columns, and a combining
+ character (y followed by U+0308) is expected to add no width. The
+ mixed string is 24 bytes long and also contains one stray invalid byte
+ (\x9f), which is expected to count as 1 column. */
+ {
+ const int w_pi = cpp_display_width ("\xcf\x80", 2);
+ ASSERT_EQ (1, w_pi);
+ const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4);
+ ASSERT_EQ (2, w_emoji);
+ const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2);
+ ASSERT_EQ (1, w_umlaut_precomposed);
+ const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3);
+ ASSERT_EQ (1, w_umlaut_combining);
+ const int w_han = cpp_display_width ("\xe4\xb8\xba", 3);
+ ASSERT_EQ (2, w_han);
+ const int w_ascii = cpp_display_width ("GCC", 3);
+ ASSERT_EQ (3, w_ascii);
+ const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
+ "\x9f! \xe4\xb8\xba y\xcc\x88", 24);
+ ASSERT_EQ (18, w_mixed);
+ }
+
+ /* Verify that cpp_byte_column_to_display_column can go past the end,
+ and similar edge cases. Each byte column beyond the end of the 6-byte
+ line is expected to count as one further display column, and with a
+ NULL buffer the requested column is expected to come back unchanged. */
+ {
+ const char *str
+ /* Display columns.
+ 111111112345 */
+ = "\xcf\x80 abc";
+ /* 111122223456
+ Byte columns. */
+
+ ASSERT_EQ (5, cpp_display_width (str, 6));
+ ASSERT_EQ (105, cpp_byte_column_to_display_column (str, 6, 106));
+ ASSERT_EQ (10000, cpp_byte_column_to_display_column (NULL, 0, 10000));
+ ASSERT_EQ (0, cpp_byte_column_to_display_column (NULL, 10000, 0));
+ }
+
+ /* Verify that cpp_display_column_to_byte_column can go past the end,
+ and similar edge cases, and check invertibility. The line is 15 bytes
+ and 11 display columns wide; each display column beyond the end is
+ expected to count as one further byte column. */
+ {
+ const char *str
+ /* Display columns.
+ 000000000000000000000000000000000000011
+ 111111112222222234444444455555555678901 */
+ = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
+ /* 000000000000000000000000000000000111111
+ 111122223333444456666777788889999012345
+ Byte columns. */
+ ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2));
+ ASSERT_EQ (15, cpp_display_column_to_byte_column (str, 15, 11));
+ ASSERT_EQ (115, cpp_display_column_to_byte_column (str, 15, 111));
+ ASSERT_EQ (10000, cpp_display_column_to_byte_column (NULL, 0, 10000));
+ ASSERT_EQ (0, cpp_display_column_to_byte_column (NULL, 10000, 0));
+
+ /* Verify that we do not interrupt a UTF-8 sequence: display column 1
+ lies within the 2-column emoji, so the byte column of the last byte
+ of its 4-byte sequence is expected. */
+ ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1));
+
+ for (int byte_col = 1; byte_col <= 15; ++byte_col)
+ {
+ const int disp_col = cpp_byte_column_to_display_column (str, 15,
+ byte_col);
+ const int byte_col2 = cpp_display_column_to_byte_column (str, 15,
+ disp_col);
+
+ /* If we ask for the display column in the middle of a UTF-8
+ sequence, it will return the length of the partial sequence,
+ matching the behavior of GCC before display column support.
+ Otherwise check the round trip was successful.
+
+ Byte columns 1-3 and 6-8 are the non-final bytes of the two
+ 4-byte emoji in STR; the 3 in the second case is the display
+ width of everything before the second emoji. */
+ if (byte_col < 4)
+ ASSERT_EQ (byte_col, disp_col);
+ else if (byte_col >= 6 && byte_col < 9)
+ ASSERT_EQ (3 + (byte_col - 5), disp_col);
+ else
+ ASSERT_EQ (byte_col2, byte_col);
+ }
+ }
+
+}
+
/* Run all of the selftests within this file. */
void
@@ -3631,6 +3734,8 @@ input_c_tests ()
test_reading_source_line ();
test_line_offset_overflow ();
+
+ test_cpp_utf8 ();
}
} // namespace selftest