Hi,

as discussed, I rebased and tested my patch against current master. 
Additionally, it now has the Signed-off-by tag.
Looking forward to your comments.

Best
Lucas

----

Within a compile cluster, only the preprocessed output of GCC is transferred
to remote nodes for compilation. When GCC produces advanced diagnostics
(with -fdiagnostics-show-caret), e.g. prints out the affected source
line and fixit hints, it attempts to read the source file again, even when
compiling a preprocessed file (-fpreprocessed). This leads to wrong
diagnostics when building with a compile cluster, or, more generally,
when changing or deleting the original source file.

This patch alters GCC to read from the preprocessed file instead by
calculating the corresponding source line. This behavior is consistent
with clang.

gcc/c-family/ChangeLog:

        * c-opts.cc (c_common_handle_option): pass -fpreprocessed
        option value to global diagnostic configuration

gcc/ChangeLog:

        * diagnostic-show-locus.cc (layout::calculate_x_offset_display): read 
line from source or preprocessed
        file based on -fpreprocessed value
        (source_line::source_line): read line from source or preprocessed
        file based on -fpreprocessed value
        (layout_printer::print_line): read line from source or preprocessed
        file based on -fpreprocessed value

        * diagnostic.cc (diagnostic_context::initialize): initialize new members
        * diagnostic.h: new members for reading
        source lines from preprocessed files

        * input.cc (file_cache::get_source_line_preprocessed): new function
        to read source lines from preprocessed files
        (test_reading_source_line_preprocessed): new test case
        (input_cc_tests): execute new test case
        * input.h (class file_cache): add new member function

        * opts-global.cc (read_cmdline_options): pass input filename to global
        diagnostic context

Signed-off-by: Lucas Bader <lucas.ba...@sap.com>
---
 gcc/c-family/c-opts.cc       |   1 +
 gcc/diagnostic-show-locus.cc |  25 ++++--
 gcc/diagnostic.cc            |   2 +
 gcc/diagnostic.h             |   6 ++
 gcc/input.cc                 | 169 +++++++++++++++++++++++++++++++++++
 gcc/input.h                  |   2 +
 gcc/opts-global.cc           |   4 +
 7 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 87b231861a6..105d8fa6eff 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, 
HOST_WIDE_INT value,
 
     case OPT_fpreprocessed:
       cpp_opts->preprocessed = value;
+      global_dc->m_is_preprocessed = value;
       break;
 
     case OPT_fdebug_cpp:
diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
index 898efe74acf..7ac183a126f 100644
--- a/gcc/diagnostic-show-locus.cc
+++ b/gcc/diagnostic-show-locus.cc
@@ -1786,8 +1786,13 @@ layout::calculate_x_offset_display ()
       return;
     }
 
-  const char_span line = m_file_cache.get_source_line (m_exploc.file,
-                                                      m_exploc.line);
+  char_span line (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+    line = m_file_cache.get_source_line_preprocessed (
+         m_exploc.file, global_dc->m_main_input_file_path, m_exploc.line);
+  else
+    line = m_file_cache.get_source_line (m_exploc.file, m_exploc.line);
+
   if (!line)
     {
       /* Nothing to do, we couldn't find the source line.  */
@@ -2780,7 +2785,12 @@ public:
 
 source_line::source_line (file_cache &fc, const char *filename, int line)
 {
-  char_span span = fc.get_source_line (filename, line);
+  char_span span (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+    span = fc.get_source_line_preprocessed (
+       filename, global_dc->m_main_input_file_path, line);
+  else
+    span = fc.get_source_line (filename, line);
   chars = span.get_buffer ();
   width = span.length ();
 }
@@ -3132,8 +3142,13 @@ layout_printer::show_ruler (int max_column)
 void
 layout_printer::print_line (linenum_type row)
 {
-  char_span line
-    = m_layout.m_file_cache.get_source_line (m_layout.m_exploc.file, row);
+  char_span line (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+    line = m_layout.m_file_cache.get_source_line_preprocessed (
+       m_layout.m_exploc.file, global_dc->m_main_input_file_path, row);
+  else
+    line = m_layout.m_file_cache.get_source_line (m_layout.m_exploc.file, row);
+
   if (!line)
     return;
 
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index c2f6714c24a..c8a93d407ad 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -290,6 +290,8 @@ diagnostic_context::initialize (int n_opts)
   m_diagrams.m_theme = nullptr;
   m_original_argv = nullptr;
   m_diagnostic_buffer = nullptr;
+  m_main_input_file_path = nullptr;
+  m_is_preprocessed = false;
 
   enum diagnostic_text_art_charset text_art_charset
     = DIAGNOSTICS_TEXT_ART_CHARSET_EMOJI;
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 202760b2f85..60a7900eb23 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -850,6 +850,12 @@ public:
   /* True if warnings should be given in system headers.  */
   bool m_warn_system_headers;
 
+  /* Original input file.  */
+  const char* m_main_input_file_path;
+
+  /* True if the input file is treated as preprocessed.  */
+  bool m_is_preprocessed;
+
 private:
   /* Maximum number of errors to report.  */
   int m_max_errors;
diff --git a/gcc/input.cc b/gcc/input.cc
index 9f3cc6651e8..c261f2c4750 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -1056,6 +1056,109 @@ file_cache::get_source_line (const char *file_path, int 
line)
   return char_span (buffer, len);
 }
 
+/* Return the physical source line that corresponds to SOURCE_NAME/LINE.
+   Read the line from the preprocessed file at FILE_PATH.
+   The line is not nul-terminated.  The returned pointer is only
+   valid until the next call of get_source_line.
+   Note that the line can contain several null characters,
+   so the returned value's length has the actual length of the line.
+   If the function fails, a NULL char_span is returned.  */
+
+char_span
+file_cache::get_source_line_preprocessed (const char *source_name,
+                                      const char *file_path, int line)
+{
+  char *buffer = NULL;
+  ssize_t len;
+
+  if (line == 0)
+    return char_span (NULL, 0);
+
+  file_cache_slot *c = lookup_or_add_file (file_path);
+  if (c == NULL)
+    return char_span (NULL, 0);
+
+  // find linemarker closest to actual line number
+  int linemarker_line = 0; // line no of linemarker in pp file
+  int linemarker_loc = 0;  // source line no indicated in linemarker
+  int current_line = 1;
+  ssize_t source_name_len = (ssize_t) strlen (source_name);
+  while (c->read_line_num (current_line, &buffer, &len))
+    {
+      if (len > 2 && buffer[0] == '#' && ISDIGIT (buffer[2]))
+       {
+         // is a linemarker
+         bool is_match = false;
+         for (ssize_t i = 3; i < len && source_name_len < len - i - 1; ++i)
+           {
+             if (buffer[i] == '"')
+               {
+                 is_match
+                     = memcmp (buffer + i + 1, source_name, source_name_len)
+                       == 0;
+                 break;
+               }
+           }
+         bool match_past_line = false;
+         if (is_match)
+           {
+             // we found a linemarker that matches the source file
+             // e.g.
+             // # 14 "../../file.h"
+             // we use the last suitable marker we find
+             int loc = atoi (buffer + 2);
+             if (loc <= line && line - loc <= line - linemarker_loc)
+               {
+                 // new best candidate
+                 linemarker_loc = loc;
+                 linemarker_line = current_line;
+                 current_line++;
+                 continue;
+               }
+             else if (loc > line)
+               {
+                 // don't break directly due to validity check
+                 match_past_line = true;
+               }
+           }
+         if (linemarker_line != 0
+             && linemarker_line + (line - linemarker_loc) + 1 >= current_line)
+           {
+             // if we find any other linemarker, we have to check if our
+             // potential source line is outside the current candidate if
+             // that's the case, we invalidate the candidate
+             linemarker_line = 0;
+             linemarker_loc = 0;
+           }
+         if (match_past_line)
+           break;
+       }
+      current_line++;
+    }
+  if (linemarker_line != 0)
+    {
+      // line of linemarker in the preprocessed file is the base at which we
+      // start counting the line indicated by the linemarker, e.g. the 5 in # 5
+      // "file.cpp", is the original source line number which starts right
+      // below the linemarker So to read line 7 of the original source file
+      // "file.cpp" we treat line 2055 as line 5 of the original line 7 = 2054
+      // + (7 - 5) + 1 = 2057
+      // ...
+      // 2054: # 5 "file.cpp"
+      // 2055: int func() {
+      // 2056:    int a = 4;
+      // 2057:    int b = 5;
+      // ...
+      bool read = c->read_line_num (
+         linemarker_line + (line - linemarker_loc) + 1, &buffer, &len);
+      if (!read)
+       return char_span (NULL, 0);
+
+      return char_span (buffer, len);
+    }
+  return char_span (NULL, 0);
+}
+
 /* Return a NUL-terminated copy of the source text between two locations, or
    NULL if the arguments are invalid.  The caller is responsible for freeing
    the return value.  */
@@ -2401,6 +2504,71 @@ test_reading_source_line ()
   ASSERT_TRUE (source_line.get_buffer () == NULL);
 }
 
+/* Verify reading of preprocessed input files
+   (e.g. for caret-based diagnostics).  */
+
+static void
+test_reading_source_line_preprocessed ()
+{
+  /* Create a tempfile and write some valid preprocessor output.  */
+  temp_source_file tmp (SELFTEST_LOCATION, ".cpp.ii",
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.h\" 1\n"
+                       "void test_func() {\n"
+                       "# 35 \"test.h\"\n"
+                       "    do_something_else ();\n"
+                       "}\n"
+                       "# 2 \"test.cpp\" 2\n"
+                       "\n"
+                       "int main() {\n"
+                       "    do_something ();\n"
+                       "\n"
+                       "    int i = 5;\n"
+                       "    unsigned j = 3;\n"
+                       "    if (i > j)\n"
+                       "    return 0;\n"
+                       "\n"
+                       "    test_func ();\n"
+                       "}");
+  file_cache fc;
+
+  /* Read back a specific line from the tempfile.  */
+  char_span source_line = fc.get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 4);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (20, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something ();", source_line.get_buffer (),
+                        source_line.length ()));
+
+  source_line = fc.get_source_line_preprocessed (
+      "test.h", tmp.get_filename (), 35);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (25, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something_else ();",
+                        source_line.get_buffer (), source_line.length ()));
+
+  // file not present in preprocessor output
+  source_line = fc.get_source_line_preprocessed ("other.h",
+                                                      tmp.get_filename (), 4);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // off by one
+  source_line = fc.get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 13);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // empty lines omitted by preprocessor
+  source_line = fc.get_source_line_preprocessed ("test.h",
+                                                      tmp.get_filename (), 2);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+}
+
 /* Verify reading from buffers (e.g. for sarif-replay).  */
 
 static void
@@ -4318,6 +4486,7 @@ input_cc_tests ()
   for_each_line_table_case (test_lexer_char_constants);
 
   test_reading_source_line ();
+  test_reading_source_line_preprocessed ();
   test_reading_source_buffer ();
 
   test_line_offset_overflow ();
diff --git a/gcc/input.h b/gcc/input.h
index 18ccf4429fc..45abebdcee7 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -155,6 +155,8 @@ class file_cache
 
   char_span get_source_file_content (const char *file_path);
   char_span get_source_line (const char *file_path, int line);
+  char_span get_source_line_preprocessed (const char *source_name,
+                const char *file_path, int line);
   bool missing_trailing_newline_p (const char *file_path);
 
   void add_buffered_content (const char *file_path,
diff --git a/gcc/opts-global.cc b/gcc/opts-global.cc
index b9b42d3b233..b8c5aafb93f 100644
--- a/gcc/opts-global.cc
+++ b/gcc/opts-global.cc
@@ -231,6 +231,10 @@ read_cmdline_options (struct gcc_options *opts, struct 
gcc_options *opts_set,
          if (opts->x_main_input_filename == NULL)
            {
              opts->x_main_input_filename = decoded_options[i].arg;
+          // remember original input filename in diagnostic context
+          // because if a preprocessed file is compiled, this is
+          // changed to the original source file name later
+          dc->m_main_input_file_path = opts->x_main_input_filename;
              opts->x_main_input_baselength
                = base_of_path (opts->x_main_input_filename,
                                &opts->x_main_input_basename);
-- 
2.35.3

Reply via email to