We found one more issue with the patch in our internal compile cluster. When 
using "-fdirectives-only", linemarkers do not always start at the beginning of 
a line.
This version of the patch (last submission:
https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675450.html) fixes this
by skipping blanks before parsing the linemarker. It is also rebased onto the
latest trunk.

Best
Lucas

----------------------------------

Within a compile cluster, only the preprocessed output of GCC is transferred
to remote nodes for compilation. When GCC produces advanced diagnostics
(with -fdiagnostics-show-caret), e.g. prints out the affected source
line and fixit hints, it attempts to read the source file again, even when
compiling a preprocessed file (-fpreprocessed). This leads to wrong
diagnostics when building with a compile cluster, or, more generally,
when changing or deleting the original source file.

This change alters GCC to read from the preprocessed file instead by
calculating the corresponding source line. This behavior is consistent
with clang.

The patch implements this efficiently by using a cache for
linemarkers that are already seen and a memoization of lines
that have already been requested.

gcc/c-family/ChangeLog:

        PR preprocessor/79106
        * c-opts.cc (c_common_handle_option): Pass -fpreprocessed
        option value to global diagnostic configuration.

gcc/ChangeLog:

        PR preprocessor/79106
        * diagnostic-show-locus.cc (get_source_line_maybe_preprocessed): Read
        line from source or preprocessed file based on -fpreprocessed value.
        (layout::calculate_x_offset_display): Use new function.
        (source_line::source_line): Use new function.
        (layout_printer::print_line): Use new function.
        * diagnostic.cc (diagnostic_context::initialize): Initialize new
        members.
        * diagnostic.h: Add new members for reading source lines from
        preprocessed files.
        * input.cc (struct file_cache_slot::linemarker_cache_entry): New.
        (struct file_cache_slot::linemarker_cache): New.
        (file_cache_slot::evict): Also empty linemarker cache.
        (file_cache_slot::create): Initialize new linemarker cache.
        (file_cache_slot::file_cache_slot): Initialize new linemarker
        cache.
        (file_cache_slot::~file_cache_slot): Delete linemarker cache.
        (file_cache_slot::linemarker_cache::get_closest_linemarker): New
        function for looking up the closest cached linemarker.
        (file_cache_slot::linemarker_cache::add_linemarker_unique): New
        function for adding linemarkers to the linemarker cache.
        (file_cache_slot::linemarker_cache::add_result_line): Memoize
        single result of get_source_line_preprocessed.
        (file_cache_slot::linemarker_cache::get_result_line): Retrieve
        memoized result of get_source_line_preprocessed.
        (is_linemarker_for_file): New function for testing a line for
        a linemarker.
        (file_cache::get_source_line_preprocessed): New function for
        reading lines from preprocessed sources.
        (test_parsing_linemarker): New test case.
        (test_linemarker_cache): New test case.
        (test_reading_source_line_preprocessed): New test case.
        (input_cc_tests): Add new test cases.
        * input.h (class file_cache): Add new member function.
        * opts-global.cc (read_cmdline_options): Pass input filename to global
        diagnostic context.

gcc/testsuite/ChangeLog:

        PR preprocessor/79106
        * g++.dg/lookup/missing-std-include-11.C: Adapt to new behavior.

Signed-off-by: Lucas Bader <lucas.ba...@sap.com>
---
 gcc/c-family/c-opts.cc                        |   1 +
 gcc/diagnostic-show-locus.cc                  |  25 +-
 gcc/diagnostic.cc                             |   2 +
 gcc/diagnostic.h                              |   6 +
 gcc/input.cc                                  | 696 ++++++++++++++++++
 gcc/input.h                                   |   2 +
 gcc/opts-global.cc                            |   4 +
 .../g++.dg/lookup/missing-std-include-11.C    |   2 +-
 8 files changed, 733 insertions(+), 5 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index d43b3aef102..40c38ae9318 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
 
     case OPT_fpreprocessed:
       cpp_opts->preprocessed = value;
+      global_dc->m_is_preprocessed = value;
       break;
 
     case OPT_fdebug_cpp:
diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
index 898efe74acf..f08d325bfcf 100644
--- a/gcc/diagnostic-show-locus.cc
+++ b/gcc/diagnostic-show-locus.cc
@@ -1768,6 +1768,20 @@ layout::calculate_linenum_width ()
   m_linenum_width = MAX (m_linenum_width, m_options.min_margin_width - 1);
 }
 
+/* Return line LINE_NUM of FILENAME via FC.  If the main input is known and
+   -fpreprocessed is in effect, look the line up in that input instead.  */
+static char_span
+get_source_line_maybe_preprocessed (file_cache &fc, const char *filename,
+                                   int line_num)
+{
+  if (global_dc->m_main_input_file_path != NULL
+      && global_dc->m_is_preprocessed)
+    return fc.get_source_line_preprocessed (filename,
+                                           global_dc->m_main_input_file_path,
+                                           line_num);
+  return fc.get_source_line (filename, line_num);
+}
+
 /* Calculate m_x_offset_display, which improves readability in case the source
    line of interest is longer than the user's display.  All lines output will be
    shifted to the left (so that their beginning is no longer displayed) by
@@ -1786,8 +1800,9 @@ layout::calculate_x_offset_display ()
       return;
     }
 
-  const char_span line = m_file_cache.get_source_line (m_exploc.file,
-                                                      m_exploc.line);
+  char_span line
+    = get_source_line_maybe_preprocessed (m_file_cache, m_exploc.file,
+                                         m_exploc.line);
   if (!line)
     {
       /* Nothing to do, we couldn't find the source line.  */
@@ -2780,7 +2795,7 @@ public:
 
 source_line::source_line (file_cache &fc, const char *filename, int line)
 {
-  char_span span = fc.get_source_line (filename, line);
+  char_span span = get_source_line_maybe_preprocessed (fc, filename, line);
   chars = span.get_buffer ();
   width = span.length ();
 }
@@ -3133,7 +3148,9 @@ void
 layout_printer::print_line (linenum_type row)
 {
   char_span line
-    = m_layout.m_file_cache.get_source_line (m_layout.m_exploc.file, row);
+    = get_source_line_maybe_preprocessed (m_layout.m_file_cache,
+                                         m_layout.m_exploc.file, row);
+
   if (!line)
     return;
 
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index c2f6714c24a..c8a93d407ad 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -290,6 +290,8 @@ diagnostic_context::initialize (int n_opts)
   m_diagrams.m_theme = nullptr;
   m_original_argv = nullptr;
   m_diagnostic_buffer = nullptr;
+  m_main_input_file_path = nullptr;
+  m_is_preprocessed = false;
 
   enum diagnostic_text_art_charset text_art_charset
     = DIAGNOSTICS_TEXT_ART_CHARSET_EMOJI;
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 202760b2f85..5c4b61175e8 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -850,6 +850,12 @@ public:
   /* True if warnings should be given in system headers.  */
   bool m_warn_system_headers;
 
+  /* Original input file.  */
+  const char *m_main_input_file_path;
+
+  /* True if the input file is treated as preprocessed.  */
+  bool m_is_preprocessed;
+
 private:
   /* Maximum number of errors to report.  */
   int m_max_errors;
diff --git a/gcc/input.cc b/gcc/input.cc
index fabfbfb6eaa..a87b39c132f 100644
--- a/gcc/input.cc
+++ b/gcc/input.cc
@@ -196,6 +196,132 @@ public:
     m_size -= offset;
   }
 
+  /* Describes a single linemarker found in the file of this cache slot.
+     Entries are created and indexed by linemarker_cache.  */
+  struct linemarker_cache_entry
+  {
+    /* The source file name; a heap copy owned and freed by this entry.  */
+    char *source_name;
+
+    /* The line in the file containing the linemarker, counting from 1.  */
+    int line;
+
+    /* The line the linemarker refers to.  */
+    int source_line;
+
+    /* The entry's index in the sorted record of all entries.  */
+    unsigned int record_idx;
+
+    linemarker_cache_entry (char_span sn, int l, int sl, unsigned int i)
+      : source_name (sn.xstrdup ()), line (l), source_line (sl), record_idx (i)
+    {}
+    ~linemarker_cache_entry () { free (source_name); }
+  };
+
+  /* A cache that is used by get_source_line_preprocessed to speed up
+    finding the correct line to read based on linemarker data.
+    Every file_cache_slot has a cache for its given file. Helps to avoid
+    repeatedly scanning the whole file for linemarkers via read_line_num.  */
+  struct linemarker_cache
+  {
+    /* String portion of the hash is owned by linemarker_cache_entry,
+       so we use nofree for the linemarker_hash_map.  */
+    typedef vec<linemarker_cache_entry *, va_heap> linemarker_vec;
+    typedef hash_map<nofree_string_hash, linemarker_vec> linemarker_hash_map;
+
+    /* For the memoization map, source names are copied so we use
+       free_string_hash.  */
+    typedef pair_hash<free_string_hash, int_hash<int, -1, -2>>
+      source_file_line_hash;
+    typedef hash_map<source_file_line_hash, int> result_line_map;
+
+    /* The maximum line number of all linemarkers we've seen so far.  */
+    int m_max_linemarker_line;
+
+    /* This hash map is the main entry point to the cache. It maps source paths
+       to a vector of linemarker_cache_entry pointers.
+       It allows efficient lookup of the closest linemarker.
+       For each source file, linemarkers are unique with both physical line
+       and referenced source line numbers in ascending order.
+       The same source line number can be referenced by multiple linemarkers.
+    */
+    linemarker_hash_map *m_linemarkers_by_source_name;
+
+    /* A sorted (by line number ascending) record of all unique linemarkers
+       we encounter. This is used for efficiently determining if a candidate
+       from the cache is the best candidate.  */
+    vec<linemarker_cache_entry *, va_heap> m_linemarkers;
+
+    /* This map is used for plain memoization of linemarker to
+       physical source line mappings. This is an additional optimization used
+       to avoid repeated calls to get_source_line_preprocessed
+       for the same source_file and line.
+       Expects a copy of the source_file string and assumes ownership.
+    */
+    result_line_map *m_result_lines_by_source;
+
+    /* Default ctor.  */
+    linemarker_cache ()
+      : m_max_linemarker_line (0),
+       m_linemarkers_by_source_name (new linemarker_hash_map),
+       m_result_lines_by_source (new result_line_map)
+    {
+      m_linemarkers.create (0);
+    }
+
+    /* Used in map traversal to clean up linemarker vectors.  */
+    static bool
+    release_linemarker_cache_vector (ATTRIBUTE_UNUSED const char *const &,
+                                    vec<linemarker_cache_entry *> *vec,
+                                    ATTRIBUTE_UNUSED void *)
+    {
+      // linemarker_cache_entry is owned by m_linemarkers so
+      // we only need to release the vector.
+      vec->release ();
+      return true;
+    }
+
+    /* Destructor.  */
+    ~linemarker_cache ()
+    {
+      m_linemarkers_by_source_name
+       ->traverse<void *, &release_linemarker_cache_vector> (NULL);
+      delete m_linemarkers_by_source_name;
+      delete m_result_lines_by_source;
+
+      for (unsigned i = 0; i < m_linemarkers.length (); ++i)
+       delete m_linemarkers[i];
+      m_linemarkers.release ();
+    }
+
+    /* Drop all cached linemarkers and free associated resources.  */
+    void empty ()
+    {
+      m_max_linemarker_line = 0;
+      m_linemarkers_by_source_name
+       ->traverse<void *, &release_linemarker_cache_vector> (NULL);
+      m_linemarkers_by_source_name->empty ();
+      m_result_lines_by_source->empty ();
+      for (unsigned i = 0; i < m_linemarkers.length (); ++i)
+       delete m_linemarkers[i];
+      m_linemarkers.truncate (0);
+    }
+
+    int get_max_linemarker_line () const { return m_max_linemarker_line; }
+    void set_max_linemarker_line (int l) { m_max_linemarker_line = l; }
+
+    int get_result_line (const char *source_name, int source_line) const;
+    void add_result_line (const char *source_name, int source_line, int line);
+    bool get_closest_linemarker (const char *source_name, int source_line,
+                                int *out_linemarker_line,
+                                int *out_linemarker_source_line) const;
+    void add_linemarker_unique (char_span source_name, int line,
+                               int source_line);
+  };
+
+public:
+  /* Linemarker cache for the given file_cache_slot.  */
+  linemarker_cache *m_linemarker_cache;
 };
 
 size_t file_cache_slot::line_record_size = 0;
@@ -411,6 +537,7 @@ file_cache_slot::evict ()
   m_line_record.truncate (0);
   m_line_recent_first = 0;
   m_line_recent_last = 0;
+  m_linemarker_cache->empty ();
   m_use_count = 0;
   m_missing_trailing_newline = true;
 }
@@ -510,6 +637,7 @@ file_cache_slot::create (const file_cache::input_context &in_context,
   m_line_recent_first = 0;
   m_line_recent_last = 0;
   m_line_record.truncate (0);
+  m_linemarker_cache->empty ();
   /* Ensure that this cache entry doesn't get evicted next time
      add_file_to_cache_tab is called.  */
   m_use_count = ++highest_use_count;
@@ -622,6 +750,7 @@ file_cache_slot::file_cache_slot ()
   m_line_recent.create (1U << recent_cached_lines_shift);
   for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
     m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
+  m_linemarker_cache = new linemarker_cache;
 }
 
 /* Destructor for a cache of file used by caret diagnostic.  */
@@ -642,6 +771,7 @@ file_cache_slot::~file_cache_slot ()
     }
   m_line_record.release ();
   m_line_recent.release ();
+  delete m_linemarker_cache;
 }
 
 void
@@ -1046,6 +1176,296 @@ file_cache::get_source_line (const char *file_path, int line)
   return char_span (buffer, len);
 }
 
+/* Find the closest cached linemarker for a given SOURCE_LINE and SOURCE_NAME.
+
+   Closest is defined by the smallest distance between the cached source line
+   and the given source line (i.e. the nearest left neighbor or exact match).
+   If we can safely determine that the cached linemarker is the best candidate,
+   return true, otherwise false. If we have a valid match, store the linemarker
+   physical line number in OUT_LINEMARKER_LINE and the referenced source line
+   in OUT_LINEMARKER_SOURCE_LINE.
+
+   If we are certain that the candidate is not the best because a later
+   linemarker is known and the gap up to it is too small for SOURCE_LINE,
+   reset both OUT_LINEMARKER_LINE and OUT_LINEMARKER_SOURCE_LINE to 0.
+   The caller can then resume scanning after the last known linemarker.  */
+
+bool
+file_cache_slot::linemarker_cache::get_closest_linemarker (
+  const char *source_name, int source_line, int *out_linemarker_line,
+  int *out_linemarker_source_line) const
+{
+  *out_linemarker_source_line = 0;
+  *out_linemarker_line = 0;
+
+  // get sorted list of linemarkers for the given source file from cache
+  vec<linemarker_cache_entry *, va_heap> *linemarkers
+    = m_linemarkers_by_source_name->get (source_name);
+  if (!linemarkers)
+    return false;
+  if (linemarkers->length () == 0)
+    return false;
+
+  // perform a binary search to find the nearest left neighbor or exact match
+  int left = 0, right = linemarkers->length () - 1;
+  while (left <= right)
+    {
+      int mid = left + (right - left) / 2;
+      if ((*linemarkers)[mid]->source_line > source_line)
+       {
+        right = mid - 1;
+       }
+      else
+       {
+        left = mid + 1;
+       }
+    }
+  if (right < 0)
+    return false;
+
+  *out_linemarker_source_line = (*linemarkers)[right]->source_line;
+  *out_linemarker_line = (*linemarkers)[right]->line;
+
+  // If there is a linemarker for any file above the candidate, we can
+  // check if the target source line falls in the range between.
+  // If so, the candidate is the best.
+  if (m_linemarkers.length () > (*linemarkers)[right]->record_idx + 1)
+    {
+      linemarker_cache_entry *next_lm
+       = m_linemarkers[(*linemarkers)[right]->record_idx + 1];
+      size_t lines_to_next = next_lm->line - (*linemarkers)[right]->line - 1;
+      size_t offset = source_line - (*linemarkers)[right]->source_line;
+      if (offset < lines_to_next)
+       return true;
+      else
+       {
+        // we know this candidate cannot be the right one
+        // because the space between the candidate and the next linemarker
+        // is not large enough for the offset.
+        *out_linemarker_source_line = 0;
+        *out_linemarker_line = 0;
+       }
+    }
+
+  return false;
+}
+
+/* Record that physical line LINE is a linemarker referring to SOURCE_LINE of
+   SOURCE_NAME.  A LINE not above the highest one already seen is ignored.  */
+void
+file_cache_slot::linemarker_cache::add_linemarker_unique (
+  char_span source_name, int line, int source_line)
+{
+  if (line <= get_max_linemarker_line ())
+    return; // seen already
+  set_max_linemarker_line (line);
+
+  // first, add the linemarker to the sorted record along with its index
+  linemarker_cache_entry *entry
+    = new linemarker_cache_entry{source_name, line, source_line,
+                                m_linemarkers.length ()};
+
+  m_linemarkers.safe_push (entry);
+
+  // then, link the entry to the source_file based lookup table,
+  // creating the table entry if it doesn't exist yet
+  vec<linemarker_cache_entry *, va_heap> *linemarkers
+    = m_linemarkers_by_source_name->get (entry->source_name);
+  if (!linemarkers)
+    {
+      vec<linemarker_cache_entry *, va_heap> linemarkers_vec
+       = vec<linemarker_cache_entry *, va_heap> ();
+      linemarkers_vec.create (1);
+      linemarkers_vec.safe_push (entry);
+      m_linemarkers_by_source_name->put (entry->source_name, linemarkers_vec);
+    }
+  else
+    {
+      linemarkers->safe_push (entry);
+    }
+}
+
+/* Memoize a result line for get_source_line_preprocessed calls.  */
+void
+file_cache_slot::linemarker_cache::add_result_line (const char *source_name,
+                                                   int source_line,
+                                                   int result_line)
+{
+  m_result_lines_by_source->put (string_int_pair (xstrdup (source_name),
+                                                 source_line),
+                                result_line);
+}
+
+/* Return a memoized line number for the given SOURCE_NAME and SOURCE_LINE.
+   If no result is cached, return 0.
+   The cached line number is the physical line which is referenced by
+   the cached linemarker for these inputs.  */
+int
+file_cache_slot::linemarker_cache::get_result_line (const char *source_name,
+                                                   int source_line) const
+{
+  int *cached_line = m_result_lines_by_source->get (
+    string_int_pair (source_name, source_line));
+  return cached_line ? *cached_line : 0;
+}
+
+/* Return true if LINE is a linemarker for the given SOURCE_NAME.
+   Linemarkers are of the form (leading blanks are skipped):
+   # LINENUM "SOURCE_NAME" [FLAGS]
+   Store the referenced line number in OUT_LINE_NUM.
+   If it is any linemarker, even for a different file, write the file name
+   out to OUT_LINEMARKER_SOURCE_NAME.  */
+static bool
+is_linemarker_for_file (char_span line, const char *source_name,
+                       int *out_line_num,
+                       char_span *out_linemarker_source_name)
+{
+  // skip leading whitespace
+  size_t i = 0;
+  while (line.length () > i && ISBLANK (line[i]))
+    ++i;
+  if (line.length () > i + 2 && line[i] == '#' && line[i + 1] == ' '
+      && ISDIGIT (line[i + 2]))
+    {
+      *out_line_num = atoi (line.get_buffer () + i + 2);
+      for (i += 3; i < line.length (); ++i)
+       {
+        if (line[i] == '"') // leading quote
+           {
+             size_t source_name_start = i + 1;
+             for (size_t j = i + 1; j < line.length (); ++j)
+               {
+                 if (line[j] == '"') // trailing quote
+                   {
+                     size_t source_name_length = j - source_name_start;
+                     *out_linemarker_source_name
+                       = line.subspan (i + 1, source_name_length);
+                     // equal only if same length and same bytes; checking
+                     // the length first keeps memcmp within both buffers
+                     if (strlen (source_name) == source_name_length
+                         && memcmp (out_linemarker_source_name->get_buffer (),
+                                    source_name, source_name_length) == 0)
+                       {
+                         // linemarker is for the same file
+                         return true;
+                       }
+                     return false;
+                   }
+               }
+           }
+       }
+    }
+  return false;
+}
+
+/* Return the physical source line that corresponds to SOURCE_NAME/LINE.
+   Read the line from the preprocessed file at FILE_PATH.
+   The line is not nul-terminated.  The returned pointer is only
+   valid until the next call of get_source_line.
+   Note that the line can contain several null characters,
+   so the returned value's length has the actual length of the line.
+   If the function fails, a NULL char_span is returned.
+
+   If we for example want to get line 7 of file.cpp from the preprocessed
+   output, the corresponding line marker might look like this:
+   # 5 "file.cpp"    // line 2050 of the preprocessed file
+   int func() {
+     int a = 4;
+     int b = 5;
+
+   This means that line 2051 of the preprocessed file is line 5 of the
+   original file.cpp. So to get line 7 of file.cpp we have to read line
+   2053 of the preprocessed file.  */
+
+char_span
+file_cache::get_source_line_preprocessed (const char *source_name,
+                                         const char *file_path, int line)
+{
+  char *buffer = NULL;
+  ssize_t len;
+
+  if (line == 0)
+    return char_span (NULL, 0);
+
+  file_cache_slot *c = lookup_or_add_file (file_path);
+  if (c == NULL)
+    return char_span (NULL, 0);
+
+  // return memoized result if available
+  int cached_line = c->m_linemarker_cache->get_result_line (source_name, line);
+  if (cached_line != 0)
+    {
+      bool read = c->read_line_num (cached_line, &buffer, &len);
+      if (read)
+       return char_span (buffer, len);
+    }
+
+  // attempt to find a linemarker in the cache
+  int linemarker_line = 0; // the physical line num of the candidate
+  int linemarker_loc = 0;  // the referenced source line num of the candidate
+  bool is_best
+    = c->m_linemarker_cache->get_closest_linemarker (source_name, line,
+                                                    &linemarker_line,
+                                                    &linemarker_loc);
+
+  // we continue reading from (highest) cached location if the potential
+  // candidate is not the best already
+  int current_line = c->m_linemarker_cache->get_max_linemarker_line () + 1;
+  char_span linemarker_source_name = char_span (NULL, 0);
+  int linemarker_source_line = 0;
+  bool is_match = false;
+  while (!is_best && c->read_line_num (current_line, &buffer, &len))
+    {
+      linemarker_source_name = char_span (NULL, 0);
+      linemarker_source_line = 0;
+      is_match = is_linemarker_for_file (char_span (buffer, len), source_name,
+                                        &linemarker_source_line,
+                                        &linemarker_source_name);
+
+      if (linemarker_source_name)
+       {
+        c->m_linemarker_cache->add_linemarker_unique (linemarker_source_name,
+                                                      current_line,
+                                                      linemarker_source_line);
+        if (linemarker_line > 0 && linemarker_loc > 0)
+           {
+             // if our last candidate fits the range to the next linemarker,
+             // we can stop searching
+             int lines_to_next_lm = current_line - linemarker_line - 1;
+             int offset = line - linemarker_loc;
+             if (offset < lines_to_next_lm)
+               break;
+             else
+               {
+                 // reset the candidate if we have passed the range
+                 linemarker_line = 0;
+                 linemarker_loc = 0;
+               }
+           }
+        if (is_match && linemarker_source_line <= line)
+           {
+             // new candidate; only a linemarker at or before LINE can map it
+             linemarker_line = current_line;
+             linemarker_loc = linemarker_source_line;
+           }
+       }
+      ++current_line;
+    }
+
+  if (linemarker_line == 0 || linemarker_loc == 0)
+    return char_span (NULL, 0);
+
+  // we have a matching linemarker, read the indicated line
+  size_t physical_line = linemarker_line + (line - linemarker_loc) + 1;
+  bool read = c->read_line_num (physical_line, &buffer, &len);
+  if (!read)
+    return char_span (NULL, 0);
+
+  // memoize result line
+  c->m_linemarker_cache->add_result_line (source_name, line, physical_line);
+  return char_span (buffer, len);
+}
+
 /* Return a NUL-terminated copy of the source text between two locations, or
    NULL if the arguments are invalid.  The caller is responsible for freeing
    the return value.  */
@@ -2440,6 +2860,279 @@ test_reading_source_line ()
   ASSERT_TRUE (source_line.get_buffer () == NULL);
 }
 
+/* Verify parsing of linemarkers found in preprocessed input files
+   (e.g. for caret-based diagnostics).  */
+
+static void
+test_parsing_linemarker ()
+{
+  const char *buf = "# 112 \"test.cpp\"\n";
+  int line_num;
+  char_span file_name (NULL, 0);
+  bool result = is_linemarker_for_file (char_span (buf, strlen (buf)),
+                                       "test.cpp", &line_num, &file_name);
+
+  ASSERT_TRUE (result);
+  ASSERT_EQ (112, line_num);
+  ASSERT_TRUE (file_name);
+  ASSERT_EQ (strcmp ("test.cpp", file_name.xstrdup ()), 0);
+  file_name = char_span (NULL, 0);
+
+  /* Linemarker but for a different file.  */
+  const char *buf2 = "# 23 \"test.hpp\"\n";
+  result = is_linemarker_for_file (char_span (buf2, strlen (buf2)), "test.h",
+                                  &line_num, &file_name);
+
+  ASSERT_FALSE (result);
+  ASSERT_TRUE (file_name);
+  ASSERT_EQ (strcmp ("test.hpp", file_name.xstrdup ()), 0);
+  file_name = char_span (NULL, 0);
+
+  const char *buf3 = "# 23 \"test.h\"\n";
+  result = is_linemarker_for_file (char_span (buf3, strlen (buf3)), "test.hpp",
+                                  &line_num, &file_name);
+
+  ASSERT_FALSE (result);
+  ASSERT_TRUE (file_name);
+  ASSERT_EQ (strcmp ("test.h", file_name.xstrdup ()), 0);
+  file_name = char_span (NULL, 0);
+
+  /* Not a linemarker.  */
+  const char *buf4 = "int main() {\n";
+  result = is_linemarker_for_file (char_span (buf4, strlen (buf4)), "test.cpp",
+                                  &line_num, &file_name);
+  ASSERT_FALSE (result);
+  ASSERT_FALSE (file_name);
+
+  /* Linemarker with flags.  */
+  const char *buf5 = "# 23 \"test.h\" 1\n";
+  result = is_linemarker_for_file (char_span (buf5, strlen (buf5)), "test.h",
+                                  &line_num, &file_name);
+  ASSERT_TRUE (result);
+  ASSERT_EQ (23, line_num);
+  ASSERT_TRUE (file_name);
+  ASSERT_EQ (strcmp ("test.h", file_name.xstrdup ()), 0);
+  file_name = char_span (NULL, 0);
+
+  /* Malformed linemarkers.  */
+  const char *buf6 = "# 23 \"test.h   \n";
+  result = is_linemarker_for_file (char_span (buf6, strlen (buf6)), "test.h",
+                                  &line_num, &file_name);
+  ASSERT_FALSE (result);
+  ASSERT_FALSE (file_name);
+  const char *buf7 = "# 2 test.h\n";
+  result = is_linemarker_for_file (char_span (buf7, strlen (buf7)), "test.h",
+                                  &line_num, &file_name);
+  ASSERT_FALSE (result);
+  ASSERT_FALSE (file_name);
+  const char *buf8 = "#  23 \"test.h\"\n";
+  result = is_linemarker_for_file (char_span (buf8, strlen (buf8)), "test.h",
+                                  &line_num, &file_name);
+  ASSERT_FALSE (result);
+  ASSERT_FALSE (file_name);
+
+  /* Linemarker preceded by blanks instead of starting the line.  */
+  const char *buf9 = "      # 23 \"test.hpp\"\n";
+  result = is_linemarker_for_file (char_span (buf9, strlen (buf9)), "test.hpp",
+                                  &line_num, &file_name);
+
+  ASSERT_TRUE (result);
+  ASSERT_EQ (23, line_num);
+  ASSERT_TRUE (file_name);
+  ASSERT_EQ (strcmp ("test.hpp", file_name.xstrdup ()), 0);
+  file_name = char_span (NULL, 0);
+}
+
+static void
+test_linemarker_cache ()
+{
+  temp_source_file tmp (SELFTEST_LOCATION, ".cpp.ii", "");
+  file_cache fc;
+  file_cache_slot *c = fc.lookup_or_add_file (tmp.get_filename ());
+
+  // Represents a file like
+  // # 1 "test.cpp" 1
+  // # 1 "test.cpp"
+  // content
+  // # 1 "test.h" 1
+  // # 2 "test.h"
+  // ...
+  // # 35 "test.h"
+  // the cache entry copies the name, so string literals can be passed
+  c->m_linemarker_cache->add_linemarker_unique (char_span ("test.cpp", 8),
+                                               1, 1);
+  c->m_linemarker_cache->add_linemarker_unique (char_span ("test.cpp", 8),
+                                               2, 1);
+
+  int line_num = 0;
+  int source_line_num = 0;
+
+  /* Matching linemarkers.  */
+  bool best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.cpp", 4, &line_num,
+                                                    &source_line_num);
+  // not surely the best but a match
+  // because there are no further linemarkers known yet
+  ASSERT_FALSE (best);
+  ASSERT_EQ (2, line_num);
+  ASSERT_EQ (1, source_line_num);
+
+  // add more linemarkers
+  c->m_linemarker_cache->add_linemarker_unique (char_span ("test.h", 6),
+                                               5, 2);
+  c->m_linemarker_cache->add_linemarker_unique (char_span ("test.h", 6),
+                                               15, 35);
+
+  // repeat previous test but with more cache entries
+  best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.cpp", 1, &line_num,
+                                                    &source_line_num);
+  // best because there is room for one line before the next linemarker
+  ASSERT_TRUE (best);
+  // left-nearest neighbor, i.e. the last linemarker before the source line
+  ASSERT_EQ (2, line_num);
+  ASSERT_EQ (1, source_line_num);
+
+  best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.cpp", 4, &line_num,
+                                                    &source_line_num);
+  // not the best because there are linemarkers after and the
+  // space between is not enough for the source line offset
+  ASSERT_FALSE (best);
+  ASSERT_EQ (0, line_num);
+  ASSERT_EQ (0, source_line_num);
+
+  best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.h", 10, &line_num,
+                                                    &source_line_num);
+  // best because there are 9 lines between the linemarker and the next
+  // We can target source line offsets between 0 and 8
+  // and the offset is 8
+  ASSERT_TRUE (best);
+  ASSERT_EQ (5, line_num);
+  ASSERT_EQ (2, source_line_num);
+  best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.h", 11, &line_num,
+                                                    &source_line_num);
+  // one source line further already falls on the next linemarker
+  ASSERT_FALSE (best);
+  ASSERT_EQ (0, line_num);
+  ASSERT_EQ (0, source_line_num);
+
+  best
+    = c->m_linemarker_cache->get_closest_linemarker ("test.h", 40, &line_num,
+                                                    &source_line_num);
+  // no linemarker in the cache after this, so we would need to scan further
+  ASSERT_FALSE (best);
+  ASSERT_EQ (15, line_num);
+  ASSERT_EQ (35, source_line_num);
+
+  /* No match.  */
+  best = c->m_linemarker_cache->get_closest_linemarker ("test.h", 1, &line_num,
+                                                       &source_line_num);
+  // no left-nearest neighbor
+  ASSERT_FALSE (best);
+  ASSERT_EQ (0, line_num);
+  ASSERT_EQ (0, source_line_num);
+
+  best = c->m_linemarker_cache->get_closest_linemarker ("no_match.cpp", 3,
+                                                       &line_num,
+                                                       &source_line_num);
+  ASSERT_FALSE (best);
+  ASSERT_EQ (0, line_num);
+  ASSERT_EQ (0, source_line_num);
+}
+
+/* Verify that file_cache::get_source_line_preprocessed can recover a
+   source line from preprocessed output, given the name of the original
+   source file, the path of the preprocessed file and the line number
+   within the original source file.  */
+
+static void
+test_reading_source_line_preprocessed ()
+{
+  /* Create a tempfile and write some valid preprocessor output.
+     The content interleaves linemarkers for "test.cpp", "test.h" and
+     "test.hpp", so the requested source line has to be located
+     relative to the linemarker of the matching file.  */
+  temp_source_file tmp (SELFTEST_LOCATION, ".cpp.ii",
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.h\" 1\n"
+                       "void test_func() {\n"
+                       "# 35 \"test.h\"\n"
+                       "    do_something_else ();\n"
+                       "}\n"
+                       "# 35 \"test.hpp\"\n"
+                       "    do_nothing ();\n"
+                       "}\n"
+                       "# 2 \"test.cpp\" 2\n"
+                       "\n"
+                       "int main() {\n"
+                       "    do_something ();\n"
+                       "\n"
+                       "    int i = 5;\n"
+                       "    unsigned j = 3;\n"
+                       "    if (i > j)\n"
+                       "    return 0;\n"
+                       "\n"
+                       "# 38 \"test.h\"\n"
+                       "    random_func();\n"
+                       "    test_func ();\n"
+                       "}\n");
+  file_cache fc;
+
+  // Perform all tests twice to verify memoized calls behave the same
+  for (int i = 0; i < 2; i++)
+    {
+      /* Read back a specific line from the tempfile: line 4 of
+        "test.cpp" follows the '# 2 "test.cpp" 2' linemarker.  */
+      char_span source_line
+       = fc.get_source_line_preprocessed ("test.cpp", tmp.get_filename (), 4);
+      ASSERT_TRUE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () != NULL);
+      ASSERT_EQ (20, source_line.length ());
+      ASSERT_TRUE (!strncmp ("    do_something ();", source_line.get_buffer (),
+                            source_line.length ()));
+
+      // line directly after the '# 35 "test.h"' linemarker
+      source_line
+       = fc.get_source_line_preprocessed ("test.h", tmp.get_filename (), 35);
+      ASSERT_TRUE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () != NULL);
+      ASSERT_EQ (25, source_line.length ());
+      ASSERT_TRUE (!strncmp ("    do_something_else ();",
+                            source_line.get_buffer (), source_line.length ()));
+
+      // one line after the '# 38 "test.h"' linemarker
+      source_line
+       = fc.get_source_line_preprocessed ("test.h", tmp.get_filename (), 39);
+      ASSERT_TRUE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () != NULL);
+      ASSERT_EQ (17, source_line.length ());
+      ASSERT_TRUE (!strncmp ("    test_func ();", source_line.get_buffer (),
+                            source_line.length ()));
+
+      // same line number as for "test.h" above, but a different file name
+      source_line
+       = fc.get_source_line_preprocessed ("test.hpp", tmp.get_filename (), 35);
+      ASSERT_TRUE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () != NULL);
+      ASSERT_EQ (18, source_line.length ());
+      ASSERT_TRUE (!strncmp ("    do_nothing ();", source_line.get_buffer (),
+                            source_line.length ()));
+
+      // file not present in preprocessor output
+      source_line
+       = fc.get_source_line_preprocessed ("other.h", tmp.get_filename (), 4);
+      ASSERT_FALSE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+      // empty lines omitted by preprocessor
+      source_line
+       = fc.get_source_line_preprocessed ("test.h", tmp.get_filename (), 2);
+      ASSERT_FALSE (source_line);
+      ASSERT_TRUE (source_line.get_buffer () == NULL);
+    }
+
+  // Verify that the cache is working as expected
+  // line not previously read but all linemarkers are in the cache
+  char_span source_line
+    = fc.get_source_line_preprocessed ("test.cpp", tmp.get_filename (), 6);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  // Pin the length as well: strncmp bounded by the span length alone
+  // would also accept a truncated or empty span.
+  ASSERT_EQ (14, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    int i = 5;", source_line.get_buffer (),
+                        source_line.length ()));
+}
+
 /* Verify reading from buffers (e.g. for sarif-replay).  */
 
 static void
@@ -4357,6 +5050,9 @@ input_cc_tests ()
   for_each_line_table_case (test_lexer_char_constants);
 
   test_reading_source_line ();
+  test_parsing_linemarker ();
+  test_linemarker_cache ();
+  test_reading_source_line_preprocessed ();
   test_reading_source_buffer ();
   test_replacement ();
 
diff --git a/gcc/input.h b/gcc/input.h
index b0a1ca0f58f..86110739ce7 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -155,6 +155,8 @@ class file_cache
 
   char_span get_source_file_content (const char *file_path);
   char_span get_source_line (const char *file_path, int line);
+  char_span get_source_line_preprocessed (const char *source_name,
+                                         const char *file_path, int line);
   bool missing_trailing_newline_p (const char *file_path);
 
   void add_buffered_content (const char *file_path,
diff --git a/gcc/opts-global.cc b/gcc/opts-global.cc
index b9b42d3b233..4a8d0d6ca1d 100644
--- a/gcc/opts-global.cc
+++ b/gcc/opts-global.cc
@@ -231,6 +231,10 @@ read_cmdline_options (struct gcc_options *opts, struct gcc_options *opts_set,
          if (opts->x_main_input_filename == NULL)
            {
              opts->x_main_input_filename = decoded_options[i].arg;
+             // remember original input filename in diagnostic context
+             // because if a preprocessed file is compiled, this is
+             // changed to the original source file name later
+             dc->m_main_input_file_path = opts->x_main_input_filename;
              opts->x_main_input_baselength
                = base_of_path (opts->x_main_input_filename,
                                &opts->x_main_input_basename);
diff --git a/gcc/testsuite/g++.dg/lookup/missing-std-include-11.C b/gcc/testsuite/g++.dg/lookup/missing-std-include-11.C
index ec2c494c557..5128f415c43 100644
--- a/gcc/testsuite/g++.dg/lookup/missing-std-include-11.C
+++ b/gcc/testsuite/g++.dg/lookup/missing-std-include-11.C
@@ -40,4 +40,4 @@ int main ()
 }
 // { dg-additional-files "missing-std-include-10.h" }
 // { dg-regexp {[^\n]*: error: 'strcmp' was not declared in this scope\n *return strcmp [^\n]*;\n *\^~*\n} }
-// { dg-regexp {[^\n]* note: 'strcmp' is defined in header[^\n]*\n #include "missing-std-include-10.h"\n\+#include <cstring>\n // HERE\n} }
+// { dg-regexp {[^\n]* note: 'strcmp' is defined in header[^\n]*\n} }
-- 
2.35.3

Reply via email to