On Sat, 2025-01-25 at 23:30 -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> For larger files the file_cache line index will be spread out to make
> the index fit into the fixed buffer, so any access to the non latest
> line
> will need some skipping of lines.
>
> Most accesses for line are near the latest line because
> a diagnostic is likely near where the scanner is currently lexing.
>
> Add a second cache for recent lines. It is organized as a ring buffer
> and maintains the last 256 lines relative to the last input line.
>
> With that, enabling -Wmisleading-indentation for the test case in
> PR preprocessor/118168, is within the run-to-run variation.
>
> gcc/ChangeLog:
>
> PR preprocessor/118168
> * input.cc (file_cache::m_line_recent,
> m_line_recent_first, m_line_recent_last): Add.
> (file_cache_slot::evict): Clear new fields.
> (file_cache_slot::create): Clear new fields.
> (file_cache_slot::file_cache_slot): Initialize new fields.
> (file_cache_slot::~file_cache_slot): Release m_line_recent.
> (file_cache_slot::get_next_line): Maintain ring buffer of
> lines
> in m_line_recent.
> (file_cache_slot::read_line_num): Use m_line_recent to look
> up
> recent lines quickly.
> ---
> gcc/input.cc | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/input.cc b/gcc/input.cc
> index 82a58fdef27..b3fe38eb77c 100644
> --- a/gcc/input.cc
> +++ b/gcc/input.cc
> @@ -126,6 +126,7 @@ public:
>
> static const size_t buffer_size = 4 * 1024;
> static size_t line_record_size;
> + static size_t recent_cached_lines_shift;
>
> /* The number of time this file has been accessed. This is used
> to designate which file cache to evict from the cache
> @@ -174,6 +175,13 @@ public:
> this is scaled down dynamically, with the line_info becoming
> anchors. */
> vec<line_info, va_heap> m_line_record;
>
> + /* A cache of the recently seen lines. This is maintained as a
> ring
> + buffer. */
> + vec<line_info, va_heap> m_line_recent;
> +
> + /* First and last valid entry in m_line_recent. */
> + size_t m_line_recent_last, m_line_recent_first;
> +
> void offset_buffer (int offset)
> {
> gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
> @@ -187,6 +195,7 @@ public:
> };
>
> size_t file_cache_slot::line_record_size = 100;
> +size_t file_cache_slot::recent_cached_lines_shift = 8;
>
> /* Tune file_cache. */
> void
> @@ -391,6 +400,8 @@ file_cache_slot::evict ()
> m_line_start_idx = 0;
> m_line_num = 0;
> m_line_record.truncate (0);
> + m_line_recent_first = 0;
> + m_line_recent_last = 0;
> m_use_count = 0;
> m_missing_trailing_newline = true;
> }
> @@ -486,6 +497,8 @@ file_cache_slot::create (const
> file_cache::input_context &in_context,
> m_nb_read = 0;
> m_line_start_idx = 0;
> m_line_num = 0;
> + m_line_recent_first = 0;
> + m_line_recent_last = 0;
> m_line_record.truncate (0);
> /* Ensure that this cache entry doesn't get evicted next time
> add_file_to_cache_tab is called. */
> @@ -592,9 +605,13 @@ file_cache::lookup_or_add_file (const char
> *file_path)
> file_cache_slot::file_cache_slot ()
> : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
> m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx
> (0),
> - m_line_num (0), m_missing_trailing_newline (true)
> + m_line_num (0), m_missing_trailing_newline (true),
> + m_line_recent_last (0), m_line_recent_first (0)
> {
> m_line_record.create (0);
> + m_line_recent.create (1U << recent_cached_lines_shift);
> + for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
> + m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
> }
>
> /* Destructor for a cache of file used by caret diagnostic. */
> @@ -613,6 +630,7 @@ file_cache_slot::~file_cache_slot ()
> m_data = 0;
> }
> m_line_record.release ();
> + m_line_recent.release ();
> }
>
> void
> @@ -865,6 +883,20 @@ file_cache_slot::get_next_line (char **line,
> ssize_t *line_len)
> line_end - m_data));
> }
>
> + /* Cache recent tail lines separately for fast access. This
> assumes
> + most accesses do not skip backwards. */
> + if (m_line_recent_last == m_line_recent_first
> + || m_line_recent[m_line_recent_last].line_num == m_line_num
> - 1)
> + {
> + size_t mask = ((size_t)1 << recent_cached_lines_shift) - 1;
> + m_line_recent_last = (m_line_recent_last + 1) & mask;
> + if (m_line_recent_last == m_line_recent_first)
> + m_line_recent_first = (m_line_recent_first + 1) & mask;
> + m_line_recent[m_line_recent_last] =
> + file_cache_slot::line_info (m_line_num, m_line_start_idx,
> + line_end - m_data);
> + }
> +
If I reading this right, calls to get_next_line lead to insertions into
the ring buffer whilst the buffer is empty or the last line in the ring
buffer cache is m_line_num - 1.
There are a few places where we update m_line_num, but this caching
code doesn't seem to touch those places. Should it? I don't know if
the lack of a reset is an issue, but it's an aspect of the patch that's
a bit hazy to me; sorry.
If I'm reading this right, the caching that this adds is only for the
final 256 lines read so far in the file, and lets us use a line offset
relative to the most recent entry to go direct to such a recent line in
the file.
Dave
> /* Update m_line_start_idx so that it points to the next line to
> be
> read. */
> if (next_line_start)
> @@ -910,6 +942,21 @@ file_cache_slot::read_line_num (size_t line_num,
> {
> gcc_assert (line_num > 0);
>
> + /* Is the line in the recent line cache? */
> + if (m_line_recent_first != m_line_recent_last
> + && m_line_recent[m_line_recent_first].line_num <= line_num
> + && m_line_recent[m_line_recent_last].line_num >= line_num)
> + {
> + line_info &last = m_line_recent[m_line_recent_last];
> + size_t mask = (1U << recent_cached_lines_shift) - 1;
> + size_t idx = (m_line_recent_last - (last.line_num - line_num))
> & mask;
> + line_info &recent = m_line_recent[idx];
> + gcc_assert (recent.line_num == line_num);
> + *line = m_data + recent.start_pos;
> + *line_len = recent.end_pos - recent.start_pos;
> + return true;
> + }
> +
> if (line_num <= m_line_num)
> {
> line_info l (line_num, 0, 0);