https://gcc.gnu.org/g:8a155c170b443e332b3db4922f0f0d588a17604f
commit r16-5170-g8a155c170b443e332b3db4922f0f0d588a17604f Author: David Malcolm <[email protected]> Date: Tue Nov 11 10:20:47 2025 -0500 diagnostics: add experimental SARIF JSON-RPC notifications for IDEs [PR115970] https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3358r0.html#msvc describes a feature of Visual Studio 2022 version 17.8, which can send its diagnostics in SARIF form to a pipe when setting the environment variable SARIF_OUTPUT_PIPE: https://learn.microsoft.com/en-us/cpp/build/reference/sarif-output?view=msvc-170#retrieving-sarif-through-a-pipe The precise mechanism above involves Windows-specific details (Windows pipes and HANDLEs). The following patch implements an analogous feature for GCC, using Unix domain sockets rather than the Windows-specific details. With this patch, GCC's cc1, cc1plus, etc. will check if EXPERIMENTAL_SARIF_SOCKET is set in the environment, and if so, will attempt to connect to that socket. It will send a JSON-RPC notification to the socket for every diagnostic emitted. Like the MSVC feature, the diagnostics are sent one-at-a-time as SARIF "result" objects, rather than sending a full SARIF "log" object. The patch includes a Python test script which runs a server. Tested by running the script in one terminal: $ ../../src/contrib/sarif-listener.py listening on socket: /tmp/tmpjgts0u0i/socket and then invoking a build in another terminal with the envvar set to the pertinent socket: $ EXPERIMENTAL_SARIF_SOCKET=/tmp/tmpjgts0u0i/socket \ make check-gcc RUNTESTFLAGS="analyzer.exp=*" and watching as all the diagnostics generated during the build get sent to the listener. The idea is that an IDE ought to be able to create a socket and set the environment variable when invoking a build, and then listen for all the diagnostics, without needing to manually set build flags to inject SARIF output. 
This feature is experimental and subject to change or removal without notice; I'm adding it to make it easier for IDE developers to try it out and give feedback. contrib/ChangeLog: PR diagnostics/115970 * sarif-listener.py: New file. gcc/ChangeLog: PR diagnostics/115970 * diagnostics/sarif-sink.cc: Include <sys/un.h> and <sys/socket.h>. (sarif_builder::end_group): Update comment. (sarif_sink::on_end_group): Drop "final". (class sarif_socket_sink): New subclass. (maybe_open_sarif_sink_for_socket): New function. * diagnostics/sarif-sink.h: (maybe_open_sarif_sink_for_socket): New decl. * doc/invoke.texi (EXPERIMENTAL_SARIF_SOCKET): Document new environment variable. * toplev.cc: Define INCLUDE_VECTOR. Add include of "diagnostics/sarif-sink.h". (toplev::main): Call diagnostics::maybe_open_sarif_sink_for_socket. Signed-off-by: David Malcolm <[email protected]> Diff: --- contrib/sarif-listener.py | 41 +++++++++++++ gcc/diagnostics/sarif-sink.cc | 135 +++++++++++++++++++++++++++++++++++++++++- gcc/diagnostics/sarif-sink.h | 3 + gcc/doc/invoke.texi | 13 ++++ gcc/toplev.cc | 4 ++ 5 files changed, 194 insertions(+), 2 deletions(-) diff --git a/contrib/sarif-listener.py b/contrib/sarif-listener.py new file mode 100755 index 000000000000..92e38a698c24 --- /dev/null +++ b/contrib/sarif-listener.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +# Example listener for SARIF output to a socket. +# Copyright (C) 2025 Free Software Foundation, Inc. +# Contributed by David Malcolm <[email protected]>. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +# for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +from pathlib import Path +from socketserver import UnixStreamServer, StreamRequestHandler, ThreadingMixIn +import tempfile + +class MyHandler(StreamRequestHandler): + def handle(self): + notification = self.rfile.read() + print(notification) + +class ThreadedUnixStreamServer(ThreadingMixIn, UnixStreamServer): + pass + +if __name__ == '__main__': + with tempfile.TemporaryDirectory() as tmpdir: + socket_path = Path(tmpdir) / 'socket' + with ThreadedUnixStreamServer(socket_path.as_posix(), + MyHandler) as server: + print('listening on socket: %s' % socket_path) + server.serve_forever() diff --git a/gcc/diagnostics/sarif-sink.cc b/gcc/diagnostics/sarif-sink.cc index 8bd026901c6b..ac0a27472551 100644 --- a/gcc/diagnostics/sarif-sink.cc +++ b/gcc/diagnostics/sarif-sink.cc @@ -55,6 +55,8 @@ along with GCC; see the file COPYING3. If not see #include "backtrace.h" #include "xml.h" #include "intl.h" +#include <sys/un.h> +#include <sys/socket.h> namespace diagnostics { @@ -1917,7 +1919,8 @@ sarif_builder::emit_diagram (const diagram &d) m_cur_group_result->on_diagram (d, *this); } -/* Implementation of "end_group_cb" for SARIF output. */ +/* Implementation of "end_group_cb" for SARIF output. + Append the current sarifResult to results, and set it to nullptr. 
*/ void sarif_builder::end_group () @@ -4011,7 +4014,7 @@ public: { /* No-op, */ } - void on_end_group () final override + void on_end_group () override { m_builder.end_group (); } @@ -4123,6 +4126,93 @@ private: output_file m_output_file; }; +class sarif_socket_sink : public sarif_sink +{ +public: + sarif_socket_sink (context &dc, + const line_maps *line_maps, + std::unique_ptr<sarif_serialization_format> serialization_format, + const sarif_generation_options &sarif_gen_opts, + int fd) + : sarif_sink (dc, line_maps, + std::move (serialization_format), + sarif_gen_opts), + m_fd (fd) + { + } + ~sarif_socket_sink () + { + close (m_fd); + } + void dump_kind (FILE *out) const override + { + fprintf (out, "sarif_socket_sink: fd=%i", m_fd); + } + bool machine_readable_stderr_p () const final override + { + return false; + } + + /* Rather than appending it to the results array, instead + send it to the output socket as a JSON-RPC 2.0 notification. */ + void on_end_group () final override + { + // TODO: what about buffering? 
+ + std::unique_ptr<sarif_result> result = m_builder.take_current_result (); + if (!result) + return; + + auto notification = std::make_unique<json::object> (); + notification->set_string ("jsonrpc", "2.0"); + notification->set_string ("method", "OnSarifResult"); + { + auto params = std::make_unique<json::object> (); + params->set ("result", std::move (result)); + notification->set ("params", std::move (params)); + } + + send_rpc_notification (*notification); + } + +private: + void + send_rpc_notification (const json::object &notification) + { + DIAGNOSTICS_LOG_SCOPE_PRINTF0 (m_context.get_logger (), + "sarif_socket_sink::send_rpc_notification"); + const bool formatted = false; + pretty_printer pp_content; + notification.print (&pp_content, formatted); + size_t content_length = strlen (pp_formatted_text (&pp_content)); + + pretty_printer pp_header; +#if __GNUC__ >= 10 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-diag" +#endif + pp_printf (&pp_header, "Content-Length: %li\n\n", content_length); +#if __GNUC__ >= 10 +# pragma GCC diagnostic pop +#endif + size_t header_length = strlen (pp_formatted_text (&pp_header)); + + size_t output_size = header_length + content_length; + char *buf = (char *)xmalloc (output_size); + memcpy (buf, pp_formatted_text (&pp_header), header_length); + memcpy (buf + header_length, + pp_formatted_text (&pp_content), + content_length); + + /* TODO: should we attempt to handle partial writes here? */ + write (m_fd, buf, output_size); + + free (buf); + } + + int m_fd; +}; + /* Print the start of an embedded link to PP, as per 3.11.6. 
*/ static void @@ -4448,6 +4538,47 @@ sarif_generation_options::dump (FILE *outfile, int indent) const DIAGNOSTICS_DUMPING_EMIT_BOOL_FIELD (m_state_graph); } +void +maybe_open_sarif_sink_for_socket (context &dc) +{ + gcc_assert (line_table); + + const char * const env_var_name = "EXPERIMENTAL_SARIF_SOCKET"; + const char * const socket_name = getenv (env_var_name); + if (!socket_name) + return; + + int sfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (sfd == -1) + fatal_error (UNKNOWN_LOCATION, + "unable to create socket"); + + struct sockaddr_un addr; + memset (&addr, 0, sizeof (addr)); + addr.sun_family = AF_UNIX; + strncpy (addr.sun_path, socket_name, sizeof (addr.sun_path) - 1); + + if (connect (sfd, (struct sockaddr *)&addr, sizeof (addr)) == -1) + fatal_error (UNKNOWN_LOCATION, + "unable to connect to %qs", + socket_name); + + /* TODO: should there be a way to specify other key/value + pairs here? (as per -fdiagnostics-add-output, but as an + environment variable, perhaps). */ + sarif_generation_options sarif_gen_opts; + sarif_gen_opts.m_version = sarif_version::v2_1_0; + + auto sink_ = std::make_unique<sarif_socket_sink> + (dc, + line_table, + std::make_unique <sarif_serialization_format_json> (true), + sarif_gen_opts, + sfd); + sink_->update_printer (); + dc.add_sink (std::move (sink_)); +} + #if CHECKING_P namespace selftest { diff --git a/gcc/diagnostics/sarif-sink.h b/gcc/diagnostics/sarif-sink.h index e6f897b1f42e..5a9914ac571c 100644 --- a/gcc/diagnostics/sarif-sink.h +++ b/gcc/diagnostics/sarif-sink.h @@ -188,6 +188,9 @@ extern std::unique_ptr<sarif_edge> make_sarif_edge (const digraphs::edge &e, sarif_builder *builder); +extern void +maybe_open_sarif_sink_for_socket (context &ctxt); + } // namespace diagnostics #endif /* ! 
GCC_DIAGNOSTICS_SARIF_SINK_H */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e1a8134c1064..8dacd080c5a9 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -38425,6 +38425,19 @@ As @code{fixits-v1}, but columns are expressed as display columns, as per @option{-fdiagnostics-column-unit=display}. @end table +@vindex EXPERIMENTAL_SARIF_SOCKET +@item EXPERIMENTAL_SARIF_SOCKET +If @env{EXPERIMENTAL_SARIF_SOCKET} is set in the environment, then the +compiler will attempt to connect to a UNIX domain stream socket with +that name, and send an @code{OnSarifResult} JSON-RPC 2.0 notification to +it for each diagnostic that occurs, where the value of the notification +is a SARIF @code{result} object. + +The compiler will fail immediately if @env{EXPERIMENTAL_SARIF_SOCKET} is +set and it cannot connect to it. + +This feature is experimental and subject to change or removal without +notice. @end table @noindent diff --git a/gcc/toplev.cc b/gcc/toplev.cc index d26467450e37..bcf1b7604c83 100644 --- a/gcc/toplev.cc +++ b/gcc/toplev.cc @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see in the proper order, and counts the time used by each. Error messages and low-level interface to malloc also handled here. */ +#define INCLUDE_VECTOR #include "config.h" #include "system.h" #include "coretypes.h" @@ -83,6 +84,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-offload.h" #include "diagnostics/changes.h" #include "diagnostics/file-cache.h" +#include "diagnostics/sarif-sink.h" #include "tree-pass.h" #include "dumpfile.h" #include "ipa-fnsummary.h" @@ -2352,6 +2354,8 @@ toplev::main (int argc, char **argv) handle_common_deferred_options (); + diagnostics::maybe_open_sarif_sink_for_socket (*global_dc); + init_local_tick (); initialize_plugins ();
