This implements a new "policy" class to read a description of a set of checkers to run, along with a "checker" class to handle actually running the checkers, outputting the results through gcc's diagnostic subsystem, and watermarking the generated binary with the results and metadata.
Caveats: * there's currently no way to express suppressions (e.g. "run clang-analyzer, but ignore errors foo, bar, and baz"); that said, it *does* capture that metadata about the diagnostics. I'm thinking of something like a 4-state enum value per test id: - error: hard error that fails the build - warn: warn, but don't fail the build - log: capture within watermark, but don't warn - drop: ignore altogether defaulting to "error". * the policy is read from a monolithic JSON file; this format is clunky for users to work with, and probably would be easier to do as simple gcc options, one per checker * to save time, some of this code relies on stuff within "selftest", which would need moving out of there for release builds * there are quite a few other FIXMEs in this patch gcc/ChangeLog: * Makefile.in (OBJS): Add checkers.o. * checkers.cc: New file. * checkers.h: New file. * selftest-run-tests.c (selftest::run_tests): Call selftest::checkers_cc_tests. * selftest.h (selftest::checkers_cc_tests): New decl. gcc/testsuite/ChangeLog: * selftests/checker-policy/test-policy.json: New file. --- gcc/Makefile.in | 1 + gcc/checkers.cc | 736 +++++++++++++++++++++ gcc/checkers.h | 26 + gcc/selftest-run-tests.c | 1 + gcc/selftest.h | 1 + .../selftests/checker-policy/test-policy.json | 7 + 6 files changed, 772 insertions(+) create mode 100644 gcc/checkers.cc create mode 100644 gcc/checkers.h create mode 100644 gcc/testsuite/selftests/checker-policy/test-policy.json diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 319e3f3..189833e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1236,6 +1236,7 @@ OBJS = \ cfgloopanal.o \ cfgloopmanip.o \ cfgrtl.o \ + checkers.o \ symtab.o \ cgraph.o \ cgraphbuild.o \ diff --git a/gcc/checkers.cc b/gcc/checkers.cc new file mode 100644 index 0000000..1a16799 --- /dev/null +++ b/gcc/checkers.cc @@ -0,0 +1,736 @@ +/* Running 3rd-party code analysis tools. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. 
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "options.h"
+#include "diagnostic.h"
+#include "selftest.h"
+#include "firehose.h"
+#include "json.h"
+#include <pthread.h>
+#include "checkers.h"
+#include "annobin.h"
+#include "cpplib.h"
+#include "incpath.h"
+
+
+static bool
+diagnostic_at_rich_loc_va (rich_location *richloc,
+                           diagnostic_info *diagnostic,
+                           const char *gmsgid,
+                           va_list *ap) ATTRIBUTE_GCC_DIAG(3,0);
+
+static bool
+diagnostic_at (location_t loc, diagnostic_info *diagnostic,
+               const char *gmsgid, ...) ATTRIBUTE_GCC_DIAG(3,4);
+
+/* Print any trace of states associated with ISSUE.
+
+   Does nothing if ISSUE has no trace.  Otherwise the trace is first
+   filtered, and, unless what remains is redundant with respect to
+   ISSUE, one note is emitted per remaining state via "inform", at the
+   state's location, numbered "state I of N" (1-based) and followed by
+   the state's notes when it has any.  */
+
+static void
+print_any_trace (const firehose::issue &issue)
+{
+  if (!issue.m_trace)
+    return;
+  if (0)
+    inform (UNKNOWN_LOCATION, "got trace");
+
+  /* Filter out any states within the trace that don't have text.  */
+  issue.m_trace->filter ();
+
+  /* If we're just left with a single state that duplicates what we
+     already printed for the issue, don't bother printing it.  */
+  if (issue.m_trace->is_redundant_p (issue))
+    return;
+
+  int i;
+  firehose::state *s;
+  int num_states = issue.m_trace->m_states.length ();
+  FOR_EACH_VEC_ELT (issue.m_trace->m_states, i, s)
+    {
+      if (s->m_notes)
+        inform (s->m_location, "state %i of %i: %s", i + 1, num_states,
+                s->m_notes);
+      else
+        inform (s->m_location, "state %i of %i", i + 1, num_states);
+    }
+}
+
+/* FIXME: this still needs a proper description.
+
+   Fill in DIAGNOSTIC from the format string GMSGID and the arguments
+   in AP, located at RICHLOC, keeping the kind that DIAGNOSTIC already
+   carries, and report it through global_dc.  All four arguments must
+   be non-NULL (this is asserted).
+
+   Returns the result of diagnostic_report_diagnostic.
+
+   The two "#if 0" regions are disabled code for DK_PERMERROR handling
+   and for attaching an option index to warnings; they are not
+   compiled.  */
+
+bool
+diagnostic_at_rich_loc_va (rich_location *richloc,
+                           diagnostic_info *diagnostic,
+                           const char *gmsgid,
+                           va_list *ap)
+{
+  gcc_assert (richloc);
+  gcc_assert (diagnostic);
+  gcc_assert (gmsgid);
+  gcc_assert (ap);
+
+  diagnostic_t kind = diagnostic->kind;
+#if 0
+  if (kind == DK_PERMERROR)
+    {
+      diagnostic_set_info (diagnostic, gmsgid, ap, richloc,
+                           permissive_error_kind (global_dc));
+      diagnostic.option_index = permissive_error_option (global_dc);
+    }
+  else
+#endif
+    {
+      diagnostic_set_info (diagnostic, gmsgid, ap, richloc, kind);
+#if 0
+      if (kind == DK_WARNING || kind == DK_PEDWARN)
+        diagnostic.option_index = opt;
+#endif
+    }
+  return diagnostic_report_diagnostic (global_dc, diagnostic);
+}
+
+/* Emit DIAGNOSTIC at LOC.
+
+   Variadic convenience wrapper: builds a rich_location for LOC on the
+   global line_table and forwards the printf-style GMSGID and its
+   arguments to diagnostic_at_rich_loc_va, returning that function's
+   result.  The caller sets DIAGNOSTIC's kind beforehand.  */
+
+bool
+diagnostic_at (location_t loc, diagnostic_info *diagnostic,
+               const char *gmsgid, ...)
+{
+  va_list ap;
+  va_start (ap, gmsgid);
+  rich_location richloc (line_table, loc);
+  bool result = diagnostic_at_rich_loc_va (&richloc, diagnostic, gmsgid, &ap);
+  va_end (ap);
+  return result;
+}
+
+/* Emit a diagnostic for ISSUE.
+
+   The issue is reported as an error (DK_ERROR) at the issue's
+   location, tagged with the name of the tool that generated ANALYSIS
+   and with the issue's test id.  Any trace attached to the issue is
+   printed afterwards.  */
+
+static void
+handle_issue (const firehose::analysis &analysis,
+              const firehose::issue &issue)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_ERROR;
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = issue.m_testid;
+  diagnostic_at (issue.m_location, &diagnostic, "%s",
+                 issue.m_message);
+  print_any_trace (issue);
+}
+
+/* Emit a diagnostic for INFO.
*/
+
+static void
+handle_info (const firehose::analysis &analysis,
+             const firehose::info &info)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_NOTE; /* Infos are reported as notes, not errors.  */
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = info.m_infoid;
+  diagnostic_at (info.m_location, &diagnostic, "%s",
+                 info.m_message);
+}
+
+/* Emit a diagnostic for FAILURE.
+
+   The failure is reported as an error (DK_ERROR) at the failure's
+   location, tagged with the name of the tool that generated ANALYSIS
+   and with the failure id.  */
+
+static void
+handle_failure (const firehose::analysis &analysis,
+                const firehose::failure &failure)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_ERROR;
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = failure.m_failureid;
+  diagnostic_at (failure.m_location, &diagnostic, "%s",
+                 failure.m_message);
+}
+
+/* FIXME: taken from jit-playback.c.  */
+
+/* A subclass of auto_vec <char *> that frees all of its elements on
+   deletion.  Every element is passed to "free", so elements must be
+   either NULL or heap-allocated strings.  */
+
+class auto_argvec : public auto_vec <char *>
+{
+ public:
+  ~auto_argvec ();
+};
+
+/* auto_argvec's dtor, freeing all contained strings, automatically
+   chaining up to ~auto_vec <char *>, which frees the internal buffer.  */
+
+auto_argvec::~auto_argvec ()
+{
+  int i;
+  char *str;
+  FOR_EACH_VEC_ELT (*this, i, str)
+    free (str);
+}
+
+/* A struct to hold the results from a checker-invocation thread.
+
+   M_UTF8_BUFFER is the stdout captured from the checker, or NULL if
+   the invocation failed; M_ERR is the accompanying error message, if
+   any.  The struct takes ownership of both pointers and frees them in
+   its dtor, so a consumer that wants to keep one of them must set the
+   field to NULL before deleting the struct.  */
+
+struct thread_result
+{
+  thread_result (char *utf8_buffer, char *err)
+  : m_utf8_buffer (utf8_buffer), m_err (err) {}
+
+  ~thread_result () { free (m_utf8_buffer); free (m_err); }
+
+  char *m_utf8_buffer;
+  char *m_err;
+};
+
+/* A particular checker to run.
+   A "checker" is an executable which takes GCC-style command-line
+   arguments and writes a Firehose JSON file to stdout.
*/
+
+class checker
+{
+ public:
+  checker ();
+  ~checker ();
+
+  static checker *from_json (const json::value *jv, char *&out_err);
+
+  void start ();
+  void finish ();
+  void run_single_threaded ();
+
+  const char *get_executable () const { return m_executable; }
+
+  /* The raw stdout captured from the checker, or NULL if there is
+     none (yet).  */
+  const char *get_output () const { return m_utf8_buffer; }
+
+  /* The parsed form of the captured output, or NULL if it has not
+     been (or could not be) parsed.  */
+  const json::value *get_json_output () const { return m_jv; }
+
+ private:
+  static void *run_checker_thread (void *ptr);
+  thread_result *run_in_thread ();
+  char *capture_stdout (char *&out_err);
+  void make_args (auto_argvec &out) const;
+  void handle_json ();
+
+  /* Name of the executable to invoke; owned by this object.  */
+  char *m_executable;
+
+  /* The thread created by start ().  Only meaningful while
+     m_thread_started is true.  */
+  pthread_t m_tid;
+
+  /* True between a successful start () and the matching finish ().
+     Guards against joining a thread that was never created.  */
+  bool m_thread_started;
+
+  /* The captured stdout and its parsed JSON; both owned by this
+     object.  */
+  char *m_utf8_buffer;
+  json::value *m_jv;
+};
+
+/* A policy, listing which checkers to run.  The checkers in
+   m_checkers are owned by the policy and deleted by its dtor.  */
+
+class policy
+{
+ public:
+  ~policy ();
+
+  bool read_from_file (const char *path, char *&out_err);
+
+  auto_vec<checker *> m_checkers;
+};
+
+/* checker's ctor.  All fields start out empty, and no thread is
+   associated with the checker until start () succeeds.  */
+
+checker::checker ()
+: m_executable (NULL), m_tid (), m_thread_started (false),
+  m_utf8_buffer (NULL), m_jv (NULL)
+{
+}
+
+/* checker's dtor, releasing the executable name, the captured output
+   and the parsed JSON.  */
+
+checker::~checker ()
+{
+  free (m_executable);
+  free (m_utf8_buffer);
+  delete m_jv;
+}
+
+/* Attempt to allocate a new checker based on JV.
+   JV must have a string-valued "executable" key.
+   On success, return the new checker, which the caller owns.
+   On failure, return NULL and write to OUT_ERR (which must be freed
+   by the caller).  */
+
+checker *
+checker::from_json (const json::value *jv, char *&out_err)
+{
+  checker *ch = new checker ();
+
+  const char *executable;
+  if (!jv->get_string_by_key ("executable", executable, out_err))
+    {
+      delete ch;
+      return NULL;
+    }
+  /* Take a copy: the checker owns (and frees) m_executable.  */
+  ch->m_executable = xstrdup (executable);
+
+  // FIXME: languages
+
+  return ch;
+}
+
+/* Callback to pthread_create, for running one checker within a thread.
+   This is the entrypoint of the per-checker thread.
+   PTR is the checker; the return value is a heap-allocated
+   thread_result, to be picked up via pthread_join.  */
+
+void *
+checker::run_checker_thread (void *ptr)
+{
+  checker *ch = static_cast <checker *> (ptr);
+  return ch->run_in_thread ();
+}
+
+/* Create a thread for this checker, calling run_in_thread within it.
+   This is run on the main thread, called by checkers_start.
+
+   If the thread cannot be created, an error is reported and the
+   checker is left marked as not started, so that finish () will not
+   attempt to join a thread that does not exist.  */
+
+void
+checker::start ()
+{
+  int err = pthread_create (&m_tid,
+                            NULL,
+                            run_checker_thread,
+                            this);
+  if (err)
+    {
+      error_at (UNKNOWN_LOCATION,
+                "error invoking checker %qs: pthread_create failed: %i",
+                m_executable, err);
+      return;
+    }
+  m_thread_started = true;
+}
+
+/* Wait for this checker's thread to finish, and call
+   handle_json on the UTF-8 JSON result.
+
+   This is run on the main thread, called by checkers_finish.
+
+   Does nothing if no thread is running for this checker (start () was
+   never called, or it failed and has already reported the error).  */
+
+void
+checker::finish ()
+{
+  /* Joining a pthread_t that was never successfully created is
+     undefined behavior, so bail out early.  */
+  if (!m_thread_started)
+    return;
+  m_thread_started = false;
+
+  /* Wait for the thread to finish.  */
+  void *retval;
+  if (int err = pthread_join (m_tid, &retval))
+    {
+      error_at (UNKNOWN_LOCATION,
+                "error invoking checker %qs: pthread_join failed: %i",
+                m_executable, err);
+      return;
+    }
+  thread_result *result = static_cast <thread_result *> (retval);
+
+  /* Process the output.  */
+  if (!result->m_utf8_buffer)
+    {
+      error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs",
+                m_executable, result->m_err);
+      delete result;
+      return;
+    }
+
+  /* Steal the buffer from RESULT, so that deleting RESULT doesn't
+     free it.  */
+  m_utf8_buffer = result->m_utf8_buffer;
+  result->m_utf8_buffer = NULL;
+  handle_json ();
+  delete result;
+}
+
+/* This is the "main" routine of the per-checker thread when
+   run in multithreaded mode.
+   Attempt to run the checker in a subprocess, and return the stdout
+   and any error messages, wrapped in a heap-allocated thread_result
+   which the joining thread must delete.  */
+
+thread_result *
+checker::run_in_thread ()
+{
+  char *err = NULL;
+  char *utf8_buffer = capture_stdout (err);
+
+  return new thread_result (utf8_buffer, err);
+}
+
+/* This is the single-threaded way to invoke a checker.
+   Run the checker in a subprocess, capture its stdout as UTF-8 JSON,
+   and call handle_json on it.
+   If the checker could not be run, report an error instead.  */
+
+void
+checker::run_single_threaded ()
+{
+  char *err = NULL;
+  m_utf8_buffer = capture_stdout (err);
+  if (!m_utf8_buffer)
+    {
+      error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs",
+                m_executable, err);
+      free (err);
+      return;
+    }
+  handle_json ();
+}
+
+/* Run the checker, capturing its stdout.
+
+   Return a buffer containing the captured stdout, which must be freed
+   by the caller.
+
+   This can be run either on the main thread, or within the
+   per-checker thread.  */
+
+// FIXME: this assumes that pex is thread-safe; is it?
+
+/* On failure, return NULL and set OUT_ERR to a message that the caller
+   must free.  OUT_ERR is left untouched on success.  */
+
+char *
+checker::capture_stdout (char *&out_err)
+{
+  auto_argvec argvec;
+
+  make_args (argvec);
+
+  /* pex argv arrays are NULL-terminated.  */
+  argvec.safe_push (NULL);
+
+  int exit_status = 0;
+  int err = 0;
+
+  struct pex_obj *obj = pex_init (0, progname, NULL);
+
+  const char *errmsg
+    = pex_run (obj,
+               PEX_SEARCH | PEX_USE_PIPES, /* int flags, */
+               m_executable, /* const char *executable, */
+               const_cast <char *const *> (argvec.address ()), /* argv, */
+               NULL, /* const char *outname */
+               NULL, /* const char *errname */
+               &err); /* int *err*/
+  if (errmsg)
+    {
+      /* The child could not be launched, so there is nothing to read.  */
+      out_err = xstrdup (errmsg);
+      pex_free (obj);
+      return NULL;
+    }
+
+  FILE *outf = pex_read_output (obj, 0);
+  if (!outf)
+    {
+      out_err = xstrdup ("unable to read stdout");
+      pex_free (obj);
+      return NULL;
+    }
+  /* "outf" is owned by "obj".  */
+
+  // FIXME: use something other than a selftest API for this!
+  char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, outf,
+                                           "stdout from checker");
+
+  /* Only wait for the child once its stdout has been drained: waiting
+     first can deadlock, since the child blocks writing to a full pipe
+     while we block waiting for it to exit.  */
+  bool got_status = pex_get_status (obj, 1, &exit_status);
+
+  pex_free (obj);
+
+  if (!got_status)
+    {
+      free (utf8_buffer);
+      out_err = xstrdup ("pex_get_status failed");
+      return NULL;
+    }
+  if (exit_status || err)
+    {
+      free (utf8_buffer);
+      out_err = xasprintf ("exit_status: %i err: %i",
+                           exit_status, err);
+      return NULL;
+    }
+
+  return utf8_buffer;
+}
+
+/* Subroutine of checker::capture_stdout.
+   Populate OUT with the command-line arguments for invoking the
+   checker: the executable name, "-B.", a "-I" argument per directory
+   in the QUOTE include chain, "-c", and the main input filename.
+   Every string pushed is heap-allocated, and is freed by OUT's dtor.
+   This can be run either on the main thread, or within the
+   per-checker thread.  */
+
+void
+checker::make_args (auto_argvec &out) const
+{
+#define ADD_ARG(arg) out.safe_push (xstrdup (arg))
+
+  ADD_ARG (m_executable);
+
+  /* FIXME: for some reason the clang-analyzer harness attempts to invoke cc1
+     and fails if it can't find it.  */
+  ADD_ARG ("-B.");
+
+  /* Provide -I arguments.  */
+  /* FIXME: other kinds of cpp_dir?  */
+  for (cpp_dir *dir = get_added_cpp_dirs (QUOTE); dir; dir = dir->next)
+    {
+      /* concat returns a fresh allocation, so no xstrdup is needed.  */
+      char *dash_i_arg = concat ("-I", dir->name, NULL);
+      out.safe_push (dash_i_arg);
+    }
+
+  /* FIXME: supply -D args.  */
+
+  ADD_ARG ("-c");
+  ADD_ARG (main_input_filename);
+
+#undef ADD_ARG
+}
+
+/* Given m_utf8_buffer, a non-NULL UTF-8-encoded buffer of JSON output
+   from a tool, in Firehose JSON format, emit the results through GCC's
+   diagnostic subsystem.
+   The parsed JSON is retained in m_jv.  If the buffer cannot be parsed
+   as JSON, or the JSON cannot be converted to Firehose objects, an
+   error is reported and no results are emitted.
+   This must be run on the main thread (firehose::analysis::from_json can
+   touch the global line_table, and error_at and the other diagnostic
+   emission requires the main thread).  */
+
+void
+checker::handle_json ()
+{
+  /* Attempt to parse the buffer as UTF-8-encoded JSON.  */
+  char *err = NULL;
+  m_jv = json::parse_utf8_string (m_utf8_buffer, &err);
+  if (!m_jv)
+    {
+      gcc_assert (err);
+      error_at (UNKNOWN_LOCATION,
+                "unable to parse tool output as UTF-8 JSON: %s", err);
+      free (err);
+      return;
+    }
+
+  gcc_assert (err == NULL);
+
+  /* Disabled debugging aid: dump the parsed JSON.  */
+  if (0)
+    {
+      m_jv->dump(stderr);
+      fprintf (stderr, "\n");
+    }
+
+  /* Attempt to parse the JSON values into Firehose objects.  */
+  firehose::analysis analysis;
+  analysis.from_json (m_jv, err);
+  if (err)
+    {
+      error_at (UNKNOWN_LOCATION, "error parsing JSON output: %qs", err);
+      free (err);
+      return;
+    }
+
+  gcc_assert (err == NULL);
+
+  /* Emit the results as GCC diagnostics.  */
+  int i;
+  firehose::result *result;
+  FOR_EACH_VEC_ELT (analysis.m_results, i, result)
+    {
+      switch (result->get_kind ())
+        {
+        case firehose::result::FIREHOSE_ISSUE:
+          handle_issue (analysis, *static_cast <firehose::issue *> (result));
+          break;
+        case firehose::result::FIREHOSE_INFO:
+          handle_info (analysis, *static_cast <firehose::info *> (result));
+          break;
+        case firehose::result::FIREHOSE_FAILURE:
+          handle_failure (analysis, *static_cast <firehose::failure *> (result));
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+}
+
+/* policy's dtor, deleting every checker that the policy owns.  */
+
+policy::~policy ()
+{
+  int i;
+  checker *ch;
+  FOR_EACH_VEC_ELT (m_checkers, i, ch)
+    delete ch;
+}
+
+/* Load policy from the JSON file at PATH.
+   If successful, return true.
+ Othewise, return false, writing to out_err (the caller + must free the string). */ + +bool +policy::read_from_file (const char *path, char *&out_err) +{ + // FIXME: this shouldn't be just in the selftests + char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, path); + // FIXME: error-checking + + /* Attempt to parse the buffer as UTF-8-encoded JSON. */ + json::value *jv = json::parse_utf8_string (utf8_buffer, &out_err); + if (!jv) + { + free (utf8_buffer); + return false; + } + + /* Convert to a policy object. */ + const json::array *arr = jv->as_array (); + if (!arr) + { + out_err = xstrdup ("not an array"); + delete jv; + return false; + } + + for (unsigned i = 0; i < arr->get_length (); i++) + { + checker *ch = checker::from_json (arr->get (i), out_err); + if (!ch) + { + delete jv; + return false; + } + m_checkers.safe_push (ch); + } + + delete jv; + return true; +} + +/* Interface for use by toplev.c */ + +static bool use_threads = true; // FIXME; move to class policy? +static policy *the_policy = NULL; + +/* Called near the beginning of toplev.c. + + Load a policy file from PATH. + If using threads, invoke the checkers specified by the policy, + each with their own thread reading the stdout from the checker. */ + +void +checkers_start (const char *path) +{ + the_policy = new policy (); + + /* Try to load a policy file. */ + char *err = NULL; + if (!the_policy->read_from_file (path, err)) + { + error_at (UNKNOWN_LOCATION, + "unable to load checker policy %qs: %qs", + path, err); + free (err); + return; + } + + /* If using threads, start the checkers specified by the policy now, + each on their own thread. */ + if (use_threads) + { + int i; + checker *ch; + FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch) + ch->start (); + } +} + +class note_buffer : public auto_vec<char> +{ + public: + void push_string (const char *str) + { + while (char ch = *str++) + safe_push (ch); + } +}; + +/* Called near the end of toplev.c. 
+
+   If using threads, wait for each checker thread to finish, and
+   process the results.
+   Otherwise, run each checker now in the main thread, sequentially,
+   processing the results.
+
+   Afterwards, write the combined JSON results of all checkers into
+   the output as an annobin note, and delete the policy.  */
+
+void
+checkers_finish ()
+{
+  int i;
+  checker *ch;
+
+  /* This should have been created in checkers_start.  */
+  gcc_assert (the_policy);
+
+  if (use_threads)
+    {
+      FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+        ch->finish ();
+    }
+  else
+    {
+      FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+        ch->run_single_threaded ();
+    }
+
+  /* Watermark the binary with the analysis results/metadata.  */
+  {
+    json::array *all_results = new json::array ();
+    FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+      if (ch->get_json_output ())
+        all_results->append (ch->get_json_output ()->clone ());
+      else
+        {
+          /* FIXME: what to do about recording failures?  presumably we should do these
+             in JSON format also.  */
+        }
+    char *all_results_str = all_results->to_str ();
+
+    /* The array is only needed to build the string; release it rather
+       than leaking it.  */
+    delete all_results;
+
+    /* annobin_output_string_note uses ".asciz" to write the "name", without
+       escaping newlines, quotes, or backslashes.  Hence we have to use
+       annobin_output_note directly, with name_is_string=false, which
+       uses ".dc.b" to write the name.  */
+    size_t len = strlen (all_results_str);
+    /* Room for "GA", the type byte, the attribute byte, the string
+       itself and the terminating NUL.  */
+    char *buffer = (char *) xmalloc (len + 5);
+    sprintf (buffer, "GA%c%c%s", GNU_BUILD_ATTRIBUTE_TYPE_STRING,
+             GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS, all_results_str);
+    free (all_results_str);
+
+    annobin_output_note (buffer, len + 5, false, "static analysis results",
+                         NULL, 0, false, NT_GNU_BUILD_ATTRIBUTE_OPEN);
+
+    free (buffer);
+  }
+
+  delete the_policy;
+  the_policy = NULL;
+}
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that we can load a policy file.
*/
+
+static void
+test_policy_parsing ()
+{
+  char *filename = locate_file ("checker-policy/test-policy.json");
+  char *err = NULL;
+  policy p;
+  bool success = p.policy::read_from_file (filename, err);
+  ASSERT_TRUE (success);
+  ASSERT_EQ (NULL, err); /* No error message on success.  */
+  free (filename);
+
+  ASSERT_EQ (4, p.m_checkers.length ()); /* One checker per JSON entry.  */
+  ASSERT_STREQ ("../../src/checkers/clang_analyzer.py",
+                p.m_checkers[0]->get_executable ());
+}
+
+/* Run all of the selftests within this file (called by run_tests).  */
+
+void
+checkers_cc_tests ()
+{
+  test_policy_parsing ();
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/checkers.h b/gcc/checkers.h
new file mode 100644
index 0000000..f023871
--- /dev/null
+++ b/gcc/checkers.h
@@ -0,0 +1,26 @@
+/* Running 3rd-party code analysis tools.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_CHECKERS_H
+#define GCC_CHECKERS_H
+
+extern void checkers_start (const char *path); /* PATH: JSON policy file.  */
+extern void checkers_finish (); /* Collects results; frees the policy.  */
+
+#endif /* GCC_CHECKERS_H.  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 8afcd43..18c50a6 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -75,6 +75,7 @@ selftest::run_tests ()
   rtl_tests_c_tests ();
   read_rtl_function_c_tests ();
   firehose_cc_tests ();
+  checkers_cc_tests ();
 
   /* Higher-level tests, or for components that other selftests don't
      rely on.
*/ diff --git a/gcc/selftest.h b/gcc/selftest.h index 541bb71..0fe2c57 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -178,6 +178,7 @@ extern const char *path_to_selftest_files; /* Declarations for specific families of tests (by source file), in alphabetical order. */ extern void bitmap_c_tests (); +extern void checkers_cc_tests (); extern void diagnostic_c_tests (); extern void diagnostic_show_locus_c_tests (); extern void edit_context_c_tests (); diff --git a/gcc/testsuite/selftests/checker-policy/test-policy.json b/gcc/testsuite/selftests/checker-policy/test-policy.json new file mode 100644 index 0000000..90532b2 --- /dev/null +++ b/gcc/testsuite/selftests/checker-policy/test-policy.json @@ -0,0 +1,7 @@ +[{ "executable": "../../src/checkers/clang_analyzer.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/cppcheck.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/flawfinder.py", + "languages": ["c", "c++"] }, + { "executable": "../../src/checkers/ianal.py"}] -- 1.8.5.3