On Fri, 2022-11-04 at 09:44 -0400, Lewis Hyatt via Gcc-patches wrote: > Add awareness that diagnostic locations may be in generated buffers > rather > than an actual file to other places in the diagnostics code that may > care, > most notably SARIF output (which needs to obtain its own snapshots of > the code > involved). For edit context output, which outputs fixit hints as > diffs, for > now just make sure we ignore generated data buffers. At the moment, > there is > no ability for a fixit hint to be generated in such a buffer. > > Because SARIF uses JSON as well, also add the ability to the > json::string > class to handle a buffer with nulls in the middle (since we place no > restriction on LC_GEN content) by providing the option to specify the > data > length.
Please can you split this patch into three parts: - the SARIF part - the json changes - the edit-context.cc changes (I think this at least counts as an "obvious" change with respect to the other changes in the kit, though I'm still working my way through patch 4 in the kit). Please add a DejaGnu testcase to the SARIF part, with a diagnostic that references a generated data buffer; see gcc/testsuite/c-c++-common/diagnostic-format-sarif-file-*.c for examples of SARIF testcases. Please add a selftest to the json change so that we have a unit test of constructing a json::string with an embedded NUL, and how we serialize such a string (probably to json.cc's test_writing_strings) Thanks Dave > > gcc/ChangeLog: > > * diagnostic-format-sarif.cc (sarif_builder::xloc_to_fb): New > function. > (sarif_builder::maybe_make_physical_location_object): Support > generated data locations. > (sarif_builder::make_artifact_location_object): Likewise. > (sarif_builder::maybe_make_region_object_for_context): > Likewise. > (sarif_builder::make_artifact_object): Likewise. > (sarif_builder::maybe_make_artifact_content_object): > Likewise. > (get_source_lines): Likewise. > * edit-context.cc (edit_context::apply_fixit): Ignore > generated > locations if one should make its way this far. > * json.cc (string::string): Support non-null-terminated > string. > (string::print): Likewise. > * json.h (class string): Likewise. > --- > gcc/diagnostic-format-sarif.cc | 86 +++++++++++++++++++++----------- > -- > gcc/edit-context.cc | 4 ++ > gcc/json.cc | 17 +++++-- > gcc/json.h | 5 +- > 4 files changed, 75 insertions(+), 37 deletions(-) > > diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format- > sarif.cc > index 7110db4edd6..c2d18a1a16e 100644 > --- a/gcc/diagnostic-format-sarif.cc > +++ b/gcc/diagnostic-format-sarif.cc > @@ -125,7 +125,10 @@ private: > json::array *maybe_make_kinds_array (diagnostic_event::meaning m) > const; > json::object *maybe_make_physical_location_object (location_t > loc); > json::object *make_artifact_location_object (location_t loc); > - json::object *make_artifact_location_object (const char > *filename); > + > + typedef std::pair<const char *, unsigned int> filename_or_buffer; > + json::object *make_artifact_location_object (filename_or_buffer > fb); > + > json::object *make_artifact_location_object_for_pwd () const; > json::object *maybe_make_region_object (location_t loc) const; > json::object *maybe_make_region_object_for_context (location_t > loc) const; > @@ -146,16 +149,17 @@ private: > json::object *make_reporting_descriptor_object_for_cwe_id (int > cwe_id) const; > json::object * > make_reporting_descriptor_reference_object_for_cwe_id (int > cwe_id); > - json::object *make_artifact_object (const char *filename); > - json::object *maybe_make_artifact_content_object (const char > *filename) const; > - json::object *maybe_make_artifact_content_object (const char > *filename, > - int start_line, > + json::object *make_artifact_object (filename_or_buffer fb); > + json::object * > + maybe_make_artifact_content_object (filename_or_buffer fb) const; > + json::object *maybe_make_artifact_content_object > (expanded_location xloc, > int end_line) > const; > json::object *make_fix_object (const rich_location &rich_loc); > json::object *make_artifact_change_object (const rich_location > &richloc); > json::object *make_replacement_object (const fixit_hint &hint) > const; > json::object *make_artifact_content_object (const char *text) > const; > int get_sarif_column (expanded_location exploc) const; > + static filename_or_buffer xloc_to_fb (expanded_location xloc); > > diagnostic_context *m_context; > > @@ -166,7 +170,11 @@ private: > diagnostic group. */ > sarif_result *m_cur_group_result; > > - hash_set <const char *> m_filenames; > + /* If the second member is >0, then this is a buffer of generated > content, > + with that length, not a filename. */ > + hash_set <pair_hash <nofree_ptr_hash <const char>, > + int_hash <unsigned int, -1U> > > + > m_filenames; > bool m_seen_any_relative_paths; > hash_set <free_string_hash> m_rule_id_set; > json::array *m_rules_arr; > @@ -588,6 +596,15 @@ sarif_builder::make_location_object (const > diagnostic_event &event) > return location_obj; > } > > +/* Populate a filename_or_buffer pair from an expanded location. */ > +sarif_builder::filename_or_buffer > +sarif_builder::xloc_to_fb (expanded_location xloc) > +{ > + if (xloc.generated_data_len) > + return filename_or_buffer (xloc.generated_data, > xloc.generated_data_len); > + return filename_or_buffer (xloc.file, 0); > +} > + > /* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for > LOC, > or return NULL; > Add any filename to the m_artifacts. */ > @@ -603,7 +620,7 @@ > sarif_builder::maybe_make_physical_location_object (location_t loc) > /* "artifactLocation" property (SARIF v2.1.0 section 3.29.3). */ > json::object *artifact_loc_obj = make_artifact_location_object > (loc); > phys_loc_obj->set ("artifactLocation", artifact_loc_obj); > - m_filenames.add (LOCATION_FILE (loc)); > + m_filenames.add (xloc_to_fb (expand_location (loc))); > > /* "region" property (SARIF v2.1.0 section 3.29.4). */ > if (json::object *region_obj = maybe_make_region_object (loc)) > @@ -627,7 +644,7 @@ > sarif_builder::maybe_make_physical_location_object (location_t loc) > json::object * > sarif_builder::make_artifact_location_object (location_t loc) > { > - return make_artifact_location_object (LOCATION_FILE (loc)); > + return make_artifact_location_object (xloc_to_fb (expand_location > (loc))); > } > > /* The ID value for use in "uriBaseId" properties (SARIF v2.1.0 > section 3.4.4) > @@ -639,10 +656,12 @@ sarif_builder::make_artifact_location_object > (location_t loc) > or return NULL. */ > > json::object * > -sarif_builder::make_artifact_location_object (const char *filename) > +sarif_builder::make_artifact_location_object (filename_or_buffer fb) > { > json::object *artifact_loc_obj = new json::object (); > > + const auto filename = (fb.second ? special_fname_generated () : > fb.first); > + > /* "uri" property (SARIF v2.1.0 section 3.4.3). */ > artifact_loc_obj->set ("uri", new json::string (filename)); > > @@ -795,9 +814,7 @@ > sarif_builder::maybe_make_region_object_for_context (location_t loc) > const > > /* "snippet" property (SARIF v2.1.0 section 3.30.13). */ > if (json::object *artifact_content_obj > - = maybe_make_artifact_content_object (exploc_start.file, > - exploc_start.line, > - exploc_finish.line)) > + = maybe_make_artifact_content_object (exploc_start, > exploc_finish.line)) > region_obj->set ("snippet", artifact_content_obj); > > return region_obj; > @@ -1248,24 +1265,24 @@ sarif_builder::maybe_make_cwe_taxonomy_object > () const > /* Make an artifact object (SARIF v2.1.0 section 3.24). */ > > json::object * > -sarif_builder::make_artifact_object (const char *filename) > +sarif_builder::make_artifact_object (filename_or_buffer fb) > { > json::object *artifact_obj = new json::object (); > > /* "location" property (SARIF v2.1.0 section 3.24.2). */ > - json::object *artifact_loc_obj = make_artifact_location_object > (filename); > + json::object *artifact_loc_obj = make_artifact_location_object > (fb); > artifact_obj->set ("location", artifact_loc_obj); > > /* "contents" property (SARIF v2.1.0 section 3.24.8). */ > if (json::object *artifact_content_obj > - = maybe_make_artifact_content_object (filename)) > + = maybe_make_artifact_content_object (fb)) > artifact_obj->set ("contents", artifact_content_obj); > > /* "sourceLanguage" property (SARIF v2.1.0 section 3.24.10). */ > if (m_context->m_client_data_hooks) > if (const char *source_lang > = m_context->m_client_data_hooks- > >maybe_get_sarif_source_language > - (filename)) > + (fb.first)) > artifact_obj->set ("sourceLanguage", new json::string > (source_lang)); > > return artifact_obj; > @@ -1331,16 +1348,21 @@ maybe_read_file (const char *filename) > full contents of FILENAME. */ > > json::object * > -sarif_builder::maybe_make_artifact_content_object (const char > *filename) const > +sarif_builder::maybe_make_artifact_content_object > (filename_or_buffer fb) const > { > - char *text_utf8 = maybe_read_file (filename); > - if (!text_utf8) > - return NULL; > - > - json::object *artifact_content_obj = new json::object (); > - artifact_content_obj->set ("text", new json::string (text_utf8)); > - free (text_utf8); > - > + json::object *artifact_content_obj = nullptr; > + if (fb.second) > + { > + artifact_content_obj = new json::object (); > + artifact_content_obj->set ("text", new json::string (fb.first, > + > fb.second)); > + } > + else if (char *text_utf8 = maybe_read_file (fb.first)) > + { > + artifact_content_obj = new json::object (); > + artifact_content_obj->set ("text", new json::string > (text_utf8)); > + free (text_utf8); > + } > return artifact_content_obj; > } > > @@ -1348,15 +1370,14 @@ > sarif_builder::maybe_make_artifact_content_object (const char > *filename) const > a freshly-allocated 0-terminated buffer containing them, or > NULL. */ > > static char * > -get_source_lines (const char *filename, > - int start_line, > +get_source_lines (expanded_location xloc, > int end_line) > { > auto_vec<char> result; > > - for (int line = start_line; line <= end_line; line++) > + for (int line = xloc.line; line <= end_line; line++) > { > - char_span line_content = location_get_source_line (filename, > line); > + char_span line_content = location_get_source_line (xloc, > line); > if (!line_content.get_buffer ()) > return NULL; > result.reserve (line_content.length () + 1); > @@ -1370,14 +1391,13 @@ get_source_lines (const char *filename, > } > > /* Make an artifactContent object (SARIF v2.1.0 section 3.3) for the > given > - run of lines within FILENAME (including the endpoints). */ > + run of lines starting at XLOC (including the endpoints). */ > > json::object * > -sarif_builder::maybe_make_artifact_content_object (const char > *filename, > - int start_line, > +sarif_builder::maybe_make_artifact_content_object (expanded_location > xloc, > int end_line) > const > { > - char *text_utf8 = get_source_lines (filename, start_line, > end_line); > + char *text_utf8 = get_source_lines (xloc, end_line); > > if (!text_utf8) > return NULL; > diff --git a/gcc/edit-context.cc b/gcc/edit-context.cc > index 6879ddd41b4..aa95bc0834f 100644 > --- a/gcc/edit-context.cc > +++ b/gcc/edit-context.cc > @@ -301,8 +301,12 @@ edit_context::apply_fixit (const fixit_hint > *hint) > return false; > if (start.column == 0) > return false; > + if (start.generated_data) > + return false; > if (next_loc.column == 0) > return false; > + if (next_loc.generated_data) > + return false; > > edited_file &file = get_or_insert_file (start.file); > if (!m_valid) > diff --git a/gcc/json.cc b/gcc/json.cc > index 974f8c36825..3ebe8495e96 100644 > --- a/gcc/json.cc > +++ b/gcc/json.cc > @@ -190,6 +190,15 @@ string::string (const char *utf8) > { > gcc_assert (utf8); > m_utf8 = xstrdup (utf8); > + m_len = strlen (utf8); > +} > + > +string::string (const char *utf8, size_t len) > +{ > + gcc_assert (utf8); > + m_utf8 = XNEWVEC (char, len); > + m_len = len; > + memcpy (m_utf8, utf8, len); > } > > /* Implementation of json::value::print for json::string. */ > @@ -198,9 +207,9 @@ void > string::print (pretty_printer *pp) const > { > pp_character (pp, '"'); > - for (const char *ptr = m_utf8; *ptr; ptr++) > + for (size_t i = 0; i != m_len; ++i) > { > - char ch = *ptr; > + char ch = m_utf8[i]; > switch (ch) > { > case '"': > @@ -224,7 +233,9 @@ string::print (pretty_printer *pp) const > case '\t': > pp_string (pp, "\\t"); > break; > - > + case '\0': > + pp_string (pp, "\\0"); > + break; > default: > pp_character (pp, ch); > } > diff --git a/gcc/json.h b/gcc/json.h > index f272981259b..f7afd843dc5 100644 > --- a/gcc/json.h > +++ b/gcc/json.h > @@ -156,16 +156,19 @@ class integer_number : public value > class string : public value > { > public: > - string (const char *utf8); > + explicit string (const char *utf8); > + string (const char *utf8, size_t len); > ~string () { free (m_utf8); } > > enum kind get_kind () const final override { return JSON_STRING; } > void print (pretty_printer *pp) const final override; > > const char *get_string () const { return m_utf8; } > + size_t get_length () const { return m_len; } > > private: > char *m_utf8; > + size_t m_len; > }; > > /* Subclass of value for the three JSON literals "true", "false", >