https://gcc.gnu.org/g:07485ccd31935b1f82d321f91c677840bd05247c

commit r14-10953-g07485ccd31935b1f82d321f91c677840bd05247c
Author: David Malcolm <dmalc...@redhat.com>
Date:   Fri Jun 21 08:46:13 2024 -0400

    diagnostics: fixes to SARIF output [PR109360]
    
    When adding validation of .sarif files against the schema
    (PR testsuite/109360) I discovered various issues where we were
    generating invalid .sarif files.
    
    Specifically, in
      c-c++-common/diagnostic-format-sarif-file-bad-utf8-pr109098-1.c
    the relatedLocations for the "note" diagnostics were missing column
    numbers, leading to validation failure due to non-unique elements,
    such as multiple:
            "message": {"text": "invalid UTF-8 character <bf>"}},
    on line 25 with no column information.
    
    Root cause is that for some diagnostics in libcpp we have a location_t
    representing the line as a whole, setting a column_override on the
    rich_location (since the line hasn't been fully read yet).  We were
    handling this column override for plain text output, but not for .sarif
    output.
    
    Similarly, in diagnostic-format-sarif-file-pr111700.c there is a warning
    emitted on "line 0" of the file, whereas SARIF requires line numbers to
    be positive.
    
    We also use column == 0 internally to mean "the line as a whole",
    whereas SARIF required column numbers to be positive.
    
    This patch fixes these various issues.
    
    gcc/ChangeLog:
            PR testsuite/109360
            * diagnostic-format-sarif.cc
            (sarif_builder::make_location_object): Pass any column override
            from rich_loc to maybe_make_physical_location_object.
            (sarif_builder::maybe_make_physical_location_object): Add
            "column_override" param and pass it to maybe_make_region_object.
            (sarif_builder::maybe_make_region_object): Add "column_override"
            param and use it when the location has 0 for a column.  Don't
            add "startLine", "startColumn", "endLine", or "endColumn" if
            the values aren't positive.
            (sarif_builder::maybe_make_region_object_for_context): Don't
            add "startLine" or "endLine" if the values aren't positive.
    
    libcpp/ChangeLog:
            PR testsuite/109360
            * include/rich-location.h (rich_location::get_column_override):
            New accessor.
    
    Signed-off-by: David Malcolm <dmalc...@redhat.com>
    (cherry picked from commit 9f4fdc3acebcf6b045edea1361570658da4bc0ab)

Diff:
---
 gcc/diagnostic-format-sarif.cc | 79 ++++++++++++++++++++++++++++++------------
 libcpp/include/rich-location.h |  2 ++
 2 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 97c5943cd339..d27614d6c87b 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -189,11 +189,14 @@ private:
   make_thread_flow_location_object (const diagnostic_event &event,
                                    int path_event_idx);
   json::array *maybe_make_kinds_array (diagnostic_event::meaning m) const;
-  json::object *maybe_make_physical_location_object (location_t loc);
+  json::object *
+  maybe_make_physical_location_object (location_t loc,
+                                      int column_override);
   json::object *make_artifact_location_object (location_t loc);
   json::object *make_artifact_location_object (const char *filename);
   json::object *make_artifact_location_object_for_pwd () const;
-  json::object *maybe_make_region_object (location_t loc) const;
+  json::object *maybe_make_region_object (location_t loc,
+                                         int column_override) const;
   json::object *maybe_make_region_object_for_context (location_t loc) const;
   json::object *make_region_object_for_hint (const fixit_hint &hint) const;
   json::object *make_multiformat_message_string (const char *msg) const;
@@ -771,7 +774,9 @@ sarif_builder::make_location_object (const rich_location 
&rich_loc,
   location_t loc = rich_loc.get_loc ();
 
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
-  if (json::object *phs_loc_obj = maybe_make_physical_location_object (loc))
+  if (json::object *phs_loc_obj
+       = maybe_make_physical_location_object (loc,
+                                              rich_loc.get_column_override ()))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -790,7 +795,8 @@ sarif_builder::make_location_object (const diagnostic_event 
&event)
 
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
   location_t loc = event.get_location ();
-  if (json::object *phs_loc_obj = maybe_make_physical_location_object (loc))
+  if (json::object *phs_loc_obj
+       = maybe_make_physical_location_object (loc, 0))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -805,12 +811,15 @@ sarif_builder::make_location_object (const 
diagnostic_event &event)
   return location_obj;
 }
 
-/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC,
-   or return NULL;
-   Add any filename to the m_artifacts.  */
+/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.  */
 
 json::object *
-sarif_builder::maybe_make_physical_location_object (location_t loc)
+sarif_builder::
+maybe_make_physical_location_object (location_t loc,
+                                    int column_override)
 {
   if (loc <= BUILTINS_LOCATION || LOCATION_FILE (loc) == NULL)
     return NULL;
@@ -823,7 +832,8 @@ sarif_builder::maybe_make_physical_location_object 
(location_t loc)
   m_filenames.add (LOCATION_FILE (loc));
 
   /* "region" property (SARIF v2.1.0 section 3.29.4).  */
-  if (json::object *region_obj = maybe_make_region_object (loc))
+  if (json::object *region_obj = maybe_make_region_object (loc,
+                                                          column_override))
     phys_loc_obj->set ("region", region_obj);
 
   /* "contextRegion" property (SARIF v2.1.0 section 3.29.5).  */
@@ -929,10 +939,14 @@ sarif_builder::get_sarif_column (expanded_location 
exploc) const
 }
 
 /* Make a region object (SARIF v2.1.0 section 3.30) for LOC,
-   or return NULL.  */
+   or return NULL.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.  */
 
 json::object *
-sarif_builder::maybe_make_region_object (location_t loc) const
+sarif_builder::maybe_make_region_object (location_t loc,
+                                        int column_override) const
 {
   location_t caret_loc = get_pure_location (loc);
 
@@ -954,21 +968,40 @@ sarif_builder::maybe_make_region_object (location_t loc) 
const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
-  /* "startColumn" property (SARIF v2.1.0 section 3.30.6) */
-  region_obj->set_integer ("startColumn", get_sarif_column (exploc_start));
+  /* "startColumn" property (SARIF v2.1.0 section 3.30.6).
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case, unless COLUMN_OVERRIDE is non-zero,
+     (for handling certain awkward lexer diagnostics)  */
+
+  if (exploc_start.column == 0 && column_override)
+    /* Use the provided column number.  */
+    exploc_start.column = column_override;
+
+  if (exploc_start.column > 0)
+    {
+      int start_column = get_sarif_column (exploc_start);
+      region_obj->set_integer ("startColumn", start_column);
+    }
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "endColumn" property (SARIF v2.1.0 section 3.30.8).
-     This expresses the column immediately beyond the range.  */
-  {
-    int next_column = sarif_builder::get_sarif_column (exploc_finish) + 1;
-    region_obj->set_integer ("endColumn", next_column);
-  }
+     This expresses the column immediately beyond the range.
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case.  */
+  if (exploc_finish.column > 0)
+    {
+      int next_column = get_sarif_column (exploc_finish) + 1;
+      region_obj->set_integer ("endColumn", next_column);
+    }
 
   return region_obj;
 }
@@ -1004,10 +1037,12 @@ sarif_builder::maybe_make_region_object_for_context 
(location_t loc) const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "snippet" property (SARIF v2.1.0 section 3.30.13).  */
diff --git a/libcpp/include/rich-location.h b/libcpp/include/rich-location.h
index 94c6e4400532..d5c343defcd3 100644
--- a/libcpp/include/rich-location.h
+++ b/libcpp/include/rich-location.h
@@ -511,6 +511,8 @@ class rich_location
 
   const line_maps *get_line_table () const { return m_line_table; }
 
+  int get_column_override () const { return m_column_override; }
+
 private:
   bool reject_impossible_fixit (location_t where);
   void stop_supporting_fixits ();

Reply via email to