https://gcc.gnu.org/g:a874b8301d9aa0421522d5aa11736f1510edb13a

commit r15-2465-ga874b8301d9aa0421522d5aa11736f1510edb13a
Author: David Malcolm <dmalc...@redhat.com>
Date:   Wed Jul 31 20:38:41 2024 -0400

    diagnostics: SARIF output: capture unlabelled secondary locations
    
    This patch extends
    * the work done in r15-2291-gd7a688fc960f78 to capture labels
      on location ranges in rich_locations in SARIF form as
      "annotations" (§3.28.6)
    * the work done in r15-2354-g4d1f71d49e396c to support
      related locations (§3.27.22 and §3.34)
    
    so that all location ranges in a rich_location now get captured in
    the SARIF output:
    - those with a label are handled as before as "annotations" (§3.28.6),
      per r15-2291-gd7a688fc960f78
    - those without a label now get captured, in the result's
      "relatedLocations" (§3.27.22)
    
    For example, given:
    
      int missing_semicolon (void)
      {
        return 42
      }
    
    for which the textual output looks like this:
    
      PATH/missing-semicolon.c: In function 'missing_semicolon':
      PATH/missing-semicolon.c:9:12: error: expected ';' before '}' token
          9 |   return 42
            |            ^
            |            ;
         10 | }
            | ~
    
    with this patch the SARIF output now has this for the result's location:
    
       "relationships": [{"target": 0,
                          "kinds": ["relevant"]}]}],
    
    where the result gains a related location :
    
      "relatedLocations": [{"physicalLocation": {"artifactLocation": { 
[...snip...] },
                                                 "region": {"startLine": 10,
                                                            "startColumn": 1,
                                                            "endColumn": 2},
                                                 "contextRegion": {"startLine": 
10,
                                                                   "snippet": 
{"text": "}\n"}}},
                            "id": 0}]}]}]}
    
    i.e. that the error also has the secondary location at the trailing
    close brace which has the relationship "relevant" to the primary
    location (at the suggested insertion point).
    
    The patch also adds test coverage for the SARIF encoding of the fix-it hint.
    
    gcc/ChangeLog:
            * diagnostic-format-sarif.cc
            
(sarif_location_manager::worklist_item::unlabelled_secondary_location):
            New enum value.
            (sarif_location_manager::m_unlabelled_secondary_locations): New
            field.
            (sarif_location_manager::process_worklist_item): Handle unlabelled
            secondary locations.
            (sarif_builder::make_location_object): Generalize code to handle
            ranges within a rich_location so as well as using annotations for
            those with labels, we now add related locations for those without
            labels.
    
    gcc/testsuite/ChangeLog:
            * gcc.dg/sarif-output/missing-semicolon.c: New test.
            * gcc.dg/sarif-output/sarif.py (get_location_physical_region): New.
            (get_location_snippet_text): New.
            * gcc.dg/sarif-output/test-missing-semicolon.py: New test.
    
    Signed-off-by: David Malcolm <dmalc...@redhat.com>

Diff:
---
 gcc/diagnostic-format-sarif.cc                     | 54 +++++++++++++--
 .../gcc.dg/sarif-output/missing-semicolon.c        | 22 ++++++
 gcc/testsuite/gcc.dg/sarif-output/sarif.py         |  3 +
 .../gcc.dg/sarif-output/test-missing-semicolon.py  | 79 ++++++++++++++++++++++
 4 files changed, 152 insertions(+), 6 deletions(-)

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 2232883281bf..7c2e96f4f746 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -323,7 +323,11 @@ public:
     {
      /* Process a #include relationship where m_location_obj
        was #included-d at m_where.  */
-     included_from
+     included_from,
+
+     /* Process a location_t that was added as a secondary location
+       to a rich_location without a label.  */
+     unlabelled_secondary_location
     };
 
     worklist_item (sarif_location &location_obj,
@@ -369,6 +373,7 @@ private:
 
   std::list<worklist_item> m_worklist;
   std::map<location_t, sarif_location *> m_included_from_locations;
+  std::map<location_t, sarif_location *> m_unlabelled_secondary_locations;
 };
 
 /* Subclass of sarif_object for SARIF "result" objects
@@ -559,6 +564,7 @@ public:
    - diagnostic groups (see limitations below)
    - logical locations (e.g. cfun)
    - labelled ranges (as annotations)
+   - secondary ranges without labels (as related locations)
 
    Known limitations:
    - GCC supports one-deep nesting of diagnostics (via auto_diagnostic_group),
@@ -566,9 +572,6 @@ public:
      diagnostics (e.g. we ignore fix-it hints on them)
    - although we capture command-line arguments (section 3.20.2), we don't
      yet capture response files.
-   - doesn't capture secondary locations within a rich_location
-     (perhaps we should use the "relatedLocations" property: SARIF v2.1.0
-     section 3.27.22)
    - doesn't capture "artifact.encoding" property
      (SARIF v2.1.0 section 3.24.9).
    - doesn't capture hashes of the source files
@@ -987,6 +990,34 @@ sarif_location_manager::process_worklist_item 
(sarif_builder &builder,
           *this);
       }
       break;
+    case worklist_item::kind::unlabelled_secondary_location:
+      {
+       sarif_location &primary_loc_obj = item.m_location_obj;
+       sarif_location *secondary_loc_obj = nullptr;
+       auto iter = m_unlabelled_secondary_locations.find (item.m_where);
+       if (iter != m_unlabelled_secondary_locations.end ())
+         secondary_loc_obj = iter->second;
+       else
+         {
+           std::unique_ptr<sarif_location> new_loc_obj
+             = builder.make_location_object
+                 (*this,
+                  item.m_where,
+                  diagnostic_artifact_role::scanned_file);
+           secondary_loc_obj = new_loc_obj.get ();
+           add_related_location (std::move (new_loc_obj));
+           auto kv
+             = std::pair<location_t, sarif_location *> (item.m_where,
+                                                        secondary_loc_obj);
+           m_unlabelled_secondary_locations.insert (kv);
+         }
+       gcc_assert (secondary_loc_obj);
+       primary_loc_obj.lazily_add_relationship
+         (*secondary_loc_obj,
+          location_relationship_kind::relevant,
+          *this);
+      }
+      break;
     }
 }
 
@@ -1739,18 +1770,19 @@ sarif_builder::make_location_object 
(sarif_location_manager &loc_mgr,
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
   set_any_logical_locs_arr (*location_obj, logical_loc);
 
-  /* "annotations" property (SARIF v2.1.0 section 3.28.6).  */
+  /* Handle labelled ranges and/or secondary locations.  */
   {
-    /* Create annotations for any labelled ranges.  */
     std::unique_ptr<json::array> annotations_arr = nullptr;
     for (unsigned int i = 0; i < rich_loc.get_num_locations (); i++)
       {
        const location_range *range = rich_loc.get_range (i);
+       bool handled = false;
        if (const range_label *label = range->m_label)
          {
            label_text text = label->get_text (i);
            if (text.get ())
              {
+               /* Create annotations for any labelled ranges.  */
                location_t range_loc = rich_loc.get_loc (i);
                auto region
                  = maybe_make_region_object (range_loc,
@@ -1762,11 +1794,21 @@ sarif_builder::make_location_object 
(sarif_location_manager &loc_mgr,
                    region->set<sarif_message>
                      ("message", make_message_object (text.get ()));
                    annotations_arr->append<sarif_region> (std::move (region));
+                   handled = true;
                  }
              }
          }
+
+       /* Add related locations for any secondary locations in RICH_LOC
+          that don't have labels (and thus aren't added to "annotations"). */
+       if (i > 0 && !handled)
+         loc_mgr.add_relationship_to_worklist
+           (*location_obj.get (),
+            
sarif_location_manager::worklist_item::kind::unlabelled_secondary_location,
+            range->m_loc);
       }
     if (annotations_arr)
+      /* "annotations" property (SARIF v2.1.0 section 3.28.6).  */
       location_obj->set<json::array> ("annotations",
                                      std::move (annotations_arr));
   }
diff --git a/gcc/testsuite/gcc.dg/sarif-output/missing-semicolon.c 
b/gcc/testsuite/gcc.dg/sarif-output/missing-semicolon.c
new file mode 100644
index 000000000000..426b930cfc2c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sarif-output/missing-semicolon.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-fdiagnostics-format=sarif-file" } */
+
+/* Verify that SARIF output can capture secondary locations
+   relating to a diagnostic.  */
+
+int missing_semicolon (void)
+{
+  return 42
+}
+
+/* We expect a failing compile due to the error, but the use of 
+   -fdiagnostics-format=sarif-file means there should be no output to stderr.
+   DejaGnu injects this message; ignore it:
+   { dg-prune-output "exit status is 1" } */
+
+/* Verify that some JSON was written to a file with the expected name:
+   { dg-final { verify-sarif-file } } */
+
+/* Use a Python script to verify various properties about the generated
+   .sarif file:
+   { dg-final { run-sarif-pytest missing-semicolon.c 
"test-missing-semicolon.py" } } */
diff --git a/gcc/testsuite/gcc.dg/sarif-output/sarif.py 
b/gcc/testsuite/gcc.dg/sarif-output/sarif.py
index 54c96a006b65..a34678791aca 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/sarif.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/sarif.py
@@ -14,6 +14,9 @@ def sarif_from_env():
 def get_location_artifact_uri(location):
     return location['physicalLocation']['artifactLocation']['uri']
 
+def get_location_physical_region(location):
+    return location['physicalLocation']['region']
+
 def get_location_snippet_text(location):
     return location['physicalLocation']['contextRegion']['snippet']['text']
 
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py 
b/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py
new file mode 100644
index 000000000000..795980d88cc4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py
@@ -0,0 +1,79 @@
+from sarif import *
+
+import pytest
+
+@pytest.fixture(scope='function', autouse=True)
+def sarif():
+    return sarif_from_env()
+
+def test_basics(sarif):
+    schema = sarif['$schema']
+    assert schema == 
"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json";
+
+    version = sarif['version']
+    assert version == "2.1.0"
+
+def test_location_relationships(sarif):
+    runs = sarif['runs']
+    run = runs[0]
+    results = run['results']
+
+    # We expect a single error with a secondary location and a fix-it hint.
+    #
+    # The textual form of the diagnostic would look like this:
+    #  . PATH/missing-semicolon.c: In function 'missing_semicolon':
+    #  . PATH/missing-semicolon.c:19:12: error: expected ';' before '}' token
+    #  .    19 |   return 42
+    #  .       |            ^
+    #  .       |            ;
+    #  .    20 | }
+    #  .       | ~           
+    assert len(results) == 1
+    
+    result = results[0]
+    assert result['level'] == 'error'
+    assert result['message']['text'] == "expected ';' before '}' token"
+    locations = result['locations']
+    assert len(locations) == 1
+
+    location = locations[0]
+    assert get_location_artifact_uri(location).endswith('missing-semicolon.c')
+    assert get_location_snippet_text(location) == '  return 42\n'
+    assert get_location_physical_region(location)['startLine'] == 9
+    assert get_location_physical_region(location)['startColumn'] == 12
+    assert get_location_physical_region(location)['endColumn'] == 13
+
+    # We don't expect the secondary location to have a relationship back
+    # to the primary location, and so the primary doesn't get an id.
+    assert 'id' not in location
+
+    # We expect the primary location to reference the secondary location.
+    assert len(location['relationships']) == 1
+    assert location['relationships'][0]['target'] == 0
+    assert location['relationships'][0]['kinds'] == ['relevant']
+
+    # We expect one related location, for the closing brace on the next line
+    relatedLocations = result['relatedLocations']
+    assert len(relatedLocations) == 1
+
+    rel_loc = relatedLocations[0]
+    assert rel_loc['id'] == 0
+    assert get_location_artifact_uri(rel_loc).endswith('missing-semicolon.c')
+    assert get_location_snippet_text(rel_loc) == '}\n'
+    assert get_location_physical_region(rel_loc)['startLine'] == 10
+    assert get_location_physical_region(rel_loc)['startColumn'] == 1
+    assert get_location_physical_region(rel_loc)['endColumn'] == 2
+    assert 'relatedLocations' not in rel_loc
+    assert 'message' not in rel_loc
+
+    # We expect one fix-it hint representing an insertion of ';'
+    assert len(result['fixes']) == 1
+    assert len(result['fixes'][0]['artifactChanges']) == 1
+    change = result['fixes'][0]['artifactChanges'][0]
+    assert change['artifactLocation']['uri'].endswith('missing-semicolon.c')
+    assert len(change['replacements']) == 1
+    replacement = change['replacements'][0]
+    assert replacement['deletedRegion']['startLine'] == 9
+    assert replacement['deletedRegion']['startColumn'] == 12
+    assert replacement['deletedRegion']['endColumn'] == 12
+    assert replacement['insertedContent']['text'] == ';'

Reply via email to