[PATCH] D81563: [analyzer] SATest: Move from csv to json project maps

Valeriy Savchenko via Phabricator via cfe-commits Wed, 10 Jun 2020 06:33:04 -0700

vsavchenko created this revision.
vsavchenko added reviewers: NoQ, dcoughlin.
Herald added subscribers: cfe-commits, ASDenysPetrov, Charusso, dkrupp, 
donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, 
xazax.hun.
Herald added a project: clang.


The JSON format is a bit more verbose than CSV, but it is easier to
reason about and to extend.  For this reason, before extending
SATestBuild functionality, it is better to refactor the way the
whole system is configured.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81563

Files:
  clang/utils/analyzer/ProjectMap.py
  clang/utils/analyzer/SATestAdd.py
  clang/utils/analyzer/SATestBuild.py
  clang/utils/analyzer/SATestUpdateDiffs.py

Index: clang/utils/analyzer/SATestUpdateDiffs.py
===================================================================
--- clang/utils/analyzer/SATestUpdateDiffs.py
+++ clang/utils/analyzer/SATestUpdateDiffs.py
@@ -4,6 +4,7 @@
 Update reference results for static analyzer.
 """
 import SATestBuild
+from ProjectMap import ProjectInfo, ProjectMap
 
 import os
 import shutil
@@ -14,9 +15,9 @@
 Verbose = 0
 
 
-def update_reference_results(project_name: str, build_mode: int):
-    project_info = SATestBuild.ProjectInfo(project_name, build_mode)
-    tester = SATestBuild.ProjectTester(project_info)
+def update_reference_results(project: ProjectInfo):
+    test_info = SATestBuild.TestInfo(project)
+    tester = SATestBuild.ProjectTester(test_info)
     project_dir = tester.get_project_dir()
 
     tester.is_reference_build = True
@@ -54,7 +55,7 @@
         SATestBuild.run_cleanup_script(project_dir, build_log_file)
 
         SATestBuild.normalize_reference_results(
-            project_dir, ref_results_path, build_mode)
+            project_dir, ref_results_path, project.mode)
 
         # Clean up the generated difference results.
         SATestBuild.cleanup_reference_results(ref_results_path)
@@ -62,6 +63,7 @@
         run_cmd(f"git add '{ref_results_path}'")
 
 
+# TODO: use argparse
 def main(argv):
     if len(argv) == 2 and argv[1] in ("-h", "--help"):
         print("Update static analyzer reference results based "
@@ -70,9 +72,9 @@
               file=sys.stderr)
         sys.exit(1)
 
-    with open(SATestBuild.get_project_map_path(), "r") as f:
-        for project_name, build_mode in SATestBuild.get_projects(f):
-            update_reference_results(project_name, int(build_mode))
+    project_map = ProjectMap()
+    for project in project_map.projects:
+        update_reference_results(project)
 
 
 if __name__ == '__main__':
Index: clang/utils/analyzer/SATestBuild.py
===================================================================
--- clang/utils/analyzer/SATestBuild.py
+++ clang/utils/analyzer/SATestBuild.py
@@ -44,9 +44,9 @@
 """
 import CmpRuns
 import SATestUtils
+from ProjectMap import ProjectInfo, ProjectMap
 
 import argparse
-import csv
 import glob
 import logging
 import math
@@ -59,9 +59,11 @@
 import time
 
 from queue import Queue
+# mypy has problems finding InvalidFileException in the module
+# and this is how we can shush that false positive
+from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import (cast, Dict, Iterable, IO, List, NamedTuple, Optional,
-                    Tuple, TYPE_CHECKING)
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
 
 
 ###############################################################################
@@ -105,9 +107,6 @@
 # Number of jobs.
 MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))
 
-# Project map stores info about all the "registered" projects.
-PROJECT_MAP_FILE = "projectMap.csv"
-
 # Names of the project specific scripts.
 # The script that downloads the project.
 DOWNLOAD_SCRIPT = "download_project.sh"
@@ -187,18 +186,6 @@
 ###############################################################################
 
 
-def get_project_map_path(should_exist: bool = True) -> str:
-    project_map_path = os.path.join(os.path.abspath(os.curdir),
-                                    PROJECT_MAP_FILE)
-
-    if should_exist and not os.path.exists(project_map_path):
-        stderr(f"Error: Cannot find the project map file {project_map_path}"
-               f"\nRunning script for the wrong directory?\n")
-        sys.exit(1)
-
-    return project_map_path
-
-
 def run_cleanup_script(directory: str, build_log_file: IO):
     """
     Run pre-processing script if any.
@@ -268,12 +255,11 @@
         sys.exit(1)
 
 
-class ProjectInfo(NamedTuple):
+class TestInfo(NamedTuple):
     """
     Information about a project and settings for its analysis.
     """
-    name: str
-    build_mode: int
+    project: ProjectInfo
     override_compiler: bool = False
     extra_analyzer_config: str = ""
     is_reference_build: bool = False
@@ -287,9 +273,9 @@
 # It is a common workaround for this situation:
 # https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
 if TYPE_CHECKING:
-    ProjectQueue = Queue[ProjectInfo]  # this is only processed by mypy
+    TestQueue = Queue[TestInfo]  # this is only processed by mypy
 else:
-    ProjectQueue = Queue  # this will be executed at runtime
+    TestQueue = Queue  # this will be executed at runtime
 
 
 class RegressionTester:
@@ -306,25 +292,24 @@
         self.strictness = strictness
 
     def test_all(self) -> bool:
-        projects_to_test: List[ProjectInfo] = []
-
-        with open(get_project_map_path(), "r") as map_file:
-            validate_project_file(map_file)
-
-            # Test the projects.
-            for proj_name, proj_build_mode in get_projects(map_file):
-                projects_to_test.append(
-                    ProjectInfo(proj_name, int(proj_build_mode),
-                                self.override_compiler,
-                                self.extra_analyzer_config,
-                                self.regenerate, self.strictness))
+        projects_to_test: List[TestInfo] = []
+
+        project_map = ProjectMap()
+
+        # Test the projects.
+        for project in project_map.projects:
+            projects_to_test.append(
+                TestInfo(project,
+                         self.override_compiler,
+                         self.extra_analyzer_config,
+                         self.regenerate, self.strictness))
         if self.jobs <= 1:
             return self._single_threaded_test_all(projects_to_test)
         else:
             return self._multi_threaded_test_all(projects_to_test)
 
     def _single_threaded_test_all(self,
-                                  projects_to_test: List[ProjectInfo]) -> bool:
+                                  projects_to_test: List[TestInfo]) -> bool:
         """
         Run all projects.
         :return: whether tests have passed.
@@ -336,7 +321,7 @@
         return success
 
     def _multi_threaded_test_all(self,
-                                 projects_to_test: List[ProjectInfo]) -> bool:
+                                 projects_to_test: List[TestInfo]) -> bool:
         """
         Run each project in a separate thread.
 
@@ -345,7 +330,7 @@
 
         :return: whether tests have passed.
         """
-        tasks_queue = ProjectQueue()
+        tasks_queue = TestQueue()
 
         for project_info in projects_to_test:
             tasks_queue.put(project_info)
@@ -370,13 +355,12 @@
     """
     A component aggregating testing for one project.
     """
-    def __init__(self, project_info: ProjectInfo):
-        self.project_name = project_info.name
-        self.build_mode = project_info.build_mode
-        self.override_compiler = project_info.override_compiler
-        self.extra_analyzer_config = project_info.extra_analyzer_config
-        self.is_reference_build = project_info.is_reference_build
-        self.strictness = project_info.strictness
+    def __init__(self, test_info: TestInfo):
+        self.project = test_info.project
+        self.override_compiler = test_info.override_compiler
+        self.extra_analyzer_config = test_info.extra_analyzer_config
+        self.is_reference_build = test_info.is_reference_build
+        self.strictness = test_info.strictness
 
     def test(self) -> bool:
         """
@@ -384,7 +368,11 @@
         :return tests_passed: Whether tests have passed according
         to the :param strictness: criteria.
         """
-        stdout(f" \n\n--- Building project {self.project_name}\n")
+        if not self.project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            return True
+
+        stdout(f" \n\n--- Building project {self.project.name}\n")
 
         start_time = time.time()
 
@@ -405,13 +393,13 @@
         else:
             passed = run_cmp_results(project_dir, self.strictness)
 
-        stdout(f"Completed tests for project {self.project_name} "
+        stdout(f"Completed tests for project {self.project.name} "
                f"(time: {time.time() - start_time:.2f}).\n")
 
         return passed
 
     def get_project_dir(self) -> str:
-        return os.path.join(os.path.abspath(os.curdir), self.project_name)
+        return os.path.join(os.path.abspath(os.curdir), self.project.name)
 
     def get_output_dir(self) -> str:
         if self.is_reference_build:
@@ -441,7 +429,7 @@
 
         # Build and analyze the project.
         with open(build_log_path, "w+") as build_log_file:
-            if self.build_mode == 1:
+            if self.project.mode == 1:
                 download_and_patch(directory, build_log_file)
                 run_cleanup_script(directory, build_log_file)
                 self.scan_build(directory, output_dir, build_log_file)
@@ -451,7 +439,7 @@
             if self.is_reference_build:
                 run_cleanup_script(directory, build_log_file)
                 normalize_reference_results(directory, output_dir,
-                                            self.build_mode)
+                                            self.project.mode)
 
         stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
                f"See the log for more details: {build_log_path}\n")
@@ -549,7 +537,7 @@
         prefix += " -Xclang -analyzer-config "
         prefix += f"-Xclang {self.generate_config()} "
 
-        if self.build_mode == 2:
+        if self.project.mode == 2:
             prefix += "-std=c++11 "
 
         plist_path = os.path.join(directory, output_dir, "date")
@@ -601,7 +589,7 @@
 
 
 class TestProjectThread(threading.Thread):
-    def __init__(self, tasks_queue: ProjectQueue,
+    def __init__(self, tasks_queue: TestQueue,
                  results_differ: threading.Event,
                  failure_flag: threading.Event):
         """
@@ -621,13 +609,13 @@
     def run(self):
         while not self.tasks_queue.empty():
             try:
-                project_info = self.tasks_queue.get()
+                test_info = self.tasks_queue.get()
 
-                Logger = logging.getLogger(project_info.name)
+                Logger = logging.getLogger(test_info.project.name)
                 LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
                 LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
 
-                tester = ProjectTester(project_info)
+                tester = ProjectTester(test_info)
                 if not tester.test():
                     self.results_differ.set()
 
@@ -841,7 +829,7 @@
                 os.remove(plist)
                 continue
 
-        except plistlib.InvalidFileException as e:
+        except InvalidFileException as e:
             stderr(f"Error parsing plist file {plist}: {str(e)}")
             continue
 
@@ -856,36 +844,6 @@
             os.removedirs(subdir)
 
 
-def get_projects(map_file: IO) -> Iterable[Tuple[str, str]]:
-    """
-    Iterate over all projects defined in the project file handler `map_file`
-    from the start.
-    """
-    map_file.seek(0)
-    # TODO: csv format is not very readable, change it to JSON
-    for project_info in csv.reader(map_file):
-        if SATestUtils.is_comment_csv_line(project_info):
-            continue
-        # suppress mypy error
-        yield cast(Tuple[str, str], project_info)
-
-
-def validate_project_file(map_file: IO):
-    """
-    Validate project file.
-    """
-    for project_info in get_projects(map_file):
-        if len(project_info) != 2:
-            stderr("Error: Rows in the project map file "
-                   "should have 2 entries.")
-            raise Exception()
-
-        if project_info[1] not in ('0', '1', '2'):
-            stderr("Error: Second entry in the project map file should be 0"
-                   " (single file), 1 (project), or 2(single file c++11).")
-            raise Exception()
-
-
 if __name__ == "__main__":
     # Parse command line arguments.
     parser = argparse.ArgumentParser(
Index: clang/utils/analyzer/SATestAdd.py
===================================================================
--- clang/utils/analyzer/SATestAdd.py
+++ clang/utils/analyzer/SATestAdd.py
@@ -43,13 +43,11 @@
                                               > changes_for_analyzer.patch
 """
 import SATestBuild
+from ProjectMap import ProjectMap, ProjectInfo
 
-import csv
 import os
 import sys
 
-from typing import IO
-
 
 def add_new_project(name: str, build_mode: int):
     """
@@ -57,9 +55,10 @@
     :param name: is a short string used to identify a project.
     """
 
-    project_info = SATestBuild.ProjectInfo(name, build_mode,
-                                           is_reference_build=True)
-    tester = SATestBuild.ProjectTester(project_info)
+    project_info = ProjectInfo(name, build_mode)
+    test_info = SATestBuild.TestInfo(project_info,
+                                     is_reference_build=True)
+    tester = SATestBuild.ProjectTester(test_info)
 
     project_dir = tester.get_project_dir()
     if not os.path.exists(project_dir):
@@ -70,33 +69,20 @@
     tester.test()
 
     # Add the project name to the project map.
-    project_map_path = SATestBuild.get_project_map_path(should_exist=False)
+    project_map = ProjectMap(should_exist=False)
 
-    if os.path.exists(project_map_path):
-        file_mode = "r+"
+    if is_existing_project(project_map, name):
+        print(f"Warning: Project with name '{name}' already exists.",
+              file=sys.stdout)
+        print("Reference output has been regenerated.", file=sys.stdout)
     else:
-        print("Warning: Creating the project map file!")
-        file_mode = "w+"
-
-    with open(project_map_path, file_mode) as map_file:
-        if is_existing_project(map_file, name):
-            print(f"Warning: Project with name '{name}' already exists.",
-                  file=sys.stdout)
-            print("Reference output has been regenerated.", file=sys.stdout)
-        else:
-            map_writer = csv.writer(map_file)
-            map_writer.writerow((name, build_mode))
-            print(f"The project map is updated: {project_map_path}")
-
-
-def is_existing_project(map_file: IO, project_name: str) -> bool:
-    map_reader = csv.reader(map_file)
+        project_map.projects.append(project_info)
+        project_map.save()
 
-    for raw_info in map_reader:
-        if project_name == raw_info[0]:
-            return True
 
-    return False
+def is_existing_project(project_map: ProjectMap, project_name: str) -> bool:
+    return any(existing_project.name == project_name
+               for existing_project in project_map.projects)
 
 
 # TODO: Use argparse
Index: clang/utils/analyzer/ProjectMap.py
===================================================================
--- /dev/null
+++ clang/utils/analyzer/ProjectMap.py
@@ -0,0 +1,94 @@
+import json
+import os
+
+from typing import Any, Dict, List, NamedTuple, Optional
+
+
+JSON = Dict[str, Any]
+
+
+DEFAULT_MAP_FILE = "projects.json"
+
+
+class ProjectInfo(NamedTuple):
+    """
+    Information about a project to analyze.
+    """
+    name: str
+    mode: int
+    enabled: bool = True
+
+
+class ProjectMap:
+    """
+    Project map stores info about all the "registered" projects.
+    """
+    def __init__(self, path: Optional[str] = None, should_exist: bool = True):
+        """
+        :param path: optional path to a project JSON file, when None defaults
+                     to DEFAULT_MAP_FILE.
+        :param should_exist: flag telling whether a missing project file is an
+                             error (ValueError is raised); when False, an empty
+                             project map file is created at the path instead
+                             of failing.
+        """
+        if path is None:
+            path = os.path.join(os.path.abspath(os.curdir), DEFAULT_MAP_FILE)
+
+        if not os.path.exists(path):
+            if should_exist:
+                raise ValueError(
+                    f"Cannot find the project map file {path}"
+                    f"\nRunning script for the wrong directory?\n")
+            else:
+                self._create_empty(path)
+
+        self.path = path
+        self._load_projects()
+
+    def save(self):
+        """
+        Save project map back to its original file.
+        """
+        self._save(self.projects, self.path)
+
+    def _load_projects(self):
+        with open(self.path) as raw_data:
+            raw_projects = json.load(raw_data)
+
+            if not isinstance(raw_projects, list):
+                raise ValueError(
+                    "Project map should be a list of JSON objects")
+
+            self.projects = self._parse(raw_projects)
+
+    @staticmethod
+    def _parse(raw_projects: List[JSON]) -> List[ProjectInfo]:
+        return [ProjectMap._parse_project(raw_project)
+                for raw_project in raw_projects]
+
+    @staticmethod
+    def _parse_project(raw_project: JSON) -> ProjectInfo:
+        try:
+            name: str = raw_project["name"]
+            build_mode: int = raw_project["mode"]
+            enabled: bool = raw_project.get("enabled", True)
+            return ProjectInfo(name, build_mode, enabled)
+
+        except KeyError as e:
+            raise ValueError(
+                f"Project info is required to have a '{e.args[0]}' field")
+
+    @staticmethod
+    def _create_empty(path: str):
+        ProjectMap._save([], path)
+
+    @staticmethod
+    def _save(projects: List[ProjectInfo], path: str):
+        with open(path, "w") as output:
+            json.dump(ProjectMap._convert_infos_to_dicts(projects),
+                      output, indent=2)
+
+    @staticmethod
+    def _convert_infos_to_dicts(projects: List[ProjectInfo]) -> List[JSON]:
+        return [project._asdict() for project in projects]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D81563: [analyzer] SATest: Move from csv to json project maps

Reply via email to