Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 1 | # Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Runs cargo clippy across the given files, dumping diagnostics to a JSON file. |
| 6 | |
| 7 | This script is intended specifically for use with Tricium (go/tricium). |
| 8 | """ |
| 9 | |
| 10 | import json |
Chris McDonald | 59650c3 | 2021-07-20 15:29:28 -0600 | [diff] [blame] | 11 | import logging |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 12 | import os |
| 13 | from pathlib import Path |
Chris McDonald | 59650c3 | 2021-07-20 15:29:28 -0600 | [diff] [blame] | 14 | from typing import Any, Dict, Iterable, List, NamedTuple, Text |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 15 | |
| 16 | from chromite.lib import commandline |
| 17 | from chromite.lib import cros_build_lib |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 18 | |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 19 | |
class Error(Exception):
  """Root of the exception hierarchy used by tricium-cargo-clippy."""
| 22 | |
| 23 | |
class CargoClippyPackagePathError(Error):
  """Signals that a clippy output source carries no package path.

  Attributes:
    source: Name of the source that was missing the package path.
  """

  def __init__(self, source: Text):
    self.source = source
    message = f'{source} does not start with a package path'
    super().__init__(message)
| 30 | |
class CargoClippyJSONError(Error):
  """Signals that a line of cargo-clippy output could not be read as JSON.

  Attributes:
    source: Name of the source the offending line came from.
    line_num: Line number of the offending line within source.
  """

  def __init__(self, source: Text, line_num: int):
    self.source = source
    self.line_num = line_num
    message = f'{source}:{line_num}: is not valid JSON'
    super().__init__(message)
| 38 | |
| 39 | |
class CargoClippyReasonError(Error):
  """Signals that a cargo-clippy JSON entry has no "reason" field.

  Attributes:
    source: Name of the source the offending entry came from.
    line_num: Line number of the offending entry within source.
  """

  def __init__(self, source: Text, line_num: int):
    self.source = source
    self.line_num = line_num
    message = f'{source}:{line_num}: is missing its reason'
    super().__init__(message)
| 47 | |
| 48 | |
class CargoClippyFieldError(Error):
  """Signals that a required field could not be extracted from an entry.

  Attributes:
    source: Name of the source the offending entry came from.
    line_num: Line number of the offending entry within source.
    field: Name of the field that could not be determined.
  """

  def __init__(self, source: Text, line_num: int, field: Text):
    self.source = source
    self.line_num = line_num
    self.field = field
    message = (
        f'{source}:{line_num}: {field} could not be parsed from original json')
    super().__init__(message)
| 59 | |
| 60 | |
def resolve_path(file_path: Text) -> Text:
  """Returns file_path as a string after Path.resolve() has been applied."""
  resolved = Path(file_path).resolve()
  return str(resolved)
| 63 | |
| 64 | |
class CodeLocation(NamedTuple):
  """Span of source code that a ClippyDiagnostic finding points at."""
  file_path: Text
  line_start: int
  line_end: int
  column_start: int
  column_end: int

  def to_dict(self):
    """Returns the fields as a plain dict keyed by field name."""
    return dict(self._asdict())
| 78 | |
| 79 | |
class ClippyDiagnostic(NamedTuple):
  """One compiler message reported by Clippy: where, how severe, and what."""
  locations: Iterable['CodeLocation']
  level: Text
  message: Text

  def as_json(self):
    """Serializes the diagnostic to a JSON string.

    Every entry of locations is converted through its to_dict() method; the
    remaining fields are emitted unchanged.
    """
    payload = self._asdict()
    payload['locations'] = [entry.to_dict() for entry in self.locations]
    return json.dumps(payload)
| 91 | |
| 92 | |
def parse_locations(
    orig_json: Dict[Text, Any],
    package_path: Text, git_repo: Text) -> Iterable['CodeLocation']:
  """Yields the unique code locations attached to one clippy diagnostic.

  Spans are gathered from the message['spans'] list first, followed by the
  'spans' of every entry in message['children']. Each span's file name is
  joined onto package_path, and the git_repo prefix is removed whenever the
  joined path lies inside that directory. Duplicate locations are yielded
  only once.

  Args:
    orig_json: A single clippy entry, already decoded from JSON.
    package_path: A resolved path to the rust package.
    git_repo: Base directory for git repo to strip out in diagnostics. An
      empty value disables stripping.

  Yields:
    A CodeLocation object for each distinct span, in first-seen order.
  """
  message = orig_json.get('message', {})
  spans = list(message.get('spans', []))
  for child in message.get('children', []):
    spans.extend(child.get('spans', []))

  # Normalize the prefix to end in exactly one separator. An empty git_repo
  # must not turn into a bare '/' prefix: that would match every absolute
  # path and strip its leading slash.
  repo_prefix = os.path.join(git_repo, '') if git_repo else ''

  seen = set()
  for span in spans:
    file_path = os.path.join(package_path, span.get('file_name'))
    if repo_prefix and file_path.startswith(repo_prefix):
      file_path = file_path[len(repo_prefix):]
    location = CodeLocation(
        file_path=file_path,
        line_start=span.get('line_start'),
        line_end=span.get('line_end'),
        column_start=span.get('column_start'),
        column_end=span.get('column_end'))
    if location in seen:
      continue
    seen.add(location)
    yield location
| 131 | |
| 132 | |
def parse_level(src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
  """Extracts the severity level of a diagnostic.

  The top-level 'level' key is preferred; when it is absent or empty the
  'level' key nested under 'message' is used instead.

  Args:
    src: Name of the file orig_json was found in.
    src_line: Line number where orig_json was found.
    orig_json: A single clippy entry, already decoded from JSON.

  Returns:
    The level of the diagnostic as a string.

  Raises:
    CargoClippyFieldError: Neither place holds a non-empty level.
  """
  top_level = orig_json.get('level')
  if top_level:
    return top_level

  nested_level = orig_json.get('message', {}).get('level')
  if nested_level:
    return nested_level

  raise CargoClippyFieldError(src, src_line, 'level')
| 153 | |
| 154 | |
def parse_message(
    src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
  """Extracts the rendered linter message of a diagnostic.

  Args:
    src: Name of the file orig_json was found in.
    src_line: Line number where orig_json was found.
    orig_json: A single clippy entry, already decoded from JSON.

  Returns:
    The value stored under message['rendered']; an empty string is accepted.

  Raises:
    CargoClippyFieldError: message['rendered'] is missing or None.
  """
  rendered = orig_json.get('message', {}).get('rendered')
  if rendered is not None:
    return rendered
  raise CargoClippyFieldError(src, src_line, 'message')
| 174 | |
| 175 | |
def parse_diagnostics(
    src: Text, orig_jsons: Iterable[Text],
    git_repo: Text) -> Iterable['ClippyDiagnostic']:
  """Parses lines of original JSON into Clippy diagnostics.

  The first line is expected to be a JSON object holding a 'package_path'
  key; every later line is a clippy log entry. Only entries whose "reason"
  is "compiler-message" produce a diagnostic.

  Args:
    src: Name of the file orig_jsons was read from.
    orig_jsons: An iterable of clippy entries in original json, one per line.
    git_repo: Base directory for git repo to strip out in diagnostics.

  Yields:
    A ClippyDiagnostic for each compiler-message entry. Its locations field
    is a list, so it can be tested for emptiness and iterated repeatedly.

  Raises:
    CargoClippyJSONError: if a line is not valid JSON.
    CargoClippyPackagePathError: if the first line has no package path.
    CargoClippyReasonError: if a diagnostic is missing a "reason" field.
    CargoClippyFieldError: if a field cannot be determined while parsing.
  """
  # src_line is the zero-based index of the line within orig_jsons.
  for src_line, orig_json in enumerate(orig_jsons):
    try:
      line_json = json.loads(orig_json)
    except json.decoder.JSONDecodeError as err:
      json_error = CargoClippyJSONError(src, src_line)
      logging.error(json_error)
      # Keep the decoder's own error attached as the cause.
      raise json_error from err

    # We pass the path to the package in a special JSON on the first line
    if src_line == 0:
      package_path = line_json.get('package_path')
      if not package_path:
        path_error = CargoClippyPackagePathError(src)
        logging.error(path_error)
        raise path_error
      package_path = resolve_path(package_path)
      continue

    # Clippy outputs several types of logs, as distinguished by the "reason"
    # field, but we only want to process "compiler-message" logs.
    reason = line_json.get('reason')
    if reason is None:
      reason_error = CargoClippyReasonError(src, src_line)
      logging.error(reason_error)
      raise reason_error
    if reason != 'compiler-message':
      continue

    # parse_locations is a generator; a generator object is always truthy and
    # can only be consumed once, so materialize it before storing it.
    locations = list(parse_locations(line_json, package_path, git_repo))
    level = parse_level(src, src_line, line_json)
    message = parse_message(src, src_line, line_json)

    # TODO(ryanbeltran): Export suggested replacements
    yield ClippyDiagnostic(locations, level, message)
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 225 | |
| 226 | |
def parse_files(input_dir: Text, git_repo: Text) -> Iterable[ClippyDiagnostic]:
  """Collects the diagnostics of every file found beneath input_dir.

  The directory tree is walked with os.walk and each file is opened as UTF-8
  text and handed to parse_diagnostics.

  Args:
    input_dir: path to directory to scan for files
    git_repo: Base directory for git repo to strip out in diagnostics.

  Yields:
    Clippy Diagnostics objects found in files in the input directory
  """
  for directory, _, names in os.walk(input_dir):
    for name in names:
      clippy_path = os.path.join(directory, name)
      with open(clippy_path, encoding='utf-8') as handle:
        for diagnostic in parse_diagnostics(clippy_path, handle, git_repo):
          yield diagnostic
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 242 | |
| 243 | |
def filter_diagnostics(
    diags: Iterable['ClippyDiagnostic']) -> Iterable['ClippyDiagnostic']:
  """Drops diagnostics that carry no useful information.

  A diagnostic is discarded when its message contains "aborting due to
  previous error" or when it has no code locations.

  Args:
    diags: The diagnostics to filter. Their locations may be any iterable,
      including a one-shot generator.

  Yields:
    The remaining diagnostics, each with its locations stored as a list.
  """
  for diag in diags:
    # ignore redundant messages: "aborting due to previous error..."
    if 'aborting due to previous error' in diag.message:
      continue
    # findings with no location are never useful. A lazy iterable is truthy
    # even when it yields nothing, so it has to be materialized before the
    # emptiness test means anything.
    locations = list(diag.locations)
    if not locations:
      continue
    # Hand the materialized list on; the original iterable may be exhausted.
    yield diag._replace(locations=locations)
| 255 | |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 256 | |
def get_arg_parser() -> commandline.ArgumentParser:
  """Creates an argument parser for this script.

  Returns:
    A parser accepting --output and --clippy-json-dir (both required) and
    the optional --git-repo-path, which defaults to an empty string.
  """
  parser = commandline.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--output', required=True, type='path', help='File to write results to.')
  # Required: main() resolves this path unconditionally, so leaving it out
  # would otherwise surface as a TypeError instead of a usage message.
  parser.add_argument(
      '--clippy-json-dir',
      required=True,
      type='path',
      help='Directory where clippy outputs were previously written to.')
  parser.add_argument(
      '--git-repo-path',
      type='path',
      default='',
      help='Base directory for git repo to strip out in diagnostics.')
  return parser
| 272 | |
| 273 | |
def main(argv: List[str]) -> None:
  """Converts previously captured clippy output into a file of JSON lines.

  Every diagnostic that survives filter_diagnostics is written to the
  --output file as one JSON document per line.

  Args:
    argv: Command line arguments, without the program name.
  """
  cros_build_lib.AssertInsideChroot()

  logging.basicConfig()

  parser = get_arg_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  input_dir = resolve_path(opts.clippy_json_dir)
  output_path = resolve_path(opts.output)
  git_repo = opts.git_repo_path

  diagnostics = filter_diagnostics(parse_files(input_dir, git_repo))
  with open(output_path, 'w', encoding='utf-8') as output_file:
    # Serialize explicitly: formatting the NamedTuple itself would write its
    # Python repr rather than JSON.
    output_file.writelines(f'{diag.as_json()}\n' for diag in diagnostics)