# Copyright 2021 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs cargo clippy across the given files, dumping diagnostics to a JSON file.

This script is intended specifically for use with Tricium (go/tricium).
"""
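
# Example invocation (a sketch only; the flag names come from get_arg_parser
# below, while the entry point and paths are illustrative):
#   tricium_cargo_clippy --clippy-json-dir /tmp/linting_output/cargo-clippy \
#       --output /tmp/cargo_clippy_findings.json --files '/**/*.rs'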

import json
import os
from pathlib import Path
import re
from typing import Any, Dict, Iterable, List, NamedTuple, Text

from chromite.lib import commandline
from chromite.lib import cros_build_lib
from chromite.lib import cros_logging as logging


class Error(Exception):
  """Base error class for tricium-cargo-clippy."""


class CargoClippyJSONError(Error):
  """Raised when cargo-clippy parsing jobs are not proper JSON."""

  def __init__(self, source: Text, line_num: int):
    super().__init__(f'{source}:{line_num}: is not valid JSON')
    self.source = source
    self.line_num = line_num


class CargoClippyReasonError(Error):
  """Raised when cargo-clippy parsing jobs don't provide a "reason" field."""

  def __init__(self, source: Text, line_num: int):
    super().__init__(f'{source}:{line_num}: is missing its reason')
    self.source = source
    self.line_num = line_num


class CargoClippyFieldError(Error):
  """Raised when cargo-clippy parsing jobs fail to determine a field."""

  def __init__(self, source: Text, line_num: int, field: Text):
    super().__init__(
        f'{source}:{line_num}: {field} could not be parsed from original json'
    )
    self.source = source
    self.line_num = line_num
    self.field = field


def resolve_path(file_path: Text) -> Text:
  """Returns the fully resolved absolute path of file_path as a string."""
  return str(Path(file_path).resolve())


class CodeLocation(NamedTuple):
  """Holds the location of a ClippyDiagnostic finding."""
  file_path: Text
  file_name: Text
  line_start: int
  line_end: int
  column_start: int
  column_end: int

  def to_dict(self):
    return {
        **self._asdict(),
        'file_path': resolve_path(self.file_path)
    }


class ClippyDiagnostic(NamedTuple):
  """Holds information about a compiler message from Clippy."""
  file_path: Text
  locations: Iterable['CodeLocation']
  level: Text
  message: Text

  def as_json(self):
    return json.dumps({
        **self._asdict(),
        'locations': [loc.to_dict() for loc in self.locations],
    })

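# For reference, ClippyDiagnostic.as_json serializes a finding roughly like
# this (a sketch with invented values):
#   {"file_path": "/path/to/src/lib.rs",
#    "locations": [{"file_path": "/path/to/src/lib.rs",
#                   "file_name": "src/lib.rs",
#                   "line_start": 1, "line_end": 1,
#                   "column_start": 5, "column_end": 10}],
#    "level": "warning",
#    "message": "warning: unused variable: `x`\n ..."}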

def parse_file_path(
    src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
  """Returns the path to the file targeted by the lint.

  Args:
    src: Name of the file orig_json was found in.
    src_line: Line number where orig_json was found.
    orig_json: A single clippy entry parsed from the original JSON output.

  Returns:
    A resolved path to the original source location as a string.

  Raises:
    CargoClippyFieldError: Parsing failed to determine the file path.
  """
  target_src_path = orig_json.get('target', {}).get('src_path')
  if not target_src_path:
    raise CargoClippyFieldError(src, src_line, 'file_path')
  return resolve_path(target_src_path)


def parse_locations(
    orig_json: Dict[Text, Any],
    file_path: Text) -> Iterable['CodeLocation']:
  """Yields the code locations associated with this diagnostic.

  The relevant code locations can appear either in the message's "spans" field,
  which is used when present, or in the spans of the message's children, each
  of which carries its own locations.

  Args:
    orig_json: A single clippy entry parsed from the original JSON output.
    file_path: A resolved path to the original source location.

  Yields:
    A CodeLocation object associated with a relevant span.
  """
  spans = orig_json.get('message', {}).get('spans', [])
  children = orig_json.get('message', {}).get('children', [])
  for child in children:
    spans = spans + child.get('spans', [])
  # Deduplicate identical spans while preserving the order they were found in.
  locations = set()
  for span in spans:
    location = CodeLocation(
        file_path=file_path,
        file_name=span.get('file_name'),
        line_start=span.get('line_start'),
        line_end=span.get('line_end'),
        column_start=span.get('column_start'),
        column_end=span.get('column_end'))
    if location not in locations:
      locations.add(location)
      yield location


def parse_level(src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
  """Returns the level (error or warning) associated with this diagnostic.

  Args:
    src: Name of the file orig_json was found in.
    src_line: Line number where orig_json was found.
    orig_json: A single clippy entry parsed from the original JSON output.

  Returns:
    The level of the diagnostic as a string (either error or warning).

  Raises:
    CargoClippyFieldError: Parsing failed to determine the level.
  """
  level = orig_json.get('level')
  if not level:
    level = orig_json.get('message', {}).get('level')
  if not level:
    raise CargoClippyFieldError(src, src_line, 'level')
  return level


def parse_message(
    src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
  """Returns the formatted linter message for this diagnostic.

  Args:
    src: Name of the file orig_json was found in.
    src_line: Line number where orig_json was found.
    orig_json: A single clippy entry parsed from the original JSON output.

  Returns:
    The rendered message of the diagnostic.

  Raises:
    CargoClippyFieldError: Parsing failed to determine the message.
  """
  message = orig_json.get('message', {}).get('rendered')
  if message is None:
    raise CargoClippyFieldError(src, src_line, 'message')
  return message


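# For reference, each input line is a cargo/clippy JSON record. A hedged,
# abbreviated sketch showing only the fields read by the parsers above
# (real records contain many more fields, and the values here are invented):
#   {"reason": "compiler-message",
#    "target": {"src_path": "/path/to/src/lib.rs"},
#    "message": {"level": "warning",
#                "rendered": "warning: unused variable: `x`\n ...",
#                "spans": [{"file_name": "src/lib.rs",
#                           "line_start": 1, "line_end": 1,
#                           "column_start": 5, "column_end": 10}],
#                "children": []}}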
def parse_diagnostics(
    src: Text, orig_jsons: Iterable[Text]) -> Iterable[ClippyDiagnostic]:
  """Parses original JSON to find the fields of a Clippy Diagnostic.

  Args:
    src: Name of the file the clippy entries were found in.
    orig_jsons: An iterable of clippy entries in original json.

  Yields:
    A ClippyDiagnostic for each compiler-message entry in orig_jsons.

  Raises:
    CargoClippyJSONError: if a diagnostic is not valid JSON.
    CargoClippyReasonError: if a diagnostic is missing a "reason" field.
    CargoClippyFieldError: if a field cannot be determined while parsing.
  """
  for src_line, orig_json in enumerate(orig_jsons):
    try:
      line_json = json.loads(orig_json)
    except json.decoder.JSONDecodeError:
      json_error = CargoClippyJSONError(src, src_line)
      logging.error(json_error)
      raise json_error
    # Clippy outputs several types of logs, as distinguished by the "reason"
    # field, but we only want to process "compiler-message" logs.
    reason = line_json.get('reason')
    if reason is None:
      reason_error = CargoClippyReasonError(src, src_line)
      logging.error(reason_error)
      raise reason_error
    if reason != 'compiler-message':
      continue

    file_path = parse_file_path(src, src_line, line_json)
    # Materialize the locations so that empty results can be detected when
    # filtering diagnostics later (a generator is always truthy).
    locations = list(parse_locations(line_json, file_path))
    level = parse_level(src, src_line, line_json)
    message = parse_message(src, src_line, line_json)

    # TODO(ryanbeltran): Export suggested replacements
    yield ClippyDiagnostic(file_path, locations, level, message)


def parse_files(input_dir: Text) -> Iterable[ClippyDiagnostic]:
  """Gets all compiler-message lints from all the input files in input_dir.

  Args:
    input_dir: Path to the directory to scan for input files.

  Yields:
    ClippyDiagnostic objects found in files in the input directory.
  """
  for root_path, _, file_names in os.walk(input_dir):
    for file_name in file_names:
      file_path = os.path.join(root_path, file_name)
      with open(file_path, encoding='utf-8') as clippy_file:
        yield from parse_diagnostics(file_path, clippy_file)


def filter_diagnostics(
    diags: Iterable[ClippyDiagnostic],
    file_filter: Text) -> Iterable[ClippyDiagnostic]:
  """Filters diagnostics by file path and message, dropping empty findings."""
  file_pattern = include_file_pattern(file_filter)
  for diag in diags:
    # Only include diagnostics whose file path matches the file_filter.
    if not file_pattern.fullmatch(diag.file_path):
      continue
    # Ignore redundant messages: "aborting due to previous error..."
    if 'aborting due to previous error' in diag.message:
      continue
    # Findings with no location are never useful.
    if not diag.locations:
      continue
    yield diag


def include_file_pattern(file_filter: Text) -> 're.Pattern':
  """Constructs a regex pattern matching relevant file paths."""
  # FIXME(ryanbeltran): currently does not support prefixes for recursive
  # wildcards such as a**/b.
  assert not re.search(r'[^/]\*\*', file_filter), (
      'prefixes for recursive wildcard ** not supported unless ending with /')
  tmp_char = chr(0)
  return re.compile(
      file_filter
      # Escape any .'s
      .replace('.', r'\.')
      # Squash recursive wildcards into a single symbol
      .replace('**/', tmp_char)
      .replace('**', tmp_char)
      # Nonrecursive wildcards match any string of non-"/" symbols
      .replace('*', r'[^/]*')
      # Recursive wildcards match any string of symbols
      .replace(tmp_char, r'(.*/)?')
      # Some paths may contain "//" which is equivalent to "/"
      .replace('//', '/')
  )
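# For illustration, hypothetical file_filter values compile as follows:
#   '/**/*'                 -> r'/(.*/)?[^/]*'   (any absolute path)
#   '/path/to/src/**/*.rs'  -> r'/path/to/src/(.*/)?[^/]*\.rs'
# so a recursive wildcard may span any number of directory levels.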


def get_arg_parser() -> commandline.ArgumentParser:
  """Creates an argument parser for this script."""
  parser = commandline.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--output', required=True, type='path', help='File to write results to.')
  parser.add_argument(
      '--files',
      required=False,
      default='/**/*',
      type='path',
      help='File(s) to output lints for. If none are specified, this tool '
           'outputs all lints reported by clippy.')
  parser.add_argument(
      '--clippy-json-dir',
      type='path',
      help='Directory where clippy outputs were previously written to.')
  return parser


def main(argv: List[str]) -> None:
  cros_build_lib.AssertInsideChroot()

  logging.basicConfig()

  parser = get_arg_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  input_dir = resolve_path(opts.clippy_json_dir)
  output_path = resolve_path(opts.output)
  file_filter = resolve_path(opts.files)

  diagnostics = filter_diagnostics(parse_files(input_dir), file_filter)
  with open(output_path, 'w', encoding='utf-8') as output_file:
    output_file.writelines(f'{diag.as_json()}\n' for diag in diagnostics)