Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 1 | # Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Runs cargo clippy across the given files, dumping diagnostics to a JSON file. |
| 6 | |
| 7 | This script is intended specifically for use with Tricium (go/tricium). |
| 8 | """ |
| 9 | |
| 10 | import json |
Chris McDonald | 59650c3 | 2021-07-20 15:29:28 -0600 | [diff] [blame] | 11 | import logging |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 12 | import os |
| 13 | from pathlib import Path |
Ryan Beltran | a4b45a3 | 2021-08-11 08:26:38 +0000 | [diff] [blame] | 14 | import re |
Chris McDonald | 59650c3 | 2021-07-20 15:29:28 -0600 | [diff] [blame] | 15 | from typing import Any, Dict, Iterable, List, NamedTuple, Text |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 16 | |
| 17 | from chromite.lib import commandline |
| 18 | from chromite.lib import cros_build_lib |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 19 | |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 20 | |
class Error(Exception):
    """Base error class for tricium-cargo-clippy.

    The CargoClippy*Error exceptions in this module derive from this class, so
    a single `except Error` clause catches all of them.
    """
| 23 | |
| 24 | |
class CargoClippyPackagePathError(Error):
    """Raised when a clippy output file does not begin with a package path.

    The offending file name is kept on the `source` attribute.
    """

    def __init__(self, source: Text):
        # Record the attribute first; the base class only needs the message.
        self.source = source
        description = f'{source} does not start with a package path'
        super().__init__(description)
| 31 | |
class CargoClippyJSONError(Error):
    """Raised when a line of cargo-clippy output is not valid JSON.

    The file name and line number of the bad entry are kept on the `source`
    and `line_num` attributes.
    """

    def __init__(self, source: Text, line_num: int):
        self.source, self.line_num = source, line_num
        description = f'{source}:{line_num}: is not valid JSON'
        super().__init__(description)
| 39 | |
| 40 | |
class CargoClippyReasonError(Error):
    """Raised when a cargo-clippy JSON entry has no "reason" field.

    The file name and line number of the entry are kept on the `source` and
    `line_num` attributes.
    """

    def __init__(self, source: Text, line_num: int):
        self.source, self.line_num = source, line_num
        description = f'{source}:{line_num}: is missing its reason'
        super().__init__(description)
| 48 | |
| 49 | |
class CargoClippyFieldError(Error):
    """Raised when a required field cannot be read from a clippy JSON entry.

    The file name, line number and name of the missing field are kept on the
    `source`, `line_num` and `field` attributes.
    """

    def __init__(self, source: Text, line_num: int, field: Text):
        self.source, self.line_num, self.field = source, line_num, field
        description = (
            f'{source}:{line_num}: {field} could not be parsed from original json'
        )
        super().__init__(description)
| 60 | |
| 61 | |
def resolve_path(file_path: Text) -> Text:
    """Returns file_path as an absolute path string with symlinks resolved."""
    resolved = Path(file_path).resolve()
    return str(resolved)
| 64 | |
| 65 | |
class CodeLocation(NamedTuple):
    """Holds the location of a ClippyDiagnostic finding."""
    # Path of the file the finding points at.
    file_path: Text
    # Line range copied from the clippy span (line_start / line_end).
    line_start: int
    line_end: int
    # Column range copied from the clippy span (column_start / column_end).
    column_start: int
    column_end: int

    def to_dict(self):
        """Returns the location as a plain dict keyed by field name."""
        fields = dict(self._asdict())
        fields['file_path'] = self.file_path
        return fields
| 79 | |
| 80 | |
class ClippyDiagnostic(NamedTuple):
    """Holds information about a compiler message from Clippy."""
    # Code locations the message refers to; each must provide to_dict().
    locations: Iterable['CodeLocation']
    # Severity string taken from the clippy entry.
    level: Text
    # Rendered text of the message.
    message: Text

    def as_json(self):
        """Returns the diagnostic serialized as a JSON object string.

        Iterates self.locations once to expand each location into a dict.
        """
        payload = self._asdict()
        payload['locations'] = [loc.to_dict() for loc in self.locations]
        return json.dumps(payload)
| 92 | |
| 93 | |
def parse_locations(
        orig_json: Dict[Text, Any],
        package_path: Text, git_repo: Text) -> Iterable['CodeLocation']:
    """Yields each distinct code location referenced by a clippy diagnostic.

    Spans are collected from message['spans'] first, followed by the spans of
    every entry in message['children']. Each span's file name is joined onto
    package_path and then shortened: the git_repo prefix is stripped when the
    path lives under it, otherwise any ebuild work-directory prefix is removed.

    Args:
      orig_json: One decoded clippy JSON entry.
      package_path: A resolved path to the rust package.
      git_repo: Base directory for git repo to strip out in diagnostics; may
        be empty.

    Yields:
      A CodeLocation for every span, skipping locations already yielded.
    """
    message = orig_json.get('message', {})
    span_groups = [message.get('spans', [])]
    span_groups += [
        child.get('spans', []) for child in message.get('children', [])
    ]

    repo_prefix = f'{git_repo}/'
    # Matches ebuild work directories such as
    #   "**/<package>-9999/work/<package>-9999/"
    #   "**/<package>-0.24.52-r9/work/<package>-0.24.52/"
    ebuild_work_dir = r'(.*/)?([^/]+)-[^/]+/work/[^/]+/+'

    seen = set()
    for group in span_groups:
        for span in group:
            path = os.path.join(package_path, span.get('file_name'))
            if git_repo and path.startswith(repo_prefix):
                path = path[len(repo_prefix):]
            else:
                path = re.sub(ebuild_work_dir, '', path)
            candidate = CodeLocation(
                file_path=path,
                line_start=span.get('line_start'),
                line_end=span.get('line_end'),
                column_start=span.get('column_start'),
                column_end=span.get('column_end'))
            if candidate in seen:
                continue
            seen.add(candidate)
            yield candidate
| 137 | |
| 138 | |
def parse_level(src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
    """Extracts the level of a clippy diagnostic.

    The top-level 'level' key is preferred; when it is missing or empty the
    'level' key nested under 'message' is used instead.

    Args:
      src: Name of the file orig_json was found in.
      src_line: Line number where orig_json was found.
      orig_json: One decoded clippy JSON entry.

    Returns:
      The level of the diagnostic as a string.

    Raises:
      CargoClippyFieldError: Neither place holds a non-empty level.
    """
    found = orig_json.get('level') or orig_json.get('message', {}).get('level')
    if found:
        return found
    raise CargoClippyFieldError(src, src_line, 'level')
| 159 | |
| 160 | |
def parse_message(
        src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
    """Extracts the rendered linter message of a clippy diagnostic.

    Args:
      src: Name of the file orig_json was found in.
      src_line: Line number where orig_json was found.
      orig_json: One decoded clippy JSON entry.

    Returns:
      The value of message['rendered']; an empty string is accepted.

    Raises:
      CargoClippyFieldError: message['rendered'] is absent or None.
    """
    rendered = orig_json.get('message', {}).get('rendered')
    if rendered is not None:
        return rendered
    raise CargoClippyFieldError(src, src_line, 'message')
| 180 | |
| 181 | |
def parse_diagnostics(
        src: Text, orig_jsons: Iterable[Text],
        git_repo: Text) -> Iterable['ClippyDiagnostic']:
    """Parses original JSON to find the fields of each Clippy Diagnostic.

    The first entry of orig_jsons must be a JSON object carrying a
    'package_path' key; every following entry is a line of clippy output.
    Only entries whose "reason" is "compiler-message" produce a diagnostic.

    Args:
      src: Name of the file orig_jsons was read from.
      orig_jsons: An iterable of clippy entries in original json, one JSON
        document per item.
      git_repo: Base directory for git repo to strip out in diagnostics.

    Yields:
      A ClippyDiagnostic for each compiler-message entry. Its locations are
      an already-built list, so they can be truth-tested and iterated more
      than once.

    Raises:
      CargoClippyJSONError: if a diagnostic is not valid JSON.
      CargoClippyPackagePathError: if the first entry has no package path.
      CargoClippyReasonError: if a diagnostic is missing a "reason" field.
      CargoClippyFieldError: if a field cannot be determined while parsing.
    """
    # src_line is the 0-based index of the entry within orig_jsons and is
    # what the raised errors report.
    for src_line, orig_json in enumerate(orig_jsons):
        try:
            line_json = json.loads(orig_json)
        except json.decoder.JSONDecodeError as err:
            json_error = CargoClippyJSONError(src, src_line)
            logging.error(json_error)
            # Keep the decoder's own error attached as the cause.
            raise json_error from err

        # We pass the path to the package in a special JSON on the first line
        if src_line == 0:
            package_path = line_json.get('package_path')
            if not package_path:
                raise CargoClippyPackagePathError(src)
            package_path = resolve_path(package_path)
            continue

        # Clippy outputs several types of logs, as distinguished by the
        # "reason" field, but we only want to process "compiler-message" logs.
        reason = line_json.get('reason')
        if reason is None:
            reason_error = CargoClippyReasonError(src, src_line)
            logging.error(reason_error)
            raise reason_error
        if reason != 'compiler-message':
            continue

        # parse_locations is a generator; a bare generator object is always
        # truthy and can only be consumed once, so build the list here.
        locations = list(parse_locations(line_json, package_path, git_repo))
        level = parse_level(src, src_line, line_json)
        message = parse_message(src, src_line, line_json)

        # TODO(ryanbeltran): Export suggested replacements
        yield ClippyDiagnostic(locations, level, message)
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 231 | |
| 232 | |
def parse_files(
        input_dir: Text, git_repo: Text) -> Iterable['ClippyDiagnostic']:
    """Gets all compiler-message lints from every file under input_dir.

    The directory tree is walked recursively and each file found is opened as
    UTF-8 text and handed to parse_diagnostics.

    Args:
      input_dir: path to directory to scan for files
      git_repo: Base directory for git repo to strip out in diagnostics.

    Yields:
      Clippy Diagnostics objects found in files in the input directory
    """
    for directory, _subdirs, names in os.walk(input_dir):
        paths = (os.path.join(directory, name) for name in names)
        for path in paths:
            with open(path, encoding='utf-8') as handle:
                yield from parse_diagnostics(path, handle, git_repo)
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 248 | |
| 249 | |
def filter_diagnostics(
        diags: Iterable['ClippyDiagnostic']) -> Iterable['ClippyDiagnostic']:
    """Filters out diagnostics that are redundant or carry no location.

    Args:
      diags: The diagnostics to filter. Their locations may be any iterable,
        including a one-shot iterator.

    Yields:
      Each diagnostic that is not an "aborting due to previous error" message
      and has at least one location. The yielded diagnostic carries its
      locations as a list.
    """
    for diag in diags:
        # ignore redundant messages: "aborting due to previous error..."
        if 'aborting due to previous error' in diag.message:
            continue
        # An iterator or generator is truthy even when it yields nothing, so
        # the locations have to be materialized before checking for emptiness.
        locations = list(diag.locations or ())
        # findings with no location are never useful
        if not locations:
            continue
        # Hand back the list so the consumed iterator is not passed on.
        yield diag._replace(locations=locations)
| 261 | |
Ryan Beltran | cfc5c36 | 2021-03-02 18:36:18 +0000 | [diff] [blame] | 262 | |
def get_arg_parser() -> commandline.ArgumentParser:
    """Creates an argument parser for this script.

    Returns:
      A parser accepting --output and --clippy-json-dir (both required) and
      the optional --git-repo-path, which defaults to an empty string.
    """
    # NOTE(review): type='path' is handled by chromite's ArgumentParser, not
    # by argparse itself -- confirm its exact normalization there.
    parser = commandline.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--output',
        required=True,
        type='path',
        help='File to write results to.')
    # main() resolves this value unconditionally, so a missing flag must be
    # reported as a usage error rather than surfacing later as a TypeError.
    parser.add_argument(
        '--clippy-json-dir',
        required=True,
        type='path',
        help='Directory where clippy outputs were previously written to.')
    parser.add_argument(
        '--git-repo-path',
        type='path',
        default='',
        help='Base directory for git repo to strip out in diagnostics.')
    return parser
| 278 | |
| 279 | |
def main(argv: List[str]) -> None:
    """Converts saved clippy output into a file of JSON diagnostics.

    Reads every file under --clippy-json-dir, filters the diagnostics found
    there, and writes one JSON object per line to --output.

    Args:
      argv: Command line arguments, without the program name.
    """
    # NOTE(review): presumably aborts when run outside the Chromium OS
    # chroot -- confirm against cros_build_lib.
    cros_build_lib.AssertInsideChroot()

    logging.basicConfig()

    parser = get_arg_parser()
    opts = parser.parse_args(argv)
    opts.Freeze()

    input_dir = resolve_path(opts.clippy_json_dir)
    output_path = resolve_path(opts.output)
    git_repo = opts.git_repo_path

    diagnostics = filter_diagnostics(parse_files(input_dir, git_repo))
    with open(output_path, 'w', encoding='utf-8') as output_file:
        # Serialize explicitly: formatting the NamedTuple itself would write
        # its repr, not the JSON this script is meant to emit.
        output_file.writelines(
            f'{diag.as_json()}\n' for diag in diagnostics)