# Copyright 2021 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs cargo clippy across the given files, dumping diagnostics to a JSON file.

This script is intended specifically for use with Tricium (go/tricium).
"""

import json
import logging
import os
from pathlib import Path
import re
from typing import Any, Dict, Iterable, List, NamedTuple, Text

from chromite.lib import commandline
from chromite.lib import cros_build_lib


class Error(Exception):
    """Base error class for tricium-cargo-clippy."""


class CargoClippyPackagePathError(Error):
    """Raised when no Package Path is provided.

    Attributes:
        source: Name of the input that did not start with a package path.
    """

    def __init__(self, source: Text):
        super().__init__(f"{source} does not start with a package path")
        self.source = source


class CargoClippyJSONError(Error):
    """Raised when cargo-clippy parsing jobs are not proper JSON.

    Attributes:
        source: Name of the input containing the invalid entry.
        line_num: Line number of the invalid entry, as given by the raiser.
    """

    def __init__(self, source: Text, line_num: int):
        super().__init__(f"{source}:{line_num}: is not valid JSON")
        self.source = source
        self.line_num = line_num


class CargoClippyReasonError(Error):
    """Raised when cargo-clippy parsing jobs don't provide a "reason" field.

    Attributes:
        source: Name of the input containing the offending entry.
        line_num: Line number of the offending entry, as given by the raiser.
    """

    def __init__(self, source: Text, line_num: int):
        super().__init__(f"{source}:{line_num}: is missing its reason")
        self.source = source
        self.line_num = line_num


class CargoClippyFieldError(Error):
    """Raised when cargo-clippy parsing jobs fail to determine a field.

    Attributes:
        source: Name of the input containing the offending entry.
        line_num: Line number of the offending entry, as given by the raiser.
        field: Name of the field that could not be determined.
    """

    def __init__(self, source: Text, line_num: int, field: Text):
        super().__init__(
            f"{source}:{line_num}: {field} could not be parsed from original json"
        )
        self.source = source
        self.line_num = line_num
        self.field = field


def resolve_path(file_path: Text) -> Text:
    """Returns file_path as an absolute path string via Path.resolve()."""
    return str(Path(file_path).resolve())


class CodeLocation(NamedTuple):
    """Holds the location of a ClippyDiagnostic finding."""

    file_path: Text
    line_start: int
    line_end: int
    column_start: int
    column_end: int

    def to_dict(self) -> Dict[Text, Any]:
        """Returns the fields of this location as a plain dict."""
        # _asdict() already carries file_path unchanged, so no override of
        # that key is needed; dict() guarantees a plain dict is returned.
        return dict(self._asdict())


class ClippyDiagnostic(NamedTuple):
    """Holds information about a compiler message from Clippy."""

    # Any iterable of objects exposing to_dict(); as_json() iterates it once,
    # so a one-shot iterator is exhausted after serialization.
    locations: Iterable["CodeLocation"]
    level: Text
    message: Text

    def as_json(self) -> Text:
        """Returns this diagnostic serialized as a JSON object string."""
        return json.dumps(
            {
                **self._asdict(),
                # Replace the raw iterable with JSON-serializable dicts.
                "locations": [loc.to_dict() for loc in self.locations],
            }
        )


def parse_locations(
    orig_json: Dict[Text, Any], package_path: Text, git_repo: Text
) -> Iterable["CodeLocation"]:
    """The code locations associated with this diagnostic as an iter.

    Spans are gathered from the message["spans"] field followed by the
    "spans" of every entry in message["children"]. Each distinct location
    is yielded only once, in the order it is first seen.

    Args:
        orig_json: A single decoded clippy entry from the original json.
        package_path: A resolved path to the rust package.
        git_repo: Base directory for git repo to strip out in diagnostics.

    Yields:
        A CodeLocation object associated with a relevant span.
    """
    message = orig_json.get("message", {})
    # Copy into a fresh list so the decoded JSON is never mutated.
    spans = list(message.get("spans", []))
    for child in message.get("children", []):
        spans.extend(child.get("spans", []))

    seen = set()
    for span in spans:
        # os.path.join() keeps an absolute file_name as-is and otherwise
        # anchors a relative one at the package path.
        file_path = os.path.join(package_path, span.get("file_name"))
        if git_repo and file_path.startswith(f"{git_repo}/"):
            file_path = file_path[len(git_repo) + 1 :]
        else:
            # Remove ebuild work directories from prefix
            # Such as: "**/<package>-9999/work/<package>-9999/"
            # or: "**/<package>-0.24.52-r9/work/<package>-0.24.52/"
            file_path = re.sub(
                r"(.*/)?([^/]+)-[^/]+/work/[^/]+/+", "", file_path
            )
        location = CodeLocation(
            file_path=file_path,
            line_start=span.get("line_start"),
            line_end=span.get("line_end"),
            column_start=span.get("column_start"),
            column_end=span.get("column_end"),
        )
        if location not in seen:
            seen.add(location)
            yield location


def parse_level(src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
    """The level (error or warning) associated with this diagnostic.

    The top-level "level" field is preferred; when it is missing or empty
    the "level" field nested under "message" is used instead.

    Args:
        src: Name of the file orig_json was found in.
        src_line: Line number where orig_json was found.
        orig_json: A single decoded clippy entry from the original json.

    Returns:
        The level of the diagnostic as a string (either error or warning).

    Raises:
        CargoClippyFieldError: Parsing failed to determine the level.
    """
    level = orig_json.get("level")
    if not level:
        level = orig_json.get("message", {}).get("level")
    if not level:
        raise CargoClippyFieldError(src, src_line, "level")
    return level


def parse_message(src: Text, src_line: int, orig_json: Dict[Text, Any]) -> Text:
    """The formatted linter message for this diagnostic.

    Args:
        src: Name of the file orig_json was found in.
        src_line: Line number where orig_json was found.
        orig_json: A single decoded clippy entry from the original json.

    Returns:
        The rendered message of the diagnostic.

    Raises:
        CargoClippyFieldError: Parsing failed to determine the message.
    """
    message = orig_json.get("message", {}).get("rendered")
    # Only a missing field is an error; an empty rendered string is returned.
    if message is None:
        raise CargoClippyFieldError(src, src_line, "message")
    return message


def parse_diagnostics(
    src: Text, orig_jsons: Iterable[Text], git_repo: Text
) -> Iterable[ClippyDiagnostic]:
    """Parses original JSON to find the fields of a Clippy Diagnostic.

    The first entry of orig_jsons must be a JSON object carrying a
    "package_path" key; every later entry is a clippy log line. Line numbers
    used in raised errors are zero-based indexes into orig_jsons.

    Args:
        src: Name of the file orig_json was found in.
        orig_jsons: An iterable of clippy entries in original json.
        git_repo: Base directory for git repo to strip out in diagnostics.

    Yields:
        A ClippyDiagnostic for each "compiler-message" entry.

    Raises:
        CargoClippyJSONError: if a diagnostic is not valid JSON.
        CargoClippyPackagePathError: if the first entry has no package path.
        CargoClippyReasonError: if a diagnostic is missing a "reason" field.
        CargoClippyFieldError: if a field cannot be determined while parsing.
    """
    for src_line, orig_json in enumerate(orig_jsons):
        try:
            line_json = json.loads(orig_json)
        except json.decoder.JSONDecodeError as err:
            json_error = CargoClippyJSONError(src, src_line)
            logging.error(json_error)
            # Chain the decoder error so the root cause stays visible.
            raise json_error from err

        # We pass the path to the package in a special JSON on the first line
        if src_line == 0:
            package_path = line_json.get("package_path")
            if not package_path:
                raise CargoClippyPackagePathError(src)
            package_path = resolve_path(package_path)
            continue

        # Clippy outputs several types of logs, as distinguished by the "reason"
        # field, but we only want to process "compiler-message" logs.
        reason = line_json.get("reason")
        if reason is None:
            reason_error = CargoClippyReasonError(src, src_line)
            logging.error(reason_error)
            raise reason_error
        if reason != "compiler-message":
            continue

        locations = parse_locations(line_json, package_path, git_repo)
        level = parse_level(src, src_line, line_json)
        message = parse_message(src, src_line, line_json)

        # TODO(ryanbeltran): Export suggested replacements
        yield ClippyDiagnostic(locations, level, message)


def parse_files(input_dir: Text, git_repo: Text) -> Iterable[ClippyDiagnostic]:
    """Gets all compiler-message lints from all the input files in input_dir.

    The directory is walked recursively and every file found is parsed as
    clippy output.

    Args:
        input_dir: path to directory to scan for files
        git_repo: Base directory for git repo to strip out in diagnostics.

    Yields:
        Clippy Diagnostics objects found in files in the input directory
    """
    for root_path, _, file_names in os.walk(input_dir):
        for file_name in file_names:
            file_path = os.path.join(root_path, file_name)
            with open(file_path, encoding="utf-8") as clippy_file:
                yield from parse_diagnostics(file_path, clippy_file, git_repo)


def filter_diagnostics(
    diags: Iterable["ClippyDiagnostic"],
) -> Iterable["ClippyDiagnostic"]:
    """Filters out diagnostics that are redundant or have no location.

    Args:
        diags: Diagnostics to filter; their locations may be lazy iterators.

    Yields:
        Each kept diagnostic, with its locations materialized into a tuple
        so that they can still be iterated by the consumer.
    """
    for diag in diags:
        # ignore redundant messages: "aborting due to previous error..."
        if "aborting due to previous error" in diag.message:
            continue
        # A generator object is always truthy, so the locations must be
        # materialized before emptiness can be tested reliably.
        locations = tuple(diag.locations)
        # findings with no location are never useful
        if not locations:
            continue
        yield diag._replace(locations=locations)


def get_arg_parser() -> commandline.ArgumentParser:
    """Creates an argument parser for this script."""
    parser = commandline.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--output", required=True, type="path", help="File to write results to."
    )
    # main() resolves this directory unconditionally, so fail at parse time
    # with a usage error instead of crashing later on a missing value.
    parser.add_argument(
        "--clippy-json-dir",
        required=True,
        type="path",
        help="Directory where clippy outputs were previously written to.",
    )
    parser.add_argument(
        "--git-repo-path",
        type="path",
        default="",
        help="Base directory for git repo to strip out in diagnostics.",
    )
    return parser


def main(argv: List[str]) -> None:
    """Parses the clippy logs and writes one JSON diagnostic per output line.

    Args:
        argv: Command line arguments, excluding the program name.
    """
    cros_build_lib.AssertInsideChroot()

    logging.basicConfig()

    parser = get_arg_parser()
    opts = parser.parse_args(argv)
    opts.Freeze()

    input_dir = resolve_path(opts.clippy_json_dir)
    output_path = resolve_path(opts.output)
    git_repo = opts.git_repo_path

    diagnostics = filter_diagnostics(parse_files(input_dir, git_repo))
    with open(output_path, "w", encoding="utf-8") as output_file:
        # Serialize through as_json(): formatting the NamedTuple directly
        # writes its Python repr (including the repr of the locations
        # iterable) rather than JSON.
        output_file.writelines(f"{diag.as_json()}\n" for diag in diagnostics)