blob: 312137c9cf007e36d36fee8127a1669d61014d09 [file] [log] [blame]
George Burgess IV853d65b2020-02-25 13:13:15 -08001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Runs clang-tidy across the given files, dumping diagnostics to a JSON file.
6
7This script is intended specifically for use with Tricium (go/tricium).
8"""
9
10# From an implementation perspective, it's good to note that this script
11# cooperates with the toolchain's compiler wrapper. In particular,
12# ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go.
13#
14# When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX)
15# is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler.
16# This clang-tidy invocation will result in a few files being dumped to
17# |LINT_BASE| (below):
18# - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents
19# clang-tidy's diagnostics for the file the compiler was asked to build
20# - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file
21# was generated, including clang-tidy's exit code, stdout, etc. See
22# |InvocationMetadata| below.
23#
24# As one might expect, the compiler wrapper writes the JSON file only after
25# clang-tidy is done executing.
26#
27# This directory might contain other files, as well; these are ignored by this
28# script.
29
30import bisect
31import json
Chris McDonald59650c32021-07-20 15:29:28 -060032import logging
George Burgess IV853d65b2020-02-25 13:13:15 -080033import multiprocessing
34import os
35from pathlib import Path
36import re
37import subprocess
38import sys
39import tempfile
40import traceback
Mike Frysinger807d8282022-04-28 22:45:17 -040041from typing import (
42 Any,
43 Dict,
44 Iterable,
45 List,
46 NamedTuple,
47 Optional,
48 Set,
49 Tuple,
50 Union,
51)
George Burgess IV853d65b2020-02-25 13:13:15 -080052
53import yaml # pylint: disable=import-error
Mike Frysinger06a51c82021-04-06 11:39:17 -040054
55from chromite.lib import build_target_lib
George Burgess IV853d65b2020-02-25 13:13:15 -080056from chromite.lib import commandline
57from chromite.lib import cros_build_lib
George Burgess IV853d65b2020-02-25 13:13:15 -080058from chromite.lib import osutils
59from chromite.lib import portage_util
60from chromite.lib import workon_helper
61
George Burgess IV853d65b2020-02-25 13:13:15 -080062
# The directory under which the compiler wrapper stores clang-tidy reports
# (the per-invocation YAML diagnostics and JSON metadata files).
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
65
66
class TidyReplacement(NamedTuple):
  """A single textual fix-it suggested by clang-tidy.

  No file path is stored here: a replacement is always attached to a
  |TidyDiagnostic|, and is expected to apply to that diagnostic's file.
  """
  # The text to put in place of the replaced range.
  new_text: str
  # Line numbers on which the replaced range starts and ends.
  start_line: int
  end_line: int
  # Offsets of the range's start and end within their respective lines.
  start_char: int
  end_char: int
78
79
class TidyExpandedFrom(NamedTuple):
  """One level of the macro expansion a diagnostic was found inside of.

  clang-tidy reports where a macro was expanded from whenever a diagnostic
  lands inside of a macro expansion. A |TidyDiagnostic| carries one
  |TidyExpandedFrom| per level of that expansion.
  """
  file_path: Path
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Returns a JSON-serializable dict describing this expansion location."""
    as_json = dict(self._asdict())
    # |Path| objects aren't JSON-serializable; emit a POSIX-style string.
    as_json['file_path'] = self.file_path.as_posix()
    return as_json
96
97
class Error(Exception):
  """Root of the exception hierarchy used by tricium-clang-tidy."""
100
101
class ClangTidyParseError(Error):
  """Signals that one or more clang-tidy parsing jobs failed."""

  def __init__(self, failed_jobs: int, total_jobs: int):
    # Keep the raw counts around so callers can inspect them.
    self.failed_jobs = failed_jobs
    self.total_jobs = total_jobs
    summary = f'{failed_jobs}/{total_jobs} parse jobs failed'
    super().__init__(summary)
109
110
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here. Sorting is mostly just for
  deterministic/pretty output.
  """
  # The file the diagnostic was reported in. This is None for diagnostics
  # that clang-tidy reported with an empty file name.
  file_path: Optional[Path]
  line_number: int
  diag_name: str
  message: str
  # Both of these hold any number of elements (including zero), hence the
  # variadic |Tuple[X, ...]| form rather than the one-element |Tuple[X]|.
  replacements: Tuple['TidyReplacement', ...]
  expansion_locs: Tuple['TidyExpandedFrom', ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: the directory to make |file_path| and the |file_path| of every
        element of |expansion_locs| relative to.

    Returns:
      A copy of this diagnostic with rewritten paths. |self| is unchanged.
    """

    def relative(path: Path) -> Path:
      # Use relpath because Path.relative_to requires that `path` is rooted
      # at `where`.
      return Path(os.path.relpath(path, where))

    return self._replace(
        file_path=relative(self.file_path),
        expansion_locs=tuple(
            x._replace(file_path=relative(x.file_path))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializeable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
145
146
class ClangTidySchemaError(Error):
  """Signals that clang-tidy's YAML output was malformed."""

  def __init__(self, err_msg: str):
    # Stash the message so callers can get at it without |str(exception)|.
    self.err_msg = err_msg
    super().__init__(err_msg)
153
154
class LineOffsetMap:
  """Translates character offsets within a file into line/column positions."""

  def __init__(self, newline_locations: Iterable[int]):
    # A line begins one character past the newline that ends the previous
    # line. The |0| and |sys.maxsize| sentinels bracket the real line starts,
    # which keeps the |bisect| lookups below free of special cases for the
    # first and last lines of a file.
    line_starts = sorted(
        [0, sys.maxsize, *(offset + 1 for offset in newline_locations)])

    first, second = line_starts[0], line_starts[1]
    assert first == 0, first
    assert second != 0, second
    penultimate, last = line_starts[-2], line_starts[-1]
    assert penultimate < sys.maxsize, penultimate
    assert last == sys.maxsize, last

    self._line_starts = line_starts

  def get_line_number(self, char_number: int) -> int:
    """Returns the line number (counting from 1) containing |char_number|."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The number of line starts at or before |char_number| is exactly the
    # line number of the line containing it.
    return bisect.bisect_right(self._line_starts, char_number)

  def get_line_offset(self, char_number: int) -> int:
    """Returns how far into its line the given |char_number| is."""
    assert 0 <= char_number < sys.maxsize, char_number
    containing_line = bisect.bisect_right(self._line_starts, char_number)
    return char_number - self._line_starts[containing_line - 1]

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Builds a LineOffsetMap from the newlines found in |data|."""
    newline_re = re.compile(r'\n')
    return LineOffsetMap(
        match.start() for match in newline_re.finditer(data))
189
190
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  This is a generator, so nothing here (including argument validation) runs
  until the result is iterated.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be
      absolute; relative paths in |yaml_data| are resolved against it.
    yaml_data: The parsed YAML data from clang-tidy's fixits file. If this is
      None, no diagnostics are produced.

  Yields:
    A |TidyDiagnostic| for each element of |yaml_data['Diagnostics']|.

  Raises:
    ClangTidySchemaError: if |yaml_data| lacks a key this parser requires.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets: Dict[Optional[Path], LineOffsetMap] = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    if file_path:
      offsets = LineOffsetMap.for_text(file_path.read_text(encoding='utf-8'))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit
  # relative file paths for diagnostics. This fixes those.
  def makeabs(file_path: Optional[str]) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty, since there's nothing to resolve.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only notes describing macro expansions are of interest here.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    # Chain explicitly so the traceback presents the |KeyError| as the cause
    # of this error rather than as an unrelated failure during handling.
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
295
296
class InvocationMetadata(NamedTuple):
  """Describes one run of clang-tidy, as recorded in its JSON metadata file."""
  # The exit code of the clang-tidy invocation.
  exit_code: int
  # The full command line: the executable followed by its arguments.
  invocation: List[str]
  # What clang-tidy was asked to lint.
  lint_target: str
  # The captured output of the invocation.
  stdstreams: str
  # The working directory of the invocation.
  wd: str
305
306
class ExceptionData:
  """A snapshot of an exception that can be handed to another process."""

  def __init__(self):
    """Captures the exception being handled; only call from `except` blocks."""
    # Only the formatted traceback text is kept, not the exception object.
    self._traceback_text = traceback.format_exc()

  def __str__(self):
    return self._traceback_text
316
317
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Loads the results of a single clang-tidy invocation.

  This is meant to run in a worker process. Exceptions and locking and such
  work notoriously poorly across processes, so failures are reported through
  the return value rather than by |raise|ing (KeyboardInterrupt and similar
  are the exception).

  Args:
    json_file: The JSON invocation metadata file to parse. The diagnostics
      are read from the file with the same name and a `.yaml` suffix.

  Returns:
    An |ExceptionData| instance on failure. On success, a tuple of
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    raw_meta = json.loads(json_file.read_text(encoding='utf-8'))
    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    crash_info = raw_meta.get('crash_output')
    if crash_info:
      reproducer_path = crash_info['crash_reproducer_path']
      reproducer_output = crash_info['stdstreams']
      raise RuntimeError(f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {reproducer_path}
## Output producing reproducer:
{reproducer_output}
## Output from the crashing invocation:
{meta.stdstreams}
""")

    yaml_file = json_file.with_suffix('.yaml')
    # A missing YAML file probably means clang-tidy was killed. Put its
    # output in the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    yaml_data = yaml.safe_load(yaml_file.read_bytes())
    diagnostics = list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
    return meta, diagnostics
  except Exception:
    # Deliberately broad: see the docstring for why this never raises.
    return ExceptionData()
376
377
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Builds a package with clang-tidy enabled and gathers the results.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a tmpdir that all of the lint YAML files (if any) will be in.
    This will also be populated by JSON files containing InvocationMetadata.
    The generation of this is handled by our compiler wrapper.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Recreate the report directory so only this build's reports end up in it.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  ebuild_cmd = [f'ebuild-{board}', ebuild_path, 'clean', 'compile']
  result = cros_build_lib.run(
      ebuild_cmd,
      check=False,
      print_cmd=True,
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  # A failed build is only worth a warning; whatever reports were written
  # are still collected below.
  if not result.returncode:
    logging.info('Running |ebuild| on %s %s', ebuild_path, 'succeeded')
  else:
    logging.warning(
        'Running |ebuild| on %s %s', ebuild_path,
        f'failed with code {result.returncode}; output:\n{result.stdout}')

  # Copy the reports out of |LINT_BASE|, which the next call wipes.
  lint_tmpdir = Path(tempfile.mkdtemp(prefix='tricium_tidy'))
  osutils.CopyDirContents(LINT_BASE, str(lint_tmpdir))
  return lint_tmpdir
417
418
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: multiprocessing.Pool) -> Set[TidyDiagnostic]:
  """Parses every clang-tidy report in |lint_tmpdir| into diagnostics.

  Args:
    lint_tmpdir: a directory filled with linting artifacts; each `*.json`
      file in it is handed to |parse_tidy_invocation|.
    yaml_pool: the pool to run the parsing jobs on.

  Returns:
    The deduplicated diagnostics from all of the reports.

  Raises:
    ClangTidyParseError: if any report couldn't be parsed.
  """
  json_files = list(lint_tmpdir.glob('*.json'))
  # Results are zipped with |json_files| below to match each to its input.
  parse_results = yaml_pool.imap(parse_tidy_invocation, json_files)

  failure_count = 0
  diagnostics = set()
  for json_path, outcome in zip(json_files, parse_results):
    if not isinstance(outcome, ExceptionData):
      meta, found = outcome
      # A nonzero exit is only logged; its diagnostics are still kept.
      if meta.exit_code:
        logging.warning(
            'Invoking clang-tidy on %r with flags %r exited with code %d; '
            'output:\n%s',
            meta.lint_target,
            meta.invocation,
            meta.exit_code,
            meta.stdstreams,
        )
      diagnostics.update(found)
    else:
      failure_count += 1
      logging.error('Parsing %r failed with an exception\n%s', json_path,
                    outcome)

  # Report failures only after every job has been looked at, so that all of
  # them get logged.
  if failure_count:
    raise ClangTidyParseError(failure_count, len(json_files))

  return diagnostics
450
451
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Prepares |board| for running clang-tidy on the given ebuilds.

  Args:
    board: the board to set up.
    ebuild_list: the ebuilds that are going to be linted.
  """
  packages = [ebuild.package for ebuild in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  sysroot = build_target_lib.get_default_sysroot_path(board)
  workon = workon_helper.WorkonHelper(sysroot)
  # Stop working on everything first, then start on just these packages.
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
  # concurrency we can get here to (arbitrarily) 8 jobs. Having
  # `configure`s and such run in parallel is nice.
  max_jobs = min(8, multiprocessing.cpu_count())

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  emerge_cmd = [f'emerge-{board}', '--onlydeps', f'-j{max_jobs}', *packages]
  result = cros_build_lib.run(emerge_cmd, print_cmd=True, check=False)
  if result.returncode:
    logging.error('Setup failed with exit code %d; some lints may fail.',
                  result.returncode)
477
478
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: the board to build the ebuilds for.
    ebuild_list: the ebuilds to build and lint, one after another.
    keep_dirs: if True, the directories holding each ebuild's lint reports
      are logged and left in place rather than removed.
    parse_errors_are_nonfatal: if True, failures to parse clang-tidy's
      output are logged rather than propagated.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.

  Raises:
    ClangTidyParseError: if parsing fails and |parse_errors_are_nonfatal| is
      False.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  sysroot = Path(build_target_lib.get_default_sysroot_path(board))
  osutils.RmDir(
      sysroot / 'var' / 'cache' / 'portage',
      ignore_missing=True,
      sudo=True,
  )

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.safe_load| on my otherwise-idle dev box.
  # |yaml_pool| lets us do this in parallel.
  #
  # Use |multiprocessing.Pool| rather than |multiprocessing.pool.Pool|: the
  # |multiprocessing.pool| submodule isn't loaded by `import multiprocessing`,
  # so reaching through it only works if something else imported it first.
  with multiprocessing.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        # Runs on success and failure alike, so tmpdirs never pile up unless
        # the caller asked to keep them.
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
515
516
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: the board whose |equery| is used to look packages up.
    package_names: package names and/or paths to `.ebuild` files.

  Returns:
    One ebuild path per element of |package_names|, in the same order.
    Elements that already end in `.ebuild` are returned unchanged.
  """
  # `import multiprocessing` doesn't load the |multiprocessing.pool|
  # submodule, so import it explicitly rather than relying on some other
  # module having done so.
  import multiprocessing.pool  # pylint: disable=import-outside-toplevel

  def resolve_package(package_name_or_ebuild):
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with multiprocessing.pool.ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
534
535
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable[TidyDiagnostic]) -> List[TidyDiagnostic]:
  """Filters the given TidyDiagnostics, relativizing paths if requested.

  Args:
    only_files: a set of file paths, or None; if this is not None (and not
      empty), only |TidyDiagnostic|s in these files will be kept.
    git_repo_base: if not None, only files in the given directory will be kept.
      All paths of the returned diagnostics will be made relative to
      |git_repo_base|.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  kept = []
  seen = 0

  for seen, diag in enumerate(diags, start=1):
    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    file_path = Path(diag.file_path)
    is_requested = not only_files or file_path in only_files
    if not is_requested:
      continue

    if git_repo_base:
      in_repo = git_repo_base in file_path.parents
      if not in_repo:
        continue
      diag = diag.normalize_paths_to(git_repo_base)

    kept.append(diag)

  logging.info('Dropped %d/%d diags', seen - len(kept), seen)
  return sorted(kept)
582
583
def get_parser() -> commandline.ArgumentParser:
  """Builds the command-line parser for this script."""
  # Each entry is (name, keyword arguments for |add_argument|). These are
  # registered in the order they're listed.
  arguments = (
      ('--output', {
          'required': True,
          'type': 'path',
          'help': 'File to write results to.',
      }),
      ('--git-repo-base', {
          'type': 'path',
          'help': "Base directory of the git repo we're looking at. If "
                  'specified, only diagnostics in files in this directory '
                  'will be emitted. All diagnostic file paths will be made '
                  'relative to this directory.',
      }),
      ('--board', {
          'required': True,
          'help': 'Board to run under.',
      }),
      ('--package', {
          'action': 'append',
          'required': True,
          'help': 'Package(s) to build and lint. Required.',
      }),
      ('--keep-lint-dirs', {
          'action': 'store_true',
          'help': 'Keep directories with tidy lints around; meant primarily '
                  'for debugging.',
      }),
      ('--nonfatal-parse-errors', {
          'action': 'store_true',
          'help': "Keep going even if clang-tidy's output is impossible to "
                  'parse.',
      }),
      ('file', {
          'nargs': '*',
          'type': 'path',
          'help': 'File(s) to output lints for. If none are specified, this '
                  'tool outputs all lints that clang-tidy emits after '
                  'applying filtering from |--git-repo-base|, if applicable.',
      }),
  )

  parser = commandline.ArgumentParser(description=__doc__)
  for name, kwargs in arguments:
    parser.add_argument(name, **kwargs)
  return parser
618
619
def main(argv: List[str]) -> None:
  """Lints the requested packages and writes the diagnostics out as JSON.

  Args:
    argv: the command-line arguments; see |get_parser| for what's accepted.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  only_files = {Path(f).resolve() for f in opts.file}

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(git_repo_base)
    has_git_dir = (git_repo_base / '.git').exists()
    if not has_git_dir:
      # This script doesn't strictly care if there's a .git dir there; more of
      # a smoke check.
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  ebuild_paths = resolve_package_ebuilds(opts.board, opts.package)
  package_ebuilds = [portage_util.EBuild(path) for path in ebuild_paths]

  setup_tidy(opts.board, package_ebuilds)
  raw_diags = run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                       opts.nonfatal_parse_errors)
  lints = filter_tidy_lints(only_files, git_repo_base, diags=raw_diags)

  serialized = json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]})
  osutils.WriteFile(opts.output, serialized, atomic=True)