blob: 4e3fa4e48fb0836d3ef0cffc31709302f81f09e5 [file] [log] [blame]
George Burgess IV853d65b2020-02-25 13:13:15 -08001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Runs clang-tidy across the given files, dumping diagnostics to a JSON file.
6
7This script is intended specifically for use with Tricium (go/tricium).
8"""
9
10# From an implementation perspective, it's good to note that this script
11# cooperates with the toolchain's compiler wrapper. In particular,
12# ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go.
13#
14# When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX)
15# is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler.
16# This clang-tidy invocation will result in a few files being dumped to
17# |LINT_BASE| (below):
18# - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents
19# clang-tidy's diagnostics for the file the compiler was asked to build
20# - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file
21# was generated, including clang-tidy's exit code, stdout, etc. See
22# |InvocationMetadata| below.
23#
24# As one might expect, the compiler wrapper writes the JSON file only after
25# clang-tidy is done executing.
26#
27# This directory might contain other files, as well; these are ignored by this
28# script.
29
30import bisect
31import json
32import multiprocessing
33import os
34from pathlib import Path
35import re
36import subprocess
37import sys
38import tempfile
39import traceback
40from typing import (Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple,
41 Union)
42
43import yaml # pylint: disable=import-error
Mike Frysinger06a51c82021-04-06 11:39:17 -040044
45from chromite.lib import build_target_lib
George Burgess IV853d65b2020-02-25 13:13:15 -080046from chromite.lib import commandline
47from chromite.lib import cros_build_lib
48from chromite.lib import cros_logging as logging
49from chromite.lib import osutils
50from chromite.lib import portage_util
51from chromite.lib import workon_helper
52
# Fail fast on interpreters older than 3.6; this module uses f-strings and
# class-syntax NamedTuples throughout.
assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'

# The directory under which the compiler wrapper stores clang-tidy reports.
# |generate_lints| wipes and recreates this directory before every build, then
# copies its contents out to a fresh temporary directory for parsing.
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
57
58
class TidyReplacement(NamedTuple):
  """A single textual fix-it suggested by clang-tidy.

  No file path is stored here: a replacement always belongs to a
  |TidyDiagnostic| and applies to that diagnostic's file.
  """
  # The text that should be substituted for the replaced range.
  new_text: str
  # Line numbers of the start/end of the replaced range, as computed by
  # |LineOffsetMap.get_line_number| (the first line of a file is line 1).
  start_line: int
  end_line: int
  # Offsets of the start/end of the replaced range within their respective
  # lines, as computed by |LineOffsetMap.get_line_offset| (0-based).
  start_char: int
  end_char: int
70
71
class TidyExpandedFrom(NamedTuple):
  """One level of a macro expansion chain.

  Diagnostics that land inside a macro expansion come with notes saying where
  the macro was expanded from. A |TidyDiagnostic| carries one
  |TidyExpandedFrom| per such level.
  """
  # File in which this level of the expansion is located.
  file_path: Path
  # Line number within |file_path| for this level of the expansion.
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Returns a JSON-serializable dict describing this |TidyExpandedFrom|."""
    # |Path| objects aren't JSON-serializable, so emit a POSIX-style string.
    posix_path = self.file_path.as_posix()
    return dict(file_path=posix_path, line_number=self.line_number)
88
89
class Error(Exception):
  """Root of the exception hierarchy used by tricium-clang-tidy."""
92
93
class ClangTidyParseError(Error):
  """Signals that one or more clang-tidy parsing jobs failed.

  Attributes:
    failed_jobs: how many parse jobs failed.
    total_jobs: how many parse jobs were attempted in total.
  """

  def __init__(self, failed_jobs: int, total_jobs: int):
    self.failed_jobs = failed_jobs
    self.total_jobs = total_jobs
    summary = f'{failed_jobs}/{total_jobs} parse jobs failed'
    super().__init__(summary)
101
102
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here (i.e., the field order below). Sorting is
  mostly just for deterministic/pretty output.
  """
  # Absolute path of the file the diagnostic was reported in. This is None for
  # diagnostics that clang-tidy reported with an empty file name;
  # |normalize_paths_to| and |to_dict| must not be called on those.
  file_path: Optional[Path]
  # Line within |file_path| that the diagnostic points at.
  line_number: int
  # Name of the clang-tidy check that fired.
  diag_name: str
  # Human-readable diagnostic text.
  message: str
  # Fix-its attached to this diagnostic; all apply to |file_path|.
  replacements: Tuple[TidyReplacement, ...]
  # One entry per level of macro expansion the diagnostic was found in.
  expansion_locs: Tuple[TidyExpandedFrom, ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: the directory that paths should be made relative to.

    Returns:
      A copy of this diagnostic with |file_path| and every expansion location's
      |file_path| rewritten relative to |where|.
    """
    return self._replace(
        # Use relpath because Path.relative_to requires that `self` is rooted
        # at `where`.
        file_path=Path(os.path.relpath(self.file_path, where)),
        expansion_locs=tuple(
            x._replace(file_path=Path(os.path.relpath(x.file_path, where)))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
137
138
class ClangTidySchemaError(Error):
  """Signals that clang-tidy's YAML output didn't have the expected shape.

  Attributes:
    err_msg: description of what was wrong with the YAML.
  """

  def __init__(self, err_msg: str):
    self.err_msg = err_msg
    super().__init__(err_msg)
145
146
class LineOffsetMap:
  """Translates offsets into a file to line numbers and in-line offsets."""

  def __init__(self, newline_locations: Iterable[int]):
    """Builds a map from the offsets of every newline in a file.

    Args:
      newline_locations: the offset of each newline character in the file.
    """
    # A line starts right after each newline. The extra 0 and |sys.maxsize|
    # entries act as sentinels for the first and last lines, so the |bisect|
    # lookups below never need to special-case the ends of the file.
    starts = sorted([0, sys.maxsize] + [pos + 1 for pos in newline_locations])

    assert starts[0] == 0, starts[0]
    assert starts[1] != 0, starts[1]
    assert starts[-2] < sys.maxsize, starts[-2]
    assert starts[-1] == sys.maxsize, starts[-1]

    self._line_starts = starts

  def get_line_number(self, char_number: int) -> int:
    """Returns the line number (the first line is 1) holding |char_number|."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The number of line starts at or before |char_number| is its line number.
    return bisect.bisect_right(self._line_starts, char_number)

  def get_line_offset(self, char_number: int) -> int:
    """Returns how far |char_number| is from the start of its line."""
    start_index = self.get_line_number(char_number) - 1
    return char_number - self._line_starts[start_index]

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Creates a LineOffsetMap describing the lines of the string |data|."""
    return LineOffsetMap(i for i, char in enumerate(data) if char == '\n')
181
182
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be absolute;
      relative paths in |yaml_data| are resolved against it.
    yaml_data: The parsed YAML data from clang-tidy's fixits file. None (e.g.,
      an empty file) yields no diagnostics.

  Returns:
    A generator of |TidyDiagnostic|s.

  Raises:
    ClangTidySchemaError: if |yaml_data| lacks an expected key. Since this is
      a generator, the error surfaces while iterating over the result.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    if file_path:
      # Clang reports offsets in bytes, so newlines have to be located in the
      # raw bytes of the file. Indexing decoded text would shift every line
      # boundary after a multi-byte character, and would fail outright on
      # sources that aren't valid UTF-8.
      offsets = LineOffsetMap(
          m.start() for m in re.finditer(rb'\n', file_path.read_bytes()))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit relative
  # file paths for diagnostics. This fixes those.
  def makeabs(file_path: str) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only macro-expansion notes are of interest; skip everything else.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    # Chain explicitly so the traceback shows exactly where the key was missed.
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
287
288
289# Represents metadata about a clang-tidy invocation.
class InvocationMetadata(NamedTuple):
  """Describes one clang-tidy run, as recorded by the compiler wrapper."""
  # Exit code of the clang-tidy process.
  exit_code: int
  # Full command line: the executable followed by its arguments.
  invocation: List[str]
  # What clang-tidy was asked to lint.
  lint_target: str
  # Captured output of the clang-tidy process.
  stdstreams: str
  # Working directory that clang-tidy was run in.
  wd: str
297
298
class ExceptionData:
  """A snapshot of an exception that is safe to hand to another process.

  Only the formatted traceback text is retained.
  """

  def __init__(self):
    """Captures the exception being handled; call only in `except` blocks."""
    formatted = traceback.format_exc()
    self._formatted = formatted

  def __str__(self):
    return self._formatted
308
309
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Loads the results of one clang-tidy invocation from its JSON metadata.

  This is meant to run in a worker process. Exceptions travel poorly between
  processes, so any |Exception| is caught and handed back as a value instead
  of being raised (KeyboardInterrupt and similar still propagate).

  Args:
    json_file: The JSON invocation metadata file to parse. The matching YAML
      diagnostics file is expected next to it, with the same stem.

  Returns:
    An |ExceptionData| instance on failure. On success, a tuple of
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    raw_meta = json.loads(json_file.read_text(encoding='utf-8'))

    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    # A non-empty 'crash_output' entry means clang-tidy crashed; report
    # everything we know so the crash can be investigated from the logs alone.
    crash_info = raw_meta.get('crash_output')
    if crash_info:
      reproducer_path = crash_info['crash_reproducer_path']
      reproducer_output = crash_info['stdstreams']
      raise RuntimeError(f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {reproducer_path}
## Output producing reproducer:
{reproducer_output}
## Output from the crashing invocation:
{meta.stdstreams}
""")

    yaml_file = json_file.with_suffix('.yaml')
    # A missing YAML file most likely means clang-tidy was killed. Include its
    # output in the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    with yaml_file.open('rb') as f:
      yaml_data = yaml.safe_load(f)

    # Materialize the generator here so that parse errors are raised (and
    # caught below) inside this process.
    diagnostics = list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
    return meta, diagnostics
  except Exception:
    return ExceptionData()
368
369
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Builds a package with clang-tidy enabled and gathers the raw results.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a tmpdir that all of the lint YAML files (if any) will be in.
    This will also be populated by JSON files containing InvocationMetadata.
    The generation of this is handled by our compiler wrapper.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Start from an empty, world-writable |LINT_BASE| so that only reports from
  # this build end up in it.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  ebuild_cmd = [f'ebuild-{board}', ebuild_path, 'clean', 'compile']
  result = cros_build_lib.run(
      ebuild_cmd,
      check=False,
      print_cmd=True,
      # Tells the compiler wrapper to run clang-tidy next to each compile.
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  # A failed build is only worth a warning: whatever lints were produced
  # before the failure are still collected below.
  if result.returncode:
    logging.warning(
        'Running |ebuild| on %s %s', ebuild_path,
        f'failed with code {result.returncode}; output:\n{result.stdout}')
  else:
    logging.info('Running |ebuild| on %s %s', ebuild_path, 'succeeded')

  # Snapshot the reports, since the next |generate_lints| call will wipe
  # |LINT_BASE| again.
  snapshot_dir = tempfile.mkdtemp(prefix='tricium_tidy')
  osutils.CopyDirContents(LINT_BASE, snapshot_dir)
  return Path(snapshot_dir)
409
410
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: multiprocessing.Pool) -> Set[TidyDiagnostic]:
  """Parses every clang-tidy report in |lint_tmpdir| into diagnostics.

  Args:
    lint_tmpdir: directory holding the JSON/YAML files for a lint run.
    yaml_pool: pool used to run |parse_tidy_invocation| on each JSON file.

  Returns:
    The deduplicated set of diagnostics found across all reports.

  Raises:
    ClangTidyParseError: if parsing any of the reports failed.
  """
  metadata_files = list(lint_tmpdir.glob('*.json'))
  # |imap| hands results back in input order, so they can be paired up with
  # the files they came from.
  outcomes = yaml_pool.imap(parse_tidy_invocation, metadata_files)

  failure_count = 0
  diagnostics = set()
  for json_path, outcome in zip(metadata_files, outcomes):
    if isinstance(outcome, ExceptionData):
      failure_count += 1
      logging.error('Parsing %r failed with an exception\n%s', json_path,
                    outcome)
    else:
      meta, found = outcome
      # A nonzero exit is worth flagging, but any diagnostics that were
      # emitted anyway are still kept.
      if meta.exit_code:
        logging.warning(
            'Invoking clang-tidy on %r with flags %r exited with code %d; '
            'output:\n%s',
            meta.lint_target,
            meta.invocation,
            meta.exit_code,
            meta.stdstreams,
        )
      diagnostics.update(found)

  # Only complain once every report has been looked at, so all failures get
  # logged above.
  if failure_count:
    raise ClangTidyParseError(failure_count, len(metadata_files))

  return diagnostics
442
443
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Prepares |board| so that the given ebuilds can be linted.

  Args:
    board: the board to set up.
    ebuild_list: the ebuilds that are going to be linted.
  """
  packages = [ebuild.package for ebuild in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  sysroot = build_target_lib.get_default_sysroot_path(board)
  workon = workon_helper.WorkonHelper(sysroot)
  # Stop working on everything first, then start on exactly the packages that
  # were requested.
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  #
  # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
  # concurrency we can get here to (arbitrarily) 8 jobs. Having `configure`s
  # and such run in parallel is nice.
  job_limit = min(8, multiprocessing.cpu_count())
  cmd = [f'emerge-{board}', '--onlydeps', f'-j{job_limit}'] + packages
  result = cros_build_lib.run(cmd, print_cmd=True, check=False)
  if not result.returncode:
    return

  # Failing to emerge dependencies isn't fatal; linting is still attempted.
  logging.error('Setup failed with exit code %d; some lints may fail.',
                result.returncode)
469
470
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: the board to build the ebuilds for.
    ebuild_list: the ebuilds to build and lint.
    keep_dirs: if True, the temporary directories holding raw lint output are
      left in place (and their locations logged) rather than removed.
    parse_errors_are_nonfatal: if True, a |ClangTidyParseError| for one ebuild
      is logged and the remaining ebuilds are still processed.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.

  Raises:
    ClangTidyParseError: if lint output couldn't be parsed and
      |parse_errors_are_nonfatal| is False.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  osutils.RmDir(
      Path(build_target_lib.get_default_sysroot_path(
          board)) / 'var' / 'cache' / 'portage',
      ignore_missing=True,
      sudo=True,
  )

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.safe_load| on my otherwise-idle dev box.
  # |yaml_pool| lets us do this in parallel.
  #
  # Use |multiprocessing.Pool| rather than |multiprocessing.pool.Pool|: the
  # |multiprocessing.pool| submodule is only reachable as an attribute once
  # something has imported it, and this file only imports |multiprocessing|.
  with multiprocessing.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        # Runs whether or not parsing succeeded, so the raw output is either
        # reported or cleaned up in every case.
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
507
508
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: the board to resolve the packages for.
    package_names: package names to resolve. Entries that already end in
      '.ebuild' are assumed to be ebuild paths and are returned unchanged.

  Returns:
    A list of ebuild paths, in the same order as |package_names|.
  """
  # Import the submodule explicitly: |import multiprocessing| alone does not
  # make |multiprocessing.pool| available as an attribute.
  from multiprocessing.pool import ThreadPool  # pylint: disable=import-outside-toplevel

  def resolve_package(package_name_or_ebuild):
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
526
527
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable[TidyDiagnostic]) -> List[TidyDiagnostic]:
  """Selects the diagnostics worth reporting and normalizes their paths.

  Args:
    only_files: a set of file paths, or None; if this is non-empty, only
      |TidyDiagnostic|s in these files will be kept.
    git_repo_base: if not None, only files in the given directory will be kept.
      All paths of the returned diagnostics will be made relative to
      |git_repo_base|.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  kept = []
  seen = 0

  for seen, diag in enumerate(diags, start=1):
    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    path = Path(diag.file_path)
    # An empty/None |only_files| means "no per-file restriction".
    wanted = not only_files or path in only_files
    if not wanted:
      continue

    if git_repo_base:
      if git_repo_base not in path.parents:
        continue
      diag = diag.normalize_paths_to(git_repo_base)

    kept.append(diag)

  logging.info('Dropped %d/%d diags', seen - len(kept), seen)

  return sorted(kept)
574
575
def get_parser() -> commandline.ArgumentParser:
  """Builds the command-line parser for this script.

  Returns:
    A parser whose description is this module's docstring.
  """
  parser = commandline.ArgumentParser(description=__doc__)
  add = parser.add_argument

  # Where results go, and how they're filtered.
  add('--output', required=True, type='path', help='File to write results to.')
  add('--git-repo-base',
      type='path',
      help="Base directory of the git repo we're looking at. If specified, "
      'only diagnostics in files in this directory will be emitted. All '
      'diagnostic file paths will be made relative to this directory.')

  # What to build.
  add('--board', required=True, help='Board to run under.')
  add('--package',
      action='append',
      required=True,
      help='Package(s) to build and lint. Required.')

  # Debugging/robustness knobs.
  add('--keep-lint-dirs',
      action='store_true',
      help='Keep directories with tidy lints around; meant primarily for '
      'debugging.')
  add('--nonfatal-parse-errors',
      action='store_true',
      help="Keep going even if clang-tidy's output is impossible to parse.")

  # Optional positional list of files to restrict output to.
  add('file',
      nargs='*',
      type='path',
      help='File(s) to output lints for. If none are specified, this tool '
      'outputs all lints that clang-tidy emits after applying filtering '
      'from |--git-repo-base|, if applicable.')
  return parser
610
611
def main(argv: List[str]) -> None:
  """Script entry point: lints the requested packages and writes JSON results.

  Args:
    argv: command-line arguments, excluding the program name.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  # Diagnostics carry fully-resolved paths, so resolve these the same way to
  # make set membership checks meaningful.
  only_files = set(Path(f).resolve() for f in opts.file)

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(git_repo_base)
    # This script doesn't strictly care if there's a .git dir there; more of
    # a smoke check.
    if not git_repo_base.joinpath('.git').exists():
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  ebuild_paths = resolve_package_ebuilds(opts.board, opts.package)
  package_ebuilds = [portage_util.EBuild(path) for path in ebuild_paths]

  setup_tidy(opts.board, package_ebuilds)
  raw_diags = run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                       opts.nonfatal_parse_errors)
  lints = filter_tidy_lints(only_files, git_repo_base, diags=raw_diags)

  serialized = json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]})
  osutils.WriteFile(opts.output, serialized, atomic=True)