# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs clang-tidy across the given files, dumping diagnostics to a JSON file.

This script is intended specifically for use with Tricium (go/tricium).
"""

# From an implementation perspective, it's good to note that this script
# cooperates with the toolchain's compiler wrapper. In particular,
# ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go.
#
# When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX)
# is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler.
# This clang-tidy invocation will result in a few files being dumped to
# |LINT_BASE| (below):
#   - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents
#     clang-tidy's diagnostics for the file the compiler was asked to build
#   - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file
#     was generated, including clang-tidy's exit code, stdout, etc. See
#     |InvocationMetadata| below.
#
# As one might expect, the compiler wrapper writes the JSON file only after
# clang-tidy is done executing.
#
# This directory might contain other files, as well; these are ignored by this
# script.
29
import bisect
import json
import multiprocessing
import multiprocessing.pool
import os
from pathlib import Path
import re
import subprocess
import sys
import tempfile
import traceback
from typing import (Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple,
                    Union)

import yaml  # pylint: disable=import-error

from chromite.lib import build_target_lib
from chromite.lib import commandline
from chromite.lib import cros_build_lib
from chromite.lib import cros_logging as logging
from chromite.lib import osutils
from chromite.lib import portage_util
from chromite.lib import workon_helper
52
George Burgess IV853d65b2020-02-25 13:13:15 -080053
# The directory under which the compiler wrapper stores clang-tidy reports.
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
56
57
class TidyReplacement(NamedTuple):
  """Represents a replacement emitted by clang-tidy.

  File path is omitted, since these are intended to be associated with
  TidyDiagnostics with identical paths.
  """
  # The text to substitute for the replaced span.
  new_text: str
  # Line numbers on which the replaced span starts and ends.
  start_line: int
  end_line: int
  # Offsets of the span's start and end within |start_line| and |end_line|,
  # respectively.
  start_char: int
  end_char: int
69
70
class TidyExpandedFrom(NamedTuple):
  """Represents a macro expansion.

  When a diagnostic is inside of a macro expansion, clang-tidy emits
  information about where said macro was expanded from. |TidyDiagnostic|s will
  have one |TidyExpandedFrom| for each level of this expansion.
  """
  file_path: Path
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyExpandedFrom| to a dict serializable as JSON."""
    return {
        # |Path| objects aren't JSON-serializable; emit a POSIX-style string.
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
    }
87
88
class Error(Exception):
  """Base error class for tricium-clang-tidy."""
91
92
class ClangTidyParseError(Error):
  """Raised when clang-tidy parsing jobs fail."""

  def __init__(self, failed_jobs: int, total_jobs: int):
    """Records how many of the |total_jobs| parse jobs failed."""
    super().__init__(f'{failed_jobs}/{total_jobs} parse jobs failed')
    self.failed_jobs = failed_jobs
    self.total_jobs = total_jobs
100
101
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here. Sorting is mostly just for
  deterministic/pretty output.
  """
  file_path: Path
  line_number: int
  diag_name: str
  message: str
  # Both of these are variable-length tuples (possibly empty); tuples rather
  # than lists so that instances stay hashable.
  replacements: Tuple['TidyReplacement', ...]
  expansion_locs: Tuple['TidyExpandedFrom', ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: The directory that paths should be made relative to.

    Returns:
      A copy of this diagnostic in which |file_path| and the |file_path| of
      every entry in |expansion_locs| is relative to |where|.
    """
    return self._replace(
        # Use relpath because Path.relative_to requires that `self` is rooted
        # at `where`.
        file_path=Path(os.path.relpath(self.file_path, where)),
        expansion_locs=tuple(
            x._replace(file_path=Path(os.path.relpath(x.file_path, where)))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
136
137
class ClangTidySchemaError(Error):
  """Raised when we encounter malformed YAML."""

  def __init__(self, err_msg: str):
    """Stores |err_msg| both as the exception message and as an attribute."""
    super().__init__(err_msg)
    self.err_msg = err_msg
144
145
class LineOffsetMap:
  """Convenient API to turn offsets in a file into line numbers.

  Line numbers returned by this class are 1-based; offsets within a line are
  0-based.
  """

  def __init__(self, newline_locations: Iterable[int]):
    """Builds a map from the offsets of each newline character in a file.

    Args:
      newline_locations: The offset of every newline in the file, in any
        order. May be empty.
    """
    # Each line starts one past the newline that terminated the previous one.
    line_starts = [x + 1 for x in newline_locations]
    # The |bisect| logic in |get_line_number|/|get_line_offset| gets a bit
    # complicated around the first and last lines of a file. Adding boundaries
    # here removes some complexity from those implementations.
    line_starts.append(0)
    line_starts.append(sys.maxsize)
    line_starts.sort()

    assert line_starts[0] == 0, line_starts[0]
    assert line_starts[1] != 0, line_starts[1]
    assert line_starts[-2] < sys.maxsize, line_starts[-2]
    assert line_starts[-1] == sys.maxsize, line_starts[-1]

    self._line_starts = line_starts

  def get_line_number(self, char_number: int) -> int:
    """Given a char offset into a file, returns its (1-based) line number."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The number of line starts at or before |char_number| is exactly the
    # 1-based number of the line containing it.
    return bisect.bisect_right(self._line_starts, char_number)

  def get_line_offset(self, char_number: int) -> int:
    """Given a char offset into a file, returns its (0-based) column number."""
    assert 0 <= char_number < sys.maxsize, char_number
    line_start_index = bisect.bisect_right(self._line_starts, char_number) - 1
    return char_number - self._line_starts[line_start_index]

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Creates a LineOffsetMap for the given string."""
    return LineOffsetMap(m.start() for m in re.finditer(r'\n', data))
180
181
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  Note that this is a generator: no validation or parsing happens until the
  result is iterated over.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be
      absolute.
    yaml_data: The parsed YAML data from clang-tidy's fixits file.

  Returns:
    A generator of |TidyDiagnostic|s.

  Raises:
    ClangTidySchemaError: if a key that this function requires is missing
      from |yaml_data|.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets: Dict[Optional[Path], LineOffsetMap] = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    if file_path:
      # NOTE(review): the map is built from decoded text, so it is indexed by
      # character. If clang-tidy's offsets are byte offsets (presumably they
      # are), files containing non-ASCII characters or CRLF line endings could
      # yield skewed line/column numbers -- TODO confirm.
      offsets = LineOffsetMap.for_text(file_path.read_text(encoding='utf-8'))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit relative
  # file paths for diagnostics. This fixes those.
  def makeabs(file_path: str) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only macro-expansion notes are tracked; all others are dropped.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
286
287
# Represents metadata about a clang-tidy invocation.
class InvocationMetadata(NamedTuple):
  """Metadata describing a singular invocation of clang-tidy."""
  # clang-tidy's exit code.
  exit_code: int
  # The command that was run: the executable followed by its arguments.
  invocation: List[str]
  # What clang-tidy was asked to lint.
  lint_target: str
  # Output captured from the invocation.
  stdstreams: str
  # The working directory of the invocation.
  wd: str
296
297
class ExceptionData:
  """Info about an exception that can be sent across processes."""

  def __init__(self):
    """Builds an instance; only intended to be called from `except` blocks."""
    # Capture the formatted traceback eagerly: it's a plain string, which is
    # all that this object carries.
    self._str = traceback.format_exc()

  def __str__(self):
    """Returns the formatted traceback captured at construction time."""
    return self._str
307
308
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Parses a clang-tidy invocation result based on a JSON file.

  This is intended to be run in a separate process, which Exceptions and
  locking and such work notoriously poorly over, so it's never intended to
  |raise| (except under a KeyboardInterrupt or similar).

  Args:
    json_file: The JSON invocation metadata file to parse. The clang-tidy
      YAML output is expected to sit next to it, with a '.yaml' suffix.

  Returns:
    An |ExceptionData| instance on failure. On success, it returns a
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    with json_file.open(encoding='utf-8') as f:
      raw_meta = json.load(f)

    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    raw_crash_output = raw_meta.get('crash_output')
    if raw_crash_output:
      crash_reproducer_path = raw_crash_output['crash_reproducer_path']
      output = raw_crash_output['stdstreams']
      raise RuntimeError(f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {crash_reproducer_path}
## Output producing reproducer:
{output}
## Output from the crashing invocation:
{meta.stdstreams}
""")

    yaml_file = json_file.with_suffix('.yaml')
    # If this happened, clang-tidy was probably killed. Dump output as part of
    # the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    with yaml_file.open('rb') as f:
      # NOTE(review): a bare |yaml.load(f)| is a TypeError on PyYAML >= 6 and
      # selects an unsafe loader on older versions. The data consumed by
      # |parse_tidy_fixes_file| is plain mappings/lists/scalars, so the safe
      # loader should be sufficient -- confirm if clang-tidy's output ever
      # needs more than that.
      yaml_data = yaml.load(f, Loader=yaml.SafeLoader)
    return meta, list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
  except Exception:
    # Deliberately broad: see the docstring. The traceback is preserved in
    # the returned |ExceptionData|.
    return ExceptionData()
367
368
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Collects the lints for a given package on a given board.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a tmpdir that all of the lint YAML files (if any) will be in.
    This will also be populated by JSON files containing InvocationMetadata.
    The generation of this is handled by our compiler wrapper.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Start from an empty |LINT_BASE| so that reports left behind by a previous
  # build don't get attributed to this one.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  result = cros_build_lib.run(
      [f'ebuild-{board}', ebuild_path, 'clean', 'compile'],
      check=False,
      print_cmd=True,
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  # A failed build is only logged, not raised: whatever reports were written
  # to |LINT_BASE| are still copied out below.
  if result.returncode:
    status = f'failed with code {result.returncode}; output:\n{result.stdout}'
    log_fn = logging.warning
  else:
    status = 'succeeded'
    log_fn = logging.info

  log_fn('Running |ebuild| on %s %s', ebuild_path, status)
  lint_tmpdir = tempfile.mkdtemp(prefix='tricium_tidy')
  osutils.CopyDirContents(LINT_BASE, lint_tmpdir)
  return Path(lint_tmpdir)
408
409
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: 'multiprocessing.pool.Pool'
                 ) -> Set[TidyDiagnostic]:
  """Collects the lints for a given directory filled with linting artifacts.

  Args:
    lint_tmpdir: A directory containing the '*.json' invocation metadata files
      to parse.
    yaml_pool: The process pool that parsing is farmed out to.

  Returns:
    The set of all |TidyDiagnostic|s found across every successfully parsed
    file.

  Raises:
    ClangTidyParseError: if parsing any of the files failed. This is only
      raised after every file has been attempted.
  """
  json_files = list(lint_tmpdir.glob('*.json'))
  # |imap| yields results in input order, so zipping them back up with
  # |json_files| below pairs each result with the file it came from.
  pending_parses = yaml_pool.imap(parse_tidy_invocation, json_files)

  parses_failed = 0
  all_complaints = set()
  for path, parse in zip(json_files, pending_parses):
    if isinstance(parse, ExceptionData):
      parses_failed += 1
      logging.error('Parsing %r failed with an exception\n%s', path, parse)
      continue

    meta, complaints = parse
    # A nonzero exit code is only worth a warning; any diagnostics that were
    # produced are still collected.
    if meta.exit_code:
      logging.warning(
          'Invoking clang-tidy on %r with flags %r exited with code %d; '
          'output:\n%s',
          meta.lint_target,
          meta.invocation,
          meta.exit_code,
          meta.stdstreams,
      )

    all_complaints.update(complaints)

  if parses_failed:
    raise ClangTidyParseError(parses_failed, len(json_files))

  return all_complaints
441
442
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Sets up to run clang-tidy on the given ebuilds for the given board.

  Args:
    board: The board to set up for.
    ebuild_list: The ebuilds that are going to be linted.
  """
  packages = [x.package for x in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  workon = workon_helper.WorkonHelper(
      build_target_lib.get_default_sysroot_path(board))
  # Make |packages| the only packages being worked on.
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  cmd = [
      f'emerge-{board}',
      '--onlydeps',
      # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
      # concurrency we can get here to (arbitrarily) 8 jobs. Having
      # `configure`s and such run in parallel is nice.
      f'-j{min(8, multiprocessing.cpu_count())}',
  ]
  cmd += packages
  result = cros_build_lib.run(cmd, print_cmd=True, check=False)
  # Failure here is deliberately nonfatal; linting is still attempted.
  if result.returncode:
    logging.error('Setup failed with exit code %d; some lints may fail.',
                  result.returncode)
468
469
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: The board to build for.
    ebuild_list: The ebuilds to build and lint, one at a time.
    keep_dirs: If True, the per-ebuild directories of lint artifacts are left
      on disk (and their locations logged) rather than removed.
    parse_errors_are_nonfatal: If True, a |ClangTidyParseError| for an ebuild
      is logged and that ebuild's lints are skipped; otherwise it propagates.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  osutils.RmDir(
      Path(build_target_lib.get_default_sysroot_path(
          board)) / 'var' / 'cache' / 'portage',
      ignore_missing=True,
      sudo=True,
  )

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.load| on my otherwise-idle dev box. |yaml_pool|
  # lets us do this in parallel.
  with multiprocessing.pool.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
506
507
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: The board whose |equery| should be used for the lookup.
    package_names: Package names to resolve. Anything ending in '.ebuild' is
      assumed to already be an ebuild path, and is passed through untouched.

  Returns:
    A list of ebuild paths, in the same order as |package_names|.
  """

  def resolve_package(package_name_or_ebuild):
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with multiprocessing.pool.ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
525
526
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable['TidyDiagnostic']
                     ) -> List['TidyDiagnostic']:
  """Transforms and filters the given TidyDiagnostics.

  Args:
    only_files: a set of file paths, or None; if this is not None (and not
      empty), only |TidyDiagnostic|s in these files will be kept.
    git_repo_base: if not None, only files in the given directory will be kept.
      All paths of the returned diagnostics will be made relative to
      |git_repo_base|.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  result_diags = []
  total_diags = 0

  for diag in diags:
    total_diags += 1

    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    file_path = Path(diag.file_path)
    # An empty |only_files| is treated the same as None: no filtering.
    if only_files and file_path not in only_files:
      continue

    if git_repo_base:
      # |parents| excludes |file_path| itself, so this keeps only paths that
      # are strictly underneath |git_repo_base|.
      if git_repo_base not in file_path.parents:
        continue
      diag = diag.normalize_paths_to(git_repo_base)

    result_diags.append(diag)

  logging.info('Dropped %d/%d diags', total_diags - len(result_diags),
               total_diags)

  result_diags.sort()
  return result_diags
573
574
def get_parser() -> commandline.ArgumentParser:
  """Creates an argument parser for this script."""
  # The module docstring doubles as the `--help` description.
  parser = commandline.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--output', required=True, type='path', help='File to write results to.')
  parser.add_argument(
      '--git-repo-base',
      type='path',
      help="Base directory of the git repo we're looking at. If specified, "
      'only diagnostics in files in this directory will be emitted. All '
      'diagnostic file paths will be made relative to this directory.')
  parser.add_argument('--board', required=True, help='Board to run under.')
  parser.add_argument(
      '--package',
      action='append',
      required=True,
      help='Package(s) to build and lint. Required.')
  parser.add_argument(
      '--keep-lint-dirs',
      action='store_true',
      help='Keep directories with tidy lints around; meant primarily for '
      'debugging.')
  parser.add_argument(
      '--nonfatal-parse-errors',
      action='store_true',
      help="Keep going even if clang-tidy's output is impossible to parse.")
  parser.add_argument(
      'file',
      nargs='*',
      type='path',
      help='File(s) to output lints for. If none are specified, this tool '
      'outputs all lints that clang-tidy emits after applying filtering '
      'from |--git-repo-base|, if applicable.')
  return parser
609
610
def main(argv: List[str]) -> None:
  """Builds and lints the requested packages, writing results as JSON.

  Args:
    argv: Command-line arguments; see |get_parser| for what's accepted.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  # Diagnostics are reported with resolved paths, so resolve the paths that
  # they'll be compared against, as well.
  only_files = {Path(f).resolve() for f in opts.file}

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(opts.git_repo_base)
    if not (git_repo_base / '.git').exists():
      # This script doesn't strictly care if there's a .git dir there; more of
      # a smoke check.
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  package_ebuilds = [
      portage_util.EBuild(x)
      for x in resolve_package_ebuilds(opts.board, opts.package)
  ]

  setup_tidy(opts.board, package_ebuilds)
  lints = filter_tidy_lints(
      only_files,
      git_repo_base,
      diags=run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                     opts.nonfatal_parse_errors))

  osutils.WriteFile(
      opts.output,
      json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]}),
      atomic=True)