blob: dc8cc548164a4f19f25b2abe2be11b6c833f57a5 [file] [log] [blame]
George Burgess IV853d65b2020-02-25 13:13:15 -08001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Runs clang-tidy across the given files, dumping diagnostics to a JSON file.
6
7This script is intended specifically for use with Tricium (go/tricium).
8"""
9
10# From an implementation perspective, it's good to note that this script
11# cooperates with the toolchain's compiler wrapper. In particular,
12# ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go.
13#
14# When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX)
15# is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler.
16# This clang-tidy invocation will result in a few files being dumped to
17# |LINT_BASE| (below):
18# - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents
19# clang-tidy's diagnostics for the file the compiler was asked to build
20# - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file
21# was generated, including clang-tidy's exit code, stdout, etc. See
22# |InvocationMetadata| below.
23#
24# As one might expect, the compiler wrapper writes the JSON file only after
25# clang-tidy is done executing.
26#
27# This directory might contain other files, as well; these are ignored by this
28# script.
29
30import bisect
31import json
32import multiprocessing
33import os
34from pathlib import Path
35import re
36import subprocess
37import sys
38import tempfile
39import traceback
40from typing import (Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple,
41 Union)
42
43import yaml # pylint: disable=import-error
44from chromite.lib import commandline
45from chromite.lib import cros_build_lib
46from chromite.lib import cros_logging as logging
47from chromite.lib import osutils
48from chromite.lib import portage_util
49from chromite.lib import workon_helper
50
assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'

# Where the toolchain's compiler wrapper drops its clang-tidy reports: a YAML
# diagnostics file plus a JSON metadata file for each clang-tidy invocation.
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
55
56
class TidyReplacement(NamedTuple):
  """A single text substitution suggested by clang-tidy.

  No file path is stored here: a replacement is always attached to a
  |TidyDiagnostic|, and is expected to apply to that diagnostic's file.
  """
  # Text that should take the place of the replaced range.
  new_text: str
  # Line numbers (as produced by |LineOffsetMap.get_line_number|) of the start
  # and end of the replaced range.
  start_line: int
  end_line: int
  # Offsets within |start_line|/|end_line| (as produced by
  # |LineOffsetMap.get_line_offset|) of the start and end of the range.
  start_char: int
  end_char: int
68
69
class TidyExpandedFrom(NamedTuple):
  """One level of macro expansion surrounding a diagnostic.

  clang-tidy reports where a macro was expanded from whenever a diagnostic
  sits inside a macro expansion. A |TidyDiagnostic| carries one
  |TidyExpandedFrom| per level of that expansion.
  """
  # File containing the macro expansion.
  file_path: Path
  # Line of |file_path| at which the expansion occurs.
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Returns a JSON-serializable dict describing this expansion."""
    return dict(
        file_path=self.file_path.as_posix(),
        line_number=self.line_number,
    )
86
87
class Error(Exception):
  """Root of every exception type defined by tricium-clang-tidy."""
90
91
class ClangTidyParseError(Error):
  """Signals that one or more clang-tidy parsing jobs failed.

  Attributes:
    failed_jobs: number of parse jobs that failed.
    total_jobs: number of parse jobs that were attempted.
  """

  def __init__(self, failed_jobs: int, total_jobs: int):
    summary = f'{failed_jobs}/{total_jobs} parse jobs failed'
    super().__init__(summary)
    self.total_jobs = total_jobs
    self.failed_jobs = failed_jobs
99
100
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here. Sorting is mostly just for
  deterministic/pretty output.
  """
  # File the diagnostic was reported in. This is None when clang-tidy reported
  # an empty file name; |to_dict| and |normalize_paths_to| must not be called
  # on such diagnostics.
  file_path: Optional[Path]
  # Line of |file_path| the diagnostic points at.
  line_number: int
  # Name of the clang-tidy check that produced this diagnostic.
  diag_name: str
  # Human-readable diagnostic text.
  message: str
  # Fixes clang-tidy suggests for this diagnostic; possibly empty.
  replacements: Tuple[TidyReplacement, ...]
  # One entry per level of macro expansion the diagnostic is nested in;
  # possibly empty.
  expansion_locs: Tuple[TidyExpandedFrom, ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: the directory that paths should be made relative to.

    Returns:
      A copy of this diagnostic whose |file_path| and |expansion_locs| paths
      are relative to |where|.
    """
    return self._replace(
        # Use relpath because Path.relative_to requires that `self` is rooted
        # at `where`.
        file_path=Path(os.path.relpath(self.file_path, where)),
        expansion_locs=tuple(
            x._replace(file_path=Path(os.path.relpath(x.file_path, where)))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializeable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
135
136
class ClangTidySchemaError(Error):
  """Signals that clang-tidy's YAML did not have the expected structure.

  Attributes:
    err_msg: description of what was wrong with the YAML.
  """

  def __init__(self, err_msg: str):
    self.err_msg = err_msg
    super().__init__(err_msg)
143
144
class LineOffsetMap:
  """Translates offsets into a file to line numbers and in-line offsets."""

  def __init__(self, newline_locations: Iterable[int]):
    """Builds a map from the offsets at which newlines occur in a file."""
    # Each line starts one past a newline. Offset 0 (the start of the first
    # line) and |sys.maxsize| (a sentinel beyond any accepted offset) bracket
    # the list, so the |bisect| lookups below need no special-casing for the
    # first and last lines of a file.
    starts = sorted([0, sys.maxsize, *(loc + 1 for loc in newline_locations)])

    assert starts[0] == 0, starts[0]
    assert starts[1] != 0, starts[1]
    assert starts[-2] < sys.maxsize, starts[-2]
    assert starts[-1] == sys.maxsize, starts[-1]

    self._line_starts = starts

  def get_line_number(self, char_number: int) -> int:
    """Returns the 1-based number of the line containing |char_number|."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The count of line starts at or before |char_number| is exactly the
    # 1-based index of the line it falls on.
    line_number = bisect.bisect_right(self._line_starts, char_number)
    return line_number

  def get_line_offset(self, char_number: int) -> int:
    """Returns how far |char_number| is from the start of its line."""
    assert 0 <= char_number < sys.maxsize, char_number
    containing_line = bisect.bisect_right(self._line_starts, char_number) - 1
    line_start = self._line_starts[containing_line]
    return char_number - line_start

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Builds a LineOffsetMap from the newlines found in |data|."""
    return LineOffsetMap(i for i, char in enumerate(data) if char == '\n')
179
180
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be
      absolute; relative paths found in |yaml_data| are resolved against it.
    yaml_data: The parsed YAML data from clang-tidy's fixits file. May be
      None, in which case nothing is yielded.

  Yields:
    One |TidyDiagnostic| per entry in |yaml_data|'s 'Diagnostics' list.

  Raises:
    ClangTidySchemaError: if |yaml_data| lacks a key this parser needs.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    if file_path:
      # NOTE(review): the file is decoded as UTF-8 and the offsets from the
      # YAML are then used as character indices. If clang-tidy reports byte
      # offsets, files with multi-byte characters would map to skewed
      # positions -- confirm.
      offsets = LineOffsetMap.for_text(file_path.read_text(encoding='utf-8'))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit relative
  # file paths for diagnostics. This fixes those.
  def makeabs(file_path: str) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only notes describing macro expansions are kept; all others are
        # ignored.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    # Chain explicitly so the original KeyError is reported as the cause.
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
285
286
class InvocationMetadata(NamedTuple):
  """Describes how one clang-tidy invocation was run and how it ended.

  Instances are built from the JSON file the compiler wrapper writes next to
  each clang-tidy YAML report.
  """
  # Exit code of the clang-tidy process.
  exit_code: int
  # Full command line: the executable followed by its arguments.
  invocation: List[str]
  # What clang-tidy was asked to lint.
  lint_target: str
  # Output captured from the invocation's standard streams.
  stdstreams: str
  # Working directory the invocation ran in.
  wd: str
295
296
class ExceptionData:
  """A snapshot of an exception that is safe to pass between processes.

  Only the formatted traceback text is retained, not the exception object.
  """

  def __init__(self):
    """Captures the exception being handled; call only inside `except`."""
    formatted_traceback = traceback.format_exc()
    self._str = formatted_traceback

  def __str__(self):
    """Returns the traceback text captured at construction time."""
    return self._str
306
307
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Parses a clang-tidy invocation result based on a JSON file.

  This is intended to be run in a separate process, which Exceptions and
  locking and such work notoriously poorly over, so it's never intended to
  |raise| (except under a KeyboardInterrupt or similar).

  Args:
    json_file: The JSON invocation metadata file to parse. The matching YAML
      diagnostics file is expected next to it, with a '.yaml' suffix.

  Returns:
    An |ExceptionData| instance on failure. On success, it returns a
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    with json_file.open(encoding='utf-8') as f:
      raw_meta = json.load(f)

    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    raw_crash_output = raw_meta.get('crash_output')
    if raw_crash_output:
      crash_reproducer_path = raw_crash_output['crash_reproducer_path']
      output = raw_crash_output['stdstreams']
      raise RuntimeError(f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {crash_reproducer_path}
## Output producing reproducer:
{output}
## Output from the crashing invocation:
{meta.stdstreams}
""")

    yaml_file = json_file.with_suffix('.yaml')
    # If this happened, clang-tidy was probably killed. Dump output as part of
    # the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    with yaml_file.open('rb') as f:
      # |yaml.load| without an explicit Loader is deprecated (and an error in
      # newer PyYAML releases), and can construct arbitrary objects. The
      # fixits file is plain data, so the safe loader is sufficient.
      yaml_data = yaml.safe_load(f)
    return meta, list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
  except Exception:
    # Deliberately broad: every failure is shipped back to the parent process
    # as data rather than raised across the process boundary.
    return ExceptionData()
366
367
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Builds one ebuild with clang-tidy enabled and gathers the reports.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a fresh tmpdir holding a copy of everything that ended up in
    |LINT_BASE|: the lint YAML files (if any) and the JSON files containing
    InvocationMetadata. Those files are produced by our compiler wrapper.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Start from an empty |LINT_BASE| (recreated with mode 0o777) so that only
  # reports from this build are collected.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  ebuild_cmd = [f'ebuild-{board}', ebuild_path, 'clean', 'compile']
  result = cros_build_lib.run(
      ebuild_cmd,
      check=False,
      print_cmd=True,
      # This is what makes the compiler wrapper run clang-tidy.
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  # A failed build is only logged; whatever reports were produced are still
  # collected below.
  if not result.returncode:
    log_fn = logging.info
    status = 'succeeded'
  else:
    log_fn = logging.warning
    status = f'failed with code {result.returncode}; output:\n{result.stdout}'
  log_fn('Running |ebuild| on %s %s', ebuild_path, status)

  tmpdir = tempfile.mkdtemp(prefix='tricium_tidy')
  osutils.CopyDirContents(LINT_BASE, tmpdir)
  return Path(tmpdir)
407
408
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: multiprocessing.Pool) -> Set[TidyDiagnostic]:
  """Parses every clang-tidy report found directly inside |lint_tmpdir|.

  Args:
    lint_tmpdir: directory containing the '*.json' invocation metadata files
      (and their sibling YAML files) to parse.
    yaml_pool: pool on which the individual parse jobs are run.

  Returns:
    The deduplicated set of diagnostics from all reports.

  Raises:
    ClangTidyParseError: if any of the parse jobs failed.
  """
  json_files = list(lint_tmpdir.glob('*.json'))
  # |imap| yields results in input order, so they can be paired back up with
  # the files they came from.
  outcomes = yaml_pool.imap(parse_tidy_invocation, json_files)

  failure_count = 0
  diagnostics = set()
  for json_file, outcome in zip(json_files, outcomes):
    if not isinstance(outcome, ExceptionData):
      meta, complaints = outcome
      if meta.exit_code:
        # A nonzero exit is worth reporting, but its diagnostics still count.
        logging.warning(
            'Invoking clang-tidy on %r with flags %r exited with code %d; '
            'output:\n%s',
            meta.lint_target,
            meta.invocation,
            meta.exit_code,
            meta.stdstreams,
        )
      diagnostics.update(complaints)
    else:
      failure_count += 1
      logging.error('Parsing %r failed with an exception\n%s', json_file,
                    outcome)

  if failure_count:
    raise ClangTidyParseError(failure_count, len(json_files))

  return diagnostics
440
441
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Prepares |board|'s sysroot for linting the given ebuilds.

  Makes the ebuilds' packages the only ones being worked on, then installs
  their dependencies. A failure to install dependencies is logged, not raised.

  Args:
    board: the board to set up.
    ebuild_list: the ebuilds that are going to be linted.
  """
  packages = [ebuild.package for ebuild in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  workon = workon_helper.WorkonHelper(cros_build_lib.GetSysroot(board))
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
  # concurrency we can get here to (arbitrarily) 8 jobs. Having
  # `configure`s and such run in parallel is nice.
  max_jobs = min(8, multiprocessing.cpu_count())

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  cmd = [f'emerge-{board}', '--onlydeps', f'-j{max_jobs}', *packages]
  result = cros_build_lib.run(cmd, print_cmd=True, check=False)
  if result.returncode:
    logging.error('Setup failed with exit code %d; some lints may fail.',
                  result.returncode)
466
467
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: the board to build the ebuilds for.
    ebuild_list: the ebuilds to build and lint, one after another.
    keep_dirs: if True, the per-ebuild directories of lint artifacts are left
      in place (and their locations logged) instead of being removed.
    parse_errors_are_nonfatal: if True, a |ClangTidyParseError| for an ebuild
      is logged and that ebuild contributes no diagnostics; otherwise it
      propagates.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.

  Raises:
    ClangTidyParseError: if parsing fails and |parse_errors_are_nonfatal| is
      False.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  osutils.RmDir(
      Path(cros_build_lib.GetSysroot(board)) / 'var' / 'cache' / 'portage',
      ignore_missing=True,
      sudo=True,
  )

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.load| on my otherwise-idle dev box. |yaml_pool|
  # lets us do this in parallel.
  #
  # Use |multiprocessing.Pool| rather than |multiprocessing.pool.Pool|: the
  # |pool| submodule is not loaded by a bare |import multiprocessing|, so the
  # latter only works if some other module happens to have imported it.
  with multiprocessing.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
503
504
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: the board whose |equery| is used to look packages up.
    package_names: package names to resolve. Entries that already end in
      '.ebuild' are taken to be ebuild paths and are passed through untouched.

  Returns:
    A list of ebuild paths, in the same order as |package_names|.
  """
  # A bare |import multiprocessing| does not load the |pool| submodule, so
  # import it here instead of relying on another module having done so.
  from multiprocessing.pool import ThreadPool

  def resolve_package(package_name_or_ebuild: str) -> str:
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
522
523
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable[TidyDiagnostic]) -> List[TidyDiagnostic]:
  """Selects the diagnostics worth reporting and normalizes their paths.

  Args:
    only_files: a set of file paths, or None. If this is non-empty, only
      |TidyDiagnostic|s located in one of these files are kept.
    git_repo_base: if not None, only diagnostics for files underneath this
      directory are kept, and every path in the kept diagnostics is rewritten
      to be relative to it.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  kept = []
  seen = 0

  for seen, diag in enumerate(diags, start=1):
    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    file_path = Path(diag.file_path)
    in_requested_files = not only_files or file_path in only_files
    if not in_requested_files:
      continue

    if git_repo_base and git_repo_base not in file_path.parents:
      continue

    kept.append(
        diag.normalize_paths_to(git_repo_base) if git_repo_base else diag)

  logging.info('Dropped %d/%d diags', seen - len(kept), seen)

  return sorted(kept)
570
571
def get_parser() -> commandline.ArgumentParser:
  """Builds the command-line parser for this script.

  The module docstring is used as the parser's description.
  """
  parser = commandline.ArgumentParser(description=__doc__)
  add_arg = parser.add_argument

  add_arg(
      '--output',
      required=True,
      type='path',
      help='File to write results to.',
  )
  add_arg(
      '--git-repo-base',
      type='path',
      help="Base directory of the git repo we're looking at. If specified, "
      'only diagnostics in files in this directory will be emitted. All '
      'diagnostic file paths will be made relative to this directory.',
  )
  add_arg(
      '--board',
      required=True,
      help='Board to run under.',
  )
  add_arg(
      '--package',
      action='append',
      required=True,
      help='Package(s) to build and lint. Required.',
  )
  add_arg(
      '--keep-lint-dirs',
      action='store_true',
      help='Keep directories with tidy lints around; meant primarily for '
      'debugging.',
  )
  add_arg(
      '--nonfatal-parse-errors',
      action='store_true',
      help="Keep going even if clang-tidy's output is impossible to parse.",
  )
  # Positional: any number of files to restrict the output to.
  add_arg(
      'file',
      nargs='*',
      type='path',
      help='File(s) to output lints for. If none are specified, this tool '
      'outputs all lints that clang-tidy emits after applying filtering '
      'from |--git-repo-base|, if applicable.',
  )
  return parser
606
607
def main(argv: List[str]) -> None:
  """Lints the requested packages and writes the diagnostics out as JSON.

  Args:
    argv: command-line arguments, as understood by |get_parser|.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  only_files = set(Path(f).resolve() for f in opts.file)

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(git_repo_base)
    has_git_dir = (git_repo_base / '.git').exists()
    if not has_git_dir:
      # This script doesn't strictly care if there's a .git dir there; more of
      # a smoke check.
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  ebuild_paths = resolve_package_ebuilds(opts.board, opts.package)
  package_ebuilds = [portage_util.EBuild(path) for path in ebuild_paths]

  setup_tidy(opts.board, package_ebuilds)
  all_diags = run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                       opts.nonfatal_parse_errors)
  lints = filter_tidy_lints(only_files, git_repo_base, diags=all_diags)

  # Written atomically so readers never see a partially-written result file.
  serialized = json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]})
  osutils.WriteFile(opts.output, serialized, atomic=True)