blob: 665659294860c9a83f11f7bbaf9f1124a44cba24 [file] [log] [blame]
George Burgess IV853d65b2020-02-25 13:13:15 -08001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Runs clang-tidy across the given files, dumping diagnostics to a JSON file.
6
7This script is intended specifically for use with Tricium (go/tricium).
8"""
9
10# From an implementation perspective, it's good to note that this script
11# cooperates with the toolchain's compiler wrapper. In particular,
12# ${cros}/src/third_party/toolchain-utils/compiler_wrapper/clang_tidy_flag.go.
13#
14# When |WITH_TIDY=tricium| is set and the wrapper (which is already $CC/$CXX)
15# is invoked, $CC will invoke clang-tidy _as well_ as the regular compiler.
16# This clang-tidy invocation will result in a few files being dumped to
17# |LINT_BASE| (below):
18# - "${LINT_BASE}/some-prefix.yaml" -- a YAML file that represents
19# clang-tidy's diagnostics for the file the compiler was asked to build
20# - "${LINT_BASE}/some-prefix.json" -- metadata about how the above YAML file
21# was generated, including clang-tidy's exit code, stdout, etc. See
22# |InvocationMetadata| below.
23#
24# As one might expect, the compiler wrapper writes the JSON file only after
25# clang-tidy is done executing.
26#
27# This directory might contain other files, as well; these are ignored by this
28# script.
29
30import bisect
31import json
Chris McDonald59650c32021-07-20 15:29:28 -060032import logging
George Burgess IV853d65b2020-02-25 13:13:15 -080033import multiprocessing
34import os
35from pathlib import Path
36import re
37import subprocess
38import sys
39import tempfile
40import traceback
Chris McDonald59650c32021-07-20 15:29:28 -060041from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
George Burgess IV853d65b2020-02-25 13:13:15 -080042
43import yaml # pylint: disable=import-error
Mike Frysinger06a51c82021-04-06 11:39:17 -040044
45from chromite.lib import build_target_lib
George Burgess IV853d65b2020-02-25 13:13:15 -080046from chromite.lib import commandline
47from chromite.lib import cros_build_lib
George Burgess IV853d65b2020-02-25 13:13:15 -080048from chromite.lib import osutils
49from chromite.lib import portage_util
50from chromite.lib import workon_helper
51
George Burgess IV853d65b2020-02-25 13:13:15 -080052
# The directory under which the compiler wrapper stores clang-tidy reports.
# |generate_lints| deletes and recreates this directory before each build, so
# nothing placed here survives from one package's lint run to the next.
LINT_BASE = Path('/tmp/linting_output/clang-tidy')
55
56
class TidyReplacement(NamedTuple):
  """Represents a replacement emitted by clang-tidy.

  File path is omitted, since these are intended to be associated with
  TidyDiagnostics with identical paths.

  The positions are derived from clang-tidy's `Offset` and `Length` via
  |LineOffsetMap|: line numbers start at 1, while the `*_char` values are
  0-based distances from the start of their line.
  """
  # Text to put in place of the replaced range (clang-tidy's ReplacementText).
  new_text: str
  # Line containing the first replaced position.
  start_line: int
  # Line containing the position at `Offset + Length`, i.e. just past the end
  # of the replaced range.
  end_line: int
  # Column of the first replaced position within |start_line|.
  start_char: int
  # Column of the position just past the replaced range within |end_line|.
  end_char: int
68
69
class TidyExpandedFrom(NamedTuple):
  """One level of macro expansion attached to a diagnostic.

  clang-tidy reports where a macro was expanded from whenever a diagnostic
  lands inside a macro expansion. A |TidyDiagnostic| carries one of these per
  expansion level.
  """
  file_path: Path
  line_number: int

  def to_dict(self) -> Dict[str, Any]:
    """Returns a JSON-serializable dict describing this expansion site."""
    posix_path = self.file_path.as_posix()
    return dict(file_path=posix_path, line_number=self.line_number)
86
87
class Error(Exception):
  """Base error class for tricium-clang-tidy.

  The script-specific exceptions defined in this file derive from this class,
  so a single `except Error` clause covers all of them.
  """
90
91
class ClangTidyParseError(Error):
  """Signals that one or more clang-tidy result files failed to parse.

  Attributes:
    failed_jobs: how many parse jobs failed.
    total_jobs: how many parse jobs were attempted overall.
  """

  def __init__(self, failed_jobs: int, total_jobs: int):
    summary = f'{failed_jobs}/{total_jobs} parse jobs failed'
    super().__init__(summary)
    self.failed_jobs, self.total_jobs = failed_jobs, total_jobs
99
100
class TidyDiagnostic(NamedTuple):
  """A diagnostic emitted by clang-tidy.

  Note that we shove these in a set for cheap deduplication, and we sort based
  on the natural element order here. Sorting is mostly just for
  deterministic/pretty output.

  |file_path| is None for diagnostics that clang-tidy reported with an empty
  file name. |normalize_paths_to| and |to_dict| both require a real path, so
  such diagnostics must be dropped before calling either.
  """
  file_path: Optional[Path]
  line_number: int
  diag_name: str
  message: str
  # Both tuples hold any number of elements, hence the `...`.
  replacements: Tuple[TidyReplacement, ...]
  expansion_locs: Tuple[TidyExpandedFrom, ...]

  def normalize_paths_to(self, where: Union[str, Path]) -> 'TidyDiagnostic':
    """Creates a new TidyDiagnostic with all paths relative to |where|.

    Args:
      where: the directory that paths should be made relative to.

    Returns:
      A copy of this diagnostic in which |file_path| and the |file_path| of
      every entry in |expansion_locs| are relative to |where|.
    """
    return self._replace(
        # Use relpath because Path.relative_to requires that `self` is rooted
        # at `where`.
        file_path=Path(os.path.relpath(self.file_path, where)),
        expansion_locs=tuple(
            x._replace(file_path=Path(os.path.relpath(x.file_path, where)))
            for x in self.expansion_locs))

  def to_dict(self) -> Dict[str, Any]:
    """Converts this |TidyDiagnostic| to a dict serializable as JSON."""
    return {
        'file_path': self.file_path.as_posix(),
        'line_number': self.line_number,
        'diag_name': self.diag_name,
        'message': self.message,
        'replacements': [x._asdict() for x in self.replacements],
        'expansion_locs': [x.to_dict() for x in self.expansion_locs],
    }
135
136
class ClangTidySchemaError(Error):
  """Signals that clang-tidy's YAML lacked something this script requires.

  Attributes:
    err_msg: the human-readable description passed to the constructor.
  """

  def __init__(self, err_msg: str):
    self.err_msg = err_msg
    super().__init__(err_msg)
143
144
class LineOffsetMap:
  """Translates offsets within a file into line numbers and columns."""

  def __init__(self, newline_locations: Iterable[int]):
    """Builds a map from the offsets at which newlines occur.

    Args:
      newline_locations: the offset of every newline in the file.
    """
    # A line begins one position after each newline. The sentinels at 0 and
    # |sys.maxsize| mean the |bisect| lookups below never have to special-case
    # the first or the last line of the file.
    starts = [0, sys.maxsize]
    starts.extend(location + 1 for location in newline_locations)
    starts.sort()

    assert starts[0] == 0, starts[0]
    assert starts[1] != 0, starts[1]
    assert starts[-2] < sys.maxsize, starts[-2]
    assert starts[-1] == sys.maxsize, starts[-1]

    self._line_starts = starts

  def get_line_number(self, char_number: int) -> int:
    """Given a char offset into a file, returns its (1-based) line number."""
    assert 0 <= char_number < sys.maxsize, char_number
    # The number of line starts at or before the offset is the line number.
    return bisect.bisect_right(self._line_starts, char_number)

  def get_line_offset(self, char_number: int) -> int:
    """Given a char offset into a file, returns its (0-based) column."""
    assert 0 <= char_number < sys.maxsize, char_number
    containing_line = bisect.bisect_right(self._line_starts, char_number)
    line_start = self._line_starts[containing_line - 1]
    return char_number - line_start

  @staticmethod
  def for_text(data: str) -> 'LineOffsetMap':
    """Creates a LineOffsetMap for the given string."""
    newline_matches = re.finditer(r'\n', data)
    return LineOffsetMap(match.start() for match in newline_matches)
179
180
def parse_tidy_fixes_file(tidy_invocation_dir: Path,
                          yaml_data: Any) -> Iterable[TidyDiagnostic]:
  """Parses a clang-tidy YAML file.

  clang reports `FileOffset`/`Offset`/`Length` in bytes, so the files they
  refer to are indexed as raw bytes rather than decoded text. Line numbers are
  therefore correct even when a file has multibyte characters (or is not valid
  UTF-8). The `start_char`/`end_char` values of replacements are byte columns
  within their line.

  This is a generator: nothing below (including the sanity check on
  |tidy_invocation_dir|) runs until the result is iterated.

  Args:
    tidy_invocation_dir: The directory clang-tidy was run in. Must be absolute;
      relative paths in |yaml_data| are resolved against it.
    yaml_data: The parsed YAML data from clang-tidy's fixits file. None is
      treated as "no diagnostics".

  Yields:
    One |TidyDiagnostic| per entry in the YAML's 'Diagnostics' list.

  Raises:
    ClangTidySchemaError: if a required key is missing from |yaml_data|.
  """
  assert tidy_invocation_dir.is_absolute(), tidy_invocation_dir

  if yaml_data is None:
    return

  # A cache of file_path => LineOffsetMap so we only need to load offsets once
  # per file per |parse_tidy_fixes_file| invocation.
  cached_line_offsets = {}

  def get_line_offsets(file_path: Optional[Path]) -> LineOffsetMap:
    """Gets a LineOffsetMap for the given |file_path|."""
    assert not file_path or file_path.is_absolute(), file_path

    if file_path in cached_line_offsets:
      return cached_line_offsets[file_path]

    # Sometimes tidy will give us empty file names; they don't map to any file,
    # and are generally issues it has with CFLAGS, etc. File offsets don't
    # matter in those, so use an empty map.
    if file_path:
      # Index newlines in the undecoded bytes so that the map's offsets are in
      # the same unit (bytes) as the offsets clang-tidy hands us.
      raw_contents = file_path.read_bytes()
      offsets = LineOffsetMap(
          m.start() for m in re.finditer(rb'\n', raw_contents))
    else:
      offsets = LineOffsetMap(())
    cached_line_offsets[file_path] = offsets
    return offsets

  # Rarely (e.g., in the case of missing |#include|s), clang will emit relative
  # file paths for diagnostics. This fixes those.
  def makeabs(file_path: str) -> Optional[Path]:
    """Resolves a |file_path| emitted by clang-tidy to an absolute path.

    Returns None if |file_path| is empty.
    """
    if not file_path:
      return None
    path = Path(file_path)
    if not path.is_absolute():
      path = tidy_invocation_dir / path
    return path.resolve()

  try:
    for diag in yaml_data['Diagnostics']:
      message = diag['DiagnosticMessage']
      file_path = message['FilePath']

      absolute_file_path = makeabs(file_path)
      line_offsets = get_line_offsets(absolute_file_path)

      replacements = []
      for replacement in message.get('Replacements', ()):
        replacement_file_path = makeabs(replacement['FilePath'])

        # FIXME(gbiv): This happens in practice with things like
        # hicpp-member-init. Supporting it should be simple, but I'd like to
        # get the basics running first.
        if replacement_file_path != absolute_file_path:
          logging.warning(
              "Replacement %r wasn't in original file %r (diag: %r)",
              replacement_file_path, file_path, diag)
          continue

        start_offset = replacement['Offset']
        # One past the last replaced byte.
        end_offset = start_offset + replacement['Length']
        replacements.append(
            TidyReplacement(
                new_text=replacement['ReplacementText'],
                start_line=line_offsets.get_line_number(start_offset),
                end_line=line_offsets.get_line_number(end_offset),
                start_char=line_offsets.get_line_offset(start_offset),
                end_char=line_offsets.get_line_offset(end_offset),
            ))

      expansion_locs = []
      for note in diag.get('Notes', ()):
        # Only macro-expansion notes are of interest; skip every other note.
        if not note['Message'].startswith('expanded from macro '):
          continue

        absolute_note_path = makeabs(note['FilePath'])
        note_offsets = get_line_offsets(absolute_note_path)
        expansion_locs.append(
            TidyExpandedFrom(
                file_path=absolute_note_path,
                line_number=note_offsets.get_line_number(note['FileOffset']),
            ))

      yield TidyDiagnostic(
          diag_name=diag['DiagnosticName'],
          message=message['Message'],
          file_path=absolute_file_path,
          line_number=line_offsets.get_line_number(message['FileOffset']),
          replacements=tuple(replacements),
          expansion_locs=tuple(expansion_locs),
      )
  except KeyError as k:
    key_name = k.args[0]
    # Chain explicitly so the original lookup failure stays in the traceback
    # as the cause rather than as an incidental "during handling" context.
    raise ClangTidySchemaError(
        f'Broken yaml: missing key {key_name!r}') from k
285
286
# Represents metadata about a clang-tidy invocation.
class InvocationMetadata(NamedTuple):
  """Metadata describing a singular invocation of clang-tidy.

  Instances are built by |parse_tidy_invocation| from the JSON file that the
  compiler wrapper writes next to each clang-tidy YAML report.
  """
  # clang-tidy's exit code; nonzero values are logged as warnings by
  # |collect_lints|.
  exit_code: int
  # The full command line: the JSON's 'executable' followed by its 'args'.
  invocation: List[str]
  # What clang-tidy was asked to lint (the JSON's 'lint_target').
  lint_target: str
  # Output captured from the clang-tidy run (the JSON's 'stdstreams').
  stdstreams: str
  # The directory clang-tidy was run in; relative paths in its diagnostics are
  # resolved against this.
  wd: str
295
296
class ExceptionData:
  """A snapshot of an exception that can be sent across processes.

  Only the formatted traceback text is kept, not the exception object.
  """

  def __init__(self):
    """Captures the active traceback; call only from inside an `except`."""
    self._traceback_text = traceback.format_exc()

  def __str__(self):
    return self._traceback_text
306
307
def parse_tidy_invocation(
    json_file: Path,
) -> Union[ExceptionData, Tuple[InvocationMetadata, List[TidyDiagnostic]]]:
  """Loads one clang-tidy invocation's metadata and diagnostics.

  This is meant to run in a worker process. Exceptions (and locking, etc.)
  travel poorly between processes, so any failure is reported through the
  return value; this should never |raise|, other than for things like
  KeyboardInterrupt that aren't |Exception|s.

  Args:
    json_file: The JSON invocation metadata file to parse. The matching YAML
      report is expected alongside it, with a `.yaml` suffix.

  Returns:
    An |ExceptionData| instance on failure. On success, it returns a
    (InvocationMetadata, [TidyDiagnostic]).
  """
  try:
    assert json_file.suffix == '.json', json_file

    raw_meta = json.loads(json_file.read_text(encoding='utf-8'))
    meta = InvocationMetadata(
        exit_code=raw_meta['exit_code'],
        invocation=[raw_meta['executable']] + raw_meta['args'],
        lint_target=raw_meta['lint_target'],
        stdstreams=raw_meta['stdstreams'],
        wd=raw_meta['wd'],
    )

    # The wrapper records extra details when clang-tidy crashed; surface all
    # of it, since a crash is impossible to debug otherwise.
    raw_crash_output = raw_meta.get('crash_output')
    if raw_crash_output:
      crash_reproducer_path = raw_crash_output['crash_reproducer_path']
      output = raw_crash_output['stdstreams']
      crash_report = f"""\
Clang-tidy apparently crashed; dumping lots of invocation info:
## Tidy JSON file target: {json_file}
## Invocation: {meta.invocation}
## Target: {meta.lint_target}
## Crash reproducer is at: {crash_reproducer_path}
## Output producing reproducer:
{output}
## Output from the crashing invocation:
{meta.stdstreams}
"""
      raise RuntimeError(crash_report)

    yaml_file = json_file.with_suffix('.yaml')
    # A missing YAML file probably means clang-tidy was killed. Put its output
    # in the exception so it's easier to reason about what happened.
    if not yaml_file.exists():
      raise RuntimeError("clang-tidy didn't produce an output file for "
                         f'{json_file}. Output:\n{meta.stdstreams}')

    yaml_data = yaml.safe_load(yaml_file.read_bytes())
    diagnostics = list(parse_tidy_fixes_file(Path(meta.wd), yaml_data))
  except Exception:
    return ExceptionData()
  return meta, diagnostics
366
367
def generate_lints(board: str, ebuild_path: str) -> Path:
  """Builds a package with clang-tidy enabled and gathers the reports.

  A failing build is logged as a warning rather than raised; whatever reports
  were written before the failure are still collected.

  Args:
    board: the board to collect lints for.
    ebuild_path: the path to the ebuild to collect lints for.

  Returns:
    The path to a fresh tmpdir holding a copy of everything the compiler
    wrapper wrote to |LINT_BASE|: the lint YAML files (if any) and the JSON
    files containing InvocationMetadata.
  """
  logging.info('Running lints for %r on board %r', ebuild_path, board)

  # Start from an empty directory so that reports from an earlier package
  # can't be mistaken for this one's.
  osutils.RmDir(LINT_BASE, ignore_missing=True, sudo=True)
  osutils.SafeMakedirs(LINT_BASE, 0o777, sudo=True)

  # FIXME(gbiv): |test| might be better here?
  ebuild_cmd = [f'ebuild-{board}', ebuild_path, 'clean', 'compile']
  result = cros_build_lib.run(
      ebuild_cmd,
      check=False,
      print_cmd=True,
      # This is what makes the compiler wrapper run clang-tidy.
      extra_env={'WITH_TIDY': 'tricium'},
      capture_output=True,
      encoding='utf-8',
      errors='replace',
  )

  if not result.returncode:
    log_fn, status = logging.info, 'succeeded'
  else:
    log_fn = logging.warning
    status = f'failed with code {result.returncode}; output:\n{result.stdout}'
  log_fn('Running |ebuild| on %s %s', ebuild_path, status)

  # Copy the reports out, since |LINT_BASE| is wiped on the next call.
  lint_tmpdir = tempfile.mkdtemp(prefix='tricium_tidy')
  osutils.CopyDirContents(LINT_BASE, lint_tmpdir)
  return Path(lint_tmpdir)
407
408
def collect_lints(lint_tmpdir: Path,
                  yaml_pool: multiprocessing.Pool) -> Set[TidyDiagnostic]:
  """Parses every clang-tidy report found directly inside |lint_tmpdir|.

  Args:
    lint_tmpdir: directory holding the `*.json` metadata files (and their
      sibling YAML reports).
    yaml_pool: pool used to run |parse_tidy_invocation| on each JSON file.

  Returns:
    The deduplicated set of diagnostics from all reports.

  Raises:
    ClangTidyParseError: if any report failed to parse. This is raised only
      after every report has been processed and its failure logged.
  """
  json_files = list(lint_tmpdir.glob('*.json'))
  # |imap| yields results in input order, so they pair up with |json_files|.
  outcomes = yaml_pool.imap(parse_tidy_invocation, json_files)

  failure_count = 0
  diagnostics = set()
  for json_path, outcome in zip(json_files, outcomes):
    if not isinstance(outcome, ExceptionData):
      meta, complaints = outcome
      # A nonzero exit is worth flagging, but any diagnostics clang-tidy
      # managed to write are still used.
      if meta.exit_code:
        logging.warning(
            'Invoking clang-tidy on %r with flags %r exited with code %d; '
            'output:\n%s',
            meta.lint_target,
            meta.invocation,
            meta.exit_code,
            meta.stdstreams,
        )
      diagnostics.update(complaints)
    else:
      failure_count += 1
      logging.error('Parsing %r failed with an exception\n%s', json_path,
                    outcome)

  if failure_count:
    raise ClangTidyParseError(failure_count, len(json_files))
  return diagnostics
440
441
def setup_tidy(board: str, ebuild_list: List[portage_util.EBuild]):
  """Prepares |board| for linting the packages in |ebuild_list|.

  This stops working on all packages, starts working on the requested ones,
  and then emerges their dependencies. A failure while emerging dependencies
  is logged but doesn't abort.

  Args:
    board: the board to set up.
    ebuild_list: the ebuilds that are going to be linted.
  """
  packages = [ebuild.package for ebuild in ebuild_list]
  logging.info('Setting up to lint %r', packages)

  sysroot = build_target_lib.get_default_sysroot_path(board)
  workon = workon_helper.WorkonHelper(sysroot)
  workon.StopWorkingOnPackages(packages=[], use_all=True)
  workon.StartWorkingOnPackages(packages)

  # Since each `emerge` may eat up to `ncpu` cores, limit the maximum
  # concurrency we can get here to (arbitrarily) 8 jobs. Having
  # `configure`s and such run in parallel is nice.
  max_jobs = min(8, multiprocessing.cpu_count())

  # We're going to be hacking with |ebuild| later on, so having all
  # dependencies in place is necessary so one |ebuild| won't stomp on another.
  cmd = [f'emerge-{board}', '--onlydeps', f'-j{max_jobs}']
  cmd.extend(packages)
  result = cros_build_lib.run(cmd, print_cmd=True, check=False)
  exit_code = result.returncode
  if exit_code:
    logging.error('Setup failed with exit code %d; some lints may fail.',
                  exit_code)
467
468
def run_tidy(board: str, ebuild_list: List[portage_util.EBuild],
             keep_dirs: bool,
             parse_errors_are_nonfatal: bool) -> Set[TidyDiagnostic]:
  """Runs clang-tidy on the given ebuilds for the given board.

  Args:
    board: the board to build for.
    ebuild_list: the ebuilds to build and lint, one at a time.
    keep_dirs: if True, the per-ebuild directories of lint artifacts are left
      on disk (and their locations logged) instead of being deleted.
    parse_errors_are_nonfatal: if True, a |ClangTidyParseError| for one ebuild
      is logged and that ebuild contributes no diagnostics; otherwise the
      error propagates.

  Returns:
    The set of |TidyDiagnostic|s produced by doing so.

  Raises:
    ClangTidyParseError: if parsing fails and |parse_errors_are_nonfatal| is
      False.
  """
  # Since we rely on build actions _actually_ running, we can't live with a
  # cache.
  portage_cache = (
      Path(build_target_lib.get_default_sysroot_path(board)) / 'var' /
      'cache' / 'portage')
  osutils.RmDir(portage_cache, ignore_missing=True, sudo=True)

  results = set()
  # If clang-tidy dumps a lot of diags, it can take 1-10secs of CPU while
  # holding the GIL to |yaml.safe_load| on my otherwise-idle dev box.
  # |yaml_pool| lets us do this in parallel.
  #
  # Use the documented |multiprocessing.Pool| entry point: the
  # |multiprocessing.pool| submodule is only reachable as an attribute if
  # something has imported it, which a plain `import multiprocessing` doesn't
  # do.
  with multiprocessing.Pool() as yaml_pool:
    for ebuild in ebuild_list:
      lint_tmpdir = generate_lints(board, ebuild.ebuild_path)
      try:
        results |= collect_lints(lint_tmpdir, yaml_pool)
      except ClangTidyParseError:
        if not parse_errors_are_nonfatal:
          raise
        logging.exception('Working on %r', ebuild)
      finally:
        # Runs on success and failure alike, so artifacts are never leaked
        # unless the user asked to keep them.
        if keep_dirs:
          logging.info('Lints for %r are in %r', ebuild.ebuild_path,
                       lint_tmpdir)
        else:
          osutils.RmDir(lint_tmpdir, ignore_missing=True, sudo=True)
  return results
505
506
def resolve_package_ebuilds(board: str,
                            package_names: Iterable[str]) -> List[str]:
  """Figures out ebuild paths for the given package names.

  Args:
    board: the board whose `equery-{board}` is used for lookups.
    package_names: package names and/or paths ending in `.ebuild`. The latter
      are passed through untouched.

  Returns:
    One ebuild path per element of |package_names|, in the same order.
  """
  # `import multiprocessing` doesn't load the |pool| submodule, so
  # |multiprocessing.pool| is only an attribute if some other module happened
  # to import it. Import the class explicitly instead of relying on that.
  # pylint: disable=import-outside-toplevel
  from multiprocessing.pool import ThreadPool

  def resolve_package(package_name_or_ebuild: str) -> str:
    """Resolves a single package name to an ebuild path."""
    if package_name_or_ebuild.endswith('.ebuild'):
      return package_name_or_ebuild
    return cros_build_lib.run([f'equery-{board}', 'w', package_name_or_ebuild],
                              check=True,
                              stdout=subprocess.PIPE,
                              encoding='utf-8').stdout.strip()

  # Resolving ebuilds takes time. If we get more than one (like when I'm
  # testing on 50 of them), parallelism speeds things up quite a bit.
  with ThreadPool() as pool:
    return pool.map(resolve_package, package_names)
524
525
def filter_tidy_lints(only_files: Optional[Set[Path]],
                      git_repo_base: Optional[Path],
                      diags: Iterable[TidyDiagnostic]) -> List[TidyDiagnostic]:
  """Selects the diagnostics worth reporting, and normalizes their paths.

  Diagnostics with no associated file are always dropped.

  Args:
    only_files: a set of file paths, or None. If this is non-empty, only
      |TidyDiagnostic|s in these files are kept; None and the empty set both
      disable this filter.
    git_repo_base: if not None, only diagnostics for files underneath this
      directory are kept, and all paths of the returned diagnostics are made
      relative to it.
    diags: diagnostics to transform/filter.

  Returns:
    A sorted list of |TidyDiagnostic|s.
  """
  kept = []
  seen = 0

  for seen, diag in enumerate(diags, start=1):
    if not diag.file_path:
      # Things like |-DFOO=1 -DFOO=2| can trigger diagnostics ("oh no you're
      # redefining |FOO| with a different value") in 'virtual' files; these
      # receive no name in clang.
      logging.info('Dropping diagnostic %r, since it has no associated file',
                   diag)
      continue

    file_path = Path(diag.file_path)
    requested = not only_files or file_path in only_files
    if not requested:
      continue

    if git_repo_base:
      inside_repo = git_repo_base in file_path.parents
      if not inside_repo:
        continue
      diag = diag.normalize_paths_to(git_repo_base)

    kept.append(diag)

  logging.info('Dropped %d/%d diags', seen - len(kept), seen)
  return sorted(kept)
572
573
def get_parser() -> commandline.ArgumentParser:
  """Builds the command-line argument parser for this script.

  The module docstring is used as the parser's description.
  """
  parser = commandline.ArgumentParser(description=__doc__)
  add_arg = parser.add_argument

  add_arg(
      '--output', type='path', required=True, help='File to write results to.')
  add_arg(
      '--git-repo-base',
      type='path',
      help=("Base directory of the git repo we're looking at. If specified, "
            'only diagnostics in files in this directory will be emitted. All '
            'diagnostic file paths will be made relative to this directory.'))
  add_arg('--board', required=True, help='Board to run under.')
  add_arg(
      '--package',
      required=True,
      action='append',
      help='Package(s) to build and lint. Required.')
  add_arg(
      '--keep-lint-dirs',
      action='store_true',
      help=('Keep directories with tidy lints around; meant primarily for '
            'debugging.'))
  add_arg(
      '--nonfatal-parse-errors',
      action='store_true',
      help="Keep going even if clang-tidy's output is impossible to parse.")
  add_arg(
      'file',
      type='path',
      nargs='*',
      help=('File(s) to output lints for. If none are specified, this tool '
            'outputs all lints that clang-tidy emits after applying filtering '
            'from |--git-repo-base|, if applicable.'))
  return parser
608
609
def main(argv: List[str]) -> None:
  """Lints the requested packages and writes the diagnostics out as JSON.

  Args:
    argv: command-line arguments, as accepted by the parser |get_parser|
      builds.
  """
  cros_build_lib.AssertInsideChroot()
  parser = get_parser()
  opts = parser.parse_args(argv)
  opts.Freeze()

  # Diagnostics carry resolved paths, so resolve these for comparison.
  only_files = set()
  for requested_file in opts.file:
    only_files.add(Path(requested_file).resolve())

  git_repo_base = opts.git_repo_base
  if git_repo_base:
    git_repo_base = Path(git_repo_base)
    # This script doesn't strictly care if there's a .git dir there; more of
    # a smoke check.
    has_git_dir = (git_repo_base / '.git').exists()
    if not has_git_dir:
      parser.error(f'Given git repo base ({git_repo_base}) has no .git dir')

  ebuild_paths = resolve_package_ebuilds(opts.board, opts.package)
  package_ebuilds = [portage_util.EBuild(path) for path in ebuild_paths]

  setup_tidy(opts.board, package_ebuilds)
  all_diags = run_tidy(opts.board, package_ebuilds, opts.keep_lint_dirs,
                       opts.nonfatal_parse_errors)
  lints = filter_tidy_lints(only_files, git_repo_base, diags=all_diags)

  serialized = json.dumps({'tidy_diagnostics': [x.to_dict() for x in lints]})
  osutils.WriteFile(opts.output, serialized, atomic=True)