blob: ddfab0c895df524723e258b6bc5dbf7f91fe668d [file] [log] [blame]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2021 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6""" This script cleans up the vendor directory.
7"""
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -07008import argparse
George Burgess IV635f7262022-08-09 21:32:20 -07009import collections
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000010import hashlib
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080011import json
12import os
13import pathlib
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070014import re
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -070015import shutil
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000016import subprocess
George Burgess IV04833702022-08-09 22:00:38 -070017import textwrap
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -070018import toml
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000019
# We only care about crates we're actually going to use and that's usually
# limited to ones with cfg(linux). For running `cargo metadata`, limit results
# to only this platform. This is the default `filter_platform` argument of
# load_metadata().
DEFAULT_PLATFORM_FILTER = "x86_64-unknown-linux-gnu"
24
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000025
def _rerun_checksums(package_path):
    """Recompute the sha256 digest of every file in a vendored package.

    The digests replace the 'files' entry of
    $package_path/.cargo-checksum.json; all other keys of that file are
    written back unchanged.

    Args:
        package_path: Directory of the vendored crate.

    Returns:
        False if the package has no .cargo-checksum.json. Otherwise True.
    """
    checksum_path = os.path.join(package_path, '.cargo-checksum.json')
    if not pathlib.Path(checksum_path).is_file():
        return False

    with open(checksum_path, 'r') as existing:
        contents = json.load(existing)

    hashes = {}
    for root, _, files in os.walk(package_path, topdown=True):
        for name in files:
            # The checksum file never lists itself.
            if name == ".cargo-checksum.json":
                continue

            file_path = os.path.join(root, name)
            with open(file_path, 'rb') as stream:
                digest = hashlib.sha256(stream.read()).hexdigest()

            # Keys are paths relative to the package directory.
            hashes[os.path.relpath(file_path, package_path)] = digest

    # Only rewrite the checksum file when at least one file was hashed.
    if hashes:
        print("{} regenerated {} hashes".format(package_path, len(hashes)))
        contents['files'] = hashes
        with open(checksum_path, 'w') as out:
            json.dump(contents, out, sort_keys=True)

    return True
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080063
64
def _remove_OWNERS_checksum(root):
    """Drop every OWNERS entry from a crate's checksum file.

    Any key of the 'files' table whose path contains the text 'OWNERS' is
    removed, and the checksum file is rewritten only if something changed.

    Args:
        root: Root directory for the vendored crate.

    Returns:
        True if OWNERS was found and cleaned up. Otherwise False (including
        when the directory has no .cargo-checksum.json).
    """
    checksum_path = os.path.join(root, '.cargo-checksum.json')
    if not pathlib.Path(checksum_path).is_file():
        return False

    with open(checksum_path, 'r') as src:
        contents = json.load(src)

    del_keys = [name for name in contents['files'] if 'OWNERS' in name]
    if not del_keys:
        return False

    for name in del_keys:
        del contents['files'][name]

    print('{} deleted: {}'.format(root, del_keys))
    with open(checksum_path, 'w') as dst:
        json.dump(contents, dst, sort_keys=True)

    return True
95
96
def cleanup_owners(vendor_path):
    """Strip OWNERS entries from every checksum file under the vendor dir.

    We currently do not check in the OWNERS files from vendored crates because
    they interfere with the find-owners functionality in gerrit. Every
    directory below `vendor_path` is visited and any "OWNERS" entries in its
    checksum file are removed. The names of the cleaned directories are
    printed at the end.

    Args:
        vendor_path: Absolute path to vendor directory.
    """
    deps_cleaned = []
    for root, dirs, _ in os.walk(vendor_path):
        for name in dirs:
            if not _remove_OWNERS_checksum(os.path.join(root, name)):
                continue
            deps_cleaned.append(name)

    if not deps_cleaned:
        return

    print('Cleanup owners:\n {}'.format("\n".join(deps_cleaned)))
117
118
def apply_single_patch(patch, workdir):
    """Run `patch -p1 -i <patch>` inside `workdir`.

    Args:
        patch: Path of the patch file to apply.
        workdir: Directory the `patch` command is run in.

    Returns:
        True if successful. False otherwise.
    """
    command = ["patch", "-p1", "-i", patch]
    completed = subprocess.run(command, cwd=workdir)
    return completed.returncode == 0
127
128
def apply_patch_script(script, workdir):
    """Execute a patch script with `workdir` as its working directory.

    Args:
        script: Path of the executable to run; it receives no arguments.
        workdir: Directory the script is run in.

    Returns:
        True if successful. False otherwise.
    """
    completed = subprocess.run([script], cwd=workdir)
    return completed.returncode == 0
136
137
def determine_vendor_crates(vendor_path):
    """Returns a map of {crate_name: [directory]} at the given vendor_path.

    Entries are expected to be named `${crate_name}-${version}`. The version
    is recognized as MAJOR.MINOR.PATCH with an optional pre-release
    (`-rc.1`) or build (`+meta`) suffix, so `foo-1.0.0-rc.1` maps to `foo`
    rather than `foo-1.0.0`. Entries with no '-' at all (e.g. stray files)
    are ignored instead of raising.

    Args:
        vendor_path: Directory holding the vendored crates.

    Returns:
        A defaultdict(list) from crate name to the sorted list of entry names
        for that crate.
    """
    # Crate names cannot contain '.', so the first '-<n>.<n>.<n>' is where the
    # version starts; the non-greedy name keeps hyphenated crate names whole.
    versioned = re.compile(r'^(?P<name>.+?)-\d+\.\d+\.\d+(?:[-+].*)?$')

    result = collections.defaultdict(list)
    for crate_name_plus_ver in os.listdir(vendor_path):
        match = versioned.match(crate_name_plus_ver)
        if match:
            name = match.group('name')
        elif '-' in crate_name_plus_ver:
            # Unusual version text: fall back to splitting on the last '-'.
            name = crate_name_plus_ver.rsplit('-', 1)[0]
        else:
            # Not a `${crate_name}-${version}` entry.
            continue
        result[name].append(crate_name_plus_ver)

    for crate_list in result.values():
        crate_list.sort()
    return result
148
149
def apply_patches(patches_path, vendor_path):
    """Finds patches and applies them to sub-folders in the vendored crates.

    Patch directories and the patches inside them are processed in sorted
    name order, so repeated runs apply patches in the same sequence.

    Args:
        patches_path: Path to folder with patches. Expect all patches to be one
            level down (matching the crate name).
        vendor_path: Root path to vendored crates directory.

    Raises:
        RuntimeError: A patch directory matches no vendored crate.
        ValueError: At least one patch or patch script failed.
    """
    # Don't bother running if there is no patches directory.
    if not pathlib.Path(patches_path).is_dir():
        return

    patched_crates = set()
    patches_failed = False
    vendor_crate_map = determine_vendor_crates(vendor_path)

    # Look for all patches and apply them. os.listdir() returns entries in
    # arbitrary order, hence the explicit sort.
    for d in sorted(os.listdir(patches_path)):
        dir_path = os.path.join(patches_path, d)

        # We don't process patches in root dir
        if not os.path.isdir(dir_path):
            continue

        # We accept one of two forms here:
        # - direct targets (these are named `${crate_name}-${version}`)
        # - simply the crate name (which applies to all versions of the
        #   crate)
        direct_target = os.path.join(vendor_path, d)
        if os.path.isdir(direct_target):
            patch_targets = [d]
        elif d in vendor_crate_map:
            patch_targets = vendor_crate_map[d]
        else:
            raise RuntimeError(f'Unknown crate in {vendor_path}: {d}')

        for patch in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, patch)

            # Skip anything that is not a regular file
            if not os.path.isfile(file_path):
                continue

            if patch.endswith(".patch"):
                apply = apply_single_patch
            elif os.access(file_path, os.X_OK):
                apply = apply_patch_script
            else:
                # Unrecognized. Skip it.
                continue

            for target_name in patch_targets:
                patched_crates.add(target_name)
                target = os.path.join(vendor_path, target_name)
                print(f"-- Applying {file_path} to {target}")
                if not apply(file_path, target):
                    print(f"Failed to apply {file_path} to {target}")
                    patches_failed = True

    # Do this late, so we can report all of the failing patches in one
    # invocation.
    if patches_failed:
        raise ValueError('Patches failed; please see above logs')

    # Re-run checksums for all modified packages since we applied patches.
    for key in sorted(patched_crates):
        _rerun_checksums(os.path.join(vendor_path, key))
217
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700218
def get_workspace_cargo_toml(working_dir):
    """Returns a one-element list holding working_dir/projects/Cargo.toml."""
    manifest = working_dir / 'projects' / 'Cargo.toml'
    return [manifest]
George Burgess IV40cc91c2022-08-15 13:07:40 -0700222
223
def run_cargo_vendor(working_dir):
    """Re-creates the `vendor` directory with `cargo vendor`.

    Any existing `working_dir/vendor` is deleted first, then
    `cargo vendor --versioned-dirs` is run against projects/Cargo.toml.

    Args:
        working_dir: Directory to run inside, as a pathlib path. It must
            contain projects/Cargo.toml; the output goes to its `vendor`
            sub-directory.
    """
    # Cargo will refuse to revendor into versioned directories, which leads to
    # repeated `./vendor.py` invocations trying to apply patches to
    # already-patched sources. Start from a clean slate to avoid this.
    vendor_dir = working_dir / 'vendor'
    if vendor_dir.exists():
        shutil.rmtree(vendor_dir)

    options = ['--versioned-dirs', '-v', '--manifest-path=projects/Cargo.toml']
    cargo_cmdline = ['cargo', 'vendor'] + options + ['--', 'vendor']
    subprocess.check_call(cargo_cmdline, cwd=working_dir)
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +0000249
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700250
def load_metadata(working_dir, filter_platform=DEFAULT_PLATFORM_FILTER):
    """Load `cargo metadata` for projects/Cargo.toml.

    Args:
        working_dir: Base directory to run from; the manifest path
            `projects/Cargo.toml` is resolved relative to it.
        filter_platform: Filter packages to ones configured for this platform.
            Pass a falsy value (e.g. None) to disable filtering.

    Returns:
        The decoded JSON document printed by `cargo metadata`.

    Raises:
        subprocess.CalledProcessError: cargo exited with a non-zero status.
    """
    cmd = [
        'cargo',
        'metadata',
        '--format-version=1',
        '--manifest-path=projects/Cargo.toml',
    ]
    # Conditionally add platform filter
    if filter_platform:
        cmd += ("--filter-platform", filter_platform)
    output = subprocess.check_output(cmd, cwd=working_dir)
    return json.loads(output)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700270
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700271
class LicenseManager:
    """Manage consolidating licenses for all packages.

    The manager reads `cargo metadata`, picks one license (and usually one
    license file) per vendored package, and writes the resulting license map
    and the list of license names used.
    """

    # These are all the licenses we support. Keys are what is seen in metadata
    # and values are what is expected by the ebuild.
    SUPPORTED_LICENSES = {
        '0BSD': '0BSD',
        'Apache-2.0': 'Apache-2.0',
        'BSD-3-Clause': 'BSD-3',
        'ISC': 'ISC',
        'MIT': 'MIT',
        'MPL-2.0': 'MPL-2.0',
        'unicode': 'unicode',
    }

    # Prefer to take attribution licenses in this order. All these require that
    # we actually use the license file found in the package so they MUST have
    # a license file set.
    PREFERRED_ATTRIB_LICENSE_ORDER = ['MIT', 'BSD-3', 'ISC']

    # If Apache license is found, always prefer it (simplifies attribution)
    APACHE_LICENSE = 'Apache-2.0'

    # Regex for license files found in the vendored directories. Search for
    # these files with re.IGNORECASE.
    #
    # These will be searched in order with the earlier entries being preferred.
    LICENSE_NAMES_REGEX = [
        r'^license-mit$',
        r'^copyright$',
        r'^licen[cs]e.*$',
    ]

    # Some crates have their license file in other crates. This usually occurs
    # because multiple crates are published from the same git repository and
    # the license isn't updated in each sub-crate. In these cases, we can just
    # ignore these packages.
    MAP_LICENSE_TO_OTHER = {
        'failure_derive': 'failure',
        'grpcio-compiler': 'grpcio',
        'grpcio-sys': 'grpcio',
        'rustyline-derive': 'rustyline',
    }

    # Map a package to a specific license and license file. Only use this if
    # a package doesn't have an easily discoverable license or exports its
    # license in a weird way. Prefer to patch the project with a license and
    # upstream the patch instead.
    STATIC_LICENSE_MAP = {
        # "package name": ( "license name", "license file relative location")
        # Patch for adding this is upstream, but the patch application doesn't
        # apply to `cargo metadata`. This is presumably because it can't detect
        # our vendor directory.
        # https://gitlab.freedesktop.org/slirp/libslirp-sys/-/merge_requests/6
        'libslirp-sys': ('MIT', 'LICENSE'),
    }

    def __init__(self, working_dir, vendor_dir):
        """Remembers where metadata is loaded from and where crates live.

        Args:
            working_dir: Directory `cargo metadata` is run from; license file
                paths in the license map are made relative to it.
            vendor_dir: Directory containing the `${name}-${version}` crates.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _find_license_in_dir(self, search_dir):
        """Yields the paths of license-looking files directly in search_dir.

        Only regular files whose name matches one of LICENSE_NAMES_REGEX
        (case-insensitively) are produced. Files are yielded in preference
        order: by the index of the first regex they match, then by file name,
        so the result does not depend on directory listing order.
        """
        ranked = []
        for entry in os.listdir(search_dir):
            path = os.path.join(search_dir, entry)
            # Ignore anything that's not a file
            if not os.path.isfile(path):
                continue

            # Rank the file by the first (most preferred) regex it matches.
            for rank, regex in enumerate(self.LICENSE_NAMES_REGEX):
                if re.search(regex, entry, re.IGNORECASE):
                    ranked.append((rank, entry, path))
                    break

        for _, _, path in sorted(ranked):
            yield path

    def _guess_license_type(self, license_file):
        """Guesses which license a file holds from its name, then its text.

        Returns:
            'MIT', 'APACHE', 'BSD-3', or '' when nothing matched.
        """
        # Only the file name is inspected, so markers that happen to appear in
        # a parent directory name cannot influence the guess.
        file_name = os.path.basename(license_file)
        if '-MIT' in file_name:
            return 'MIT'
        elif '-APACHE' in file_name:
            return 'APACHE'
        elif '-BSD' in file_name:
            return 'BSD-3'

        # License texts may contain non-ASCII characters; decode leniently so
        # the guess never fails because of the locale's default encoding.
        with open(license_file, 'r', encoding='utf-8',
                  errors='replace') as f:
            lines = f.read()

        # NOTE(review): these are plain substring tests; 'MIT' also matches
        # inside longer words such as 'LIMITED'.
        if 'MIT' in lines:
            return 'MIT'
        elif 'Apache' in lines:
            return 'APACHE'
        elif 'BSD 3-Clause' in lines:
            return 'BSD-3'

        return ''

    def generate_license(self, skip_license_check, print_map_to_file,
                         license_shorthand_file):
        """Generate the license map and license list from metadata.

        Args:
            skip_license_check: Package names to skip, or None.
            print_map_to_file: If truthy, path (joined onto working_dir) the
                JSON license map is written to.
            license_shorthand_file: Path the sorted license names are written
                to, one per line after a comment header.

        Raises:
            ValueError: A package with the unicode license expression has no
                LICENSE-UNICODE file, or that license is present without the
                Apache license being used anywhere.
            Exception: A package has no acceptable license, or a chosen
                non-Apache license has no license file.
        """
        metadata = load_metadata(self.working_dir)

        has_license_types = set()
        bad_licenses = {}

        # Keep license map ordered so it generates a consistent license map
        license_map = {}

        skip_license_check = skip_license_check or []
        has_unicode_license = False

        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if 'path+file:///' in package['id']:
                continue

            pkg_name = package['name']
            if pkg_name in skip_license_check:
                print(
                    "Skipped license check on {}. Reason: Skipped from command line"
                    .format(pkg_name))
                continue

            if pkg_name in self.MAP_LICENSE_TO_OTHER:
                print(
                    'Skipped license check on {}. Reason: License already in {}'
                    .format(pkg_name, self.MAP_LICENSE_TO_OTHER[pkg_name]))
                continue

            # Check if we have a static license map for this package. Use the
            # static values if we have it already set.
            if pkg_name in self.STATIC_LICENSE_MAP:
                static_license, static_file = self.STATIC_LICENSE_MAP[pkg_name]
                license_map[pkg_name] = {
                    "license": static_license,
                    "license_file": static_file,
                }
                continue

            # use `or ''` instead of get's default, since `package` may have a
            # None value for 'license'.
            license_expr = package.get('license') or ''

            # We ignore the metadata for license file because most crates don't
            # have it set. Just scan the source for licenses.
            pkg_version = package['version']
            license_files = list(self._find_license_in_dir(
                os.path.join(self.vendor_dir, f'{pkg_name}-{pkg_version}')))

            # FIXME(b/240953811): The code later in this loop is only
            # structured to handle ORs, not ANDs. Fortunately, this license in
            # particular is `AND`ed between a super common license (Apache) and
            # a more obscure one (unicode). This hack is specifically intended
            # for the `unicode-ident` crate, though no crate name check is
            # made, since it's OK other crates happen to have this license.
            if license_expr == '(MIT OR Apache-2.0) AND Unicode-DFS-2016':
                has_unicode_license = True
                # We'll check later to be sure MIT or Apache-2.0 is represented
                # properly.
                for x in license_files:
                    if os.path.basename(x) == 'LICENSE-UNICODE':
                        unicode_file = x
                        break
                else:
                    raise ValueError('No LICENSE-UNICODE found in '
                                     f'{license_files}')
                license_map[pkg_name] = {
                    "license": license_expr,
                    "license_file": unicode_file,
                }
                has_license_types.add('unicode')
                continue

            # If there are multiple licenses, they are delimited with "OR" or "/"
            delim = ' OR ' if ' OR ' in license_expr else '/'
            found = [x.strip() for x in license_expr.split(delim)]

            # Filter licenses to ones we support
            licenses_or = [
                self.SUPPORTED_LICENSES[f] for f in found
                if f in self.SUPPORTED_LICENSES
            ]

            # If apache license is found, always prefer it because it simplifies
            # license attribution (we can use existing Apache notice)
            if self.APACHE_LICENSE in licenses_or:
                has_license_types.add(self.APACHE_LICENSE)
                license_map[pkg_name] = {'license': self.APACHE_LICENSE}

            # Handle single license that has at least one license file
            # We pick the first license file and the license
            elif len(licenses_or) == 1:
                if license_files:
                    chosen = licenses_or[0]
                    chosen_file = license_files[0]

                    has_license_types.add(chosen)
                    license_map[pkg_name] = {
                        'license': chosen,
                        'license_file': os.path.relpath(chosen_file,
                                                        self.working_dir),
                    }
                else:
                    bad_licenses[pkg_name] = "{} missing license file".format(
                        licenses_or[0])
            # Handle multiple licenses
            elif len(licenses_or) > 1:
                # NOTE(review): when no license file can be matched to a
                # preferred license, the package ends up in neither
                # license_map nor bad_licenses.
                license_found = False
                # Check preferred licenses in order
                for preferred in self.PREFERRED_ATTRIB_LICENSE_ORDER:
                    if preferred not in licenses_or:
                        continue

                    for candidate in license_files:
                        if self._guess_license_type(candidate) == preferred:
                            license_found = True
                            has_license_types.add(preferred)
                            license_map[pkg_name] = {
                                'license': preferred,
                                'license_file': os.path.relpath(
                                    candidate, self.working_dir),
                            }
                            break

                    # Break out of loop if license is found
                    if license_found:
                        break
            else:
                # None of the listed licenses is one we support.
                bad_licenses[pkg_name] = license_expr

        # If we had any bad licenses, we need to abort
        if bad_licenses:
            for name, reason in bad_licenses.items():
                print("{} had no acceptable licenses: {}".format(
                    name, reason))
            raise Exception("Bad licenses in vendored packages.")

        # Write license map to file
        if print_map_to_file:
            with open(os.path.join(self.working_dir, print_map_to_file),
                      'w') as lfile:
                json.dump(license_map, lfile, sort_keys=True)

        # Raise missing licenses unless we have a valid reason to ignore them
        raise_missing_license = False
        for name, v in license_map.items():
            if 'license_file' not in v and v.get('license',
                                                 '') != self.APACHE_LICENSE:
                raise_missing_license = True
                print(" {}: Missing license file. Fix or add to ignorelist.".
                      format(name))

        if raise_missing_license:
            raise Exception(
                "Unhandled missing license file. "
                "Make sure all are accounted for before continuing.")

        # The unicode license expression also requires MIT or Apache-2.0; we
        # rely on the Apache license being in use elsewhere.
        if has_unicode_license:
            if self.APACHE_LICENSE not in has_license_types:
                raise ValueError('Need the apache license; currently have: '
                                 f'{sorted(has_license_types)}')

        sorted_licenses = sorted(has_license_types)
        print("Add the following licenses to the ebuild:\n",
              sorted_licenses)
        header = textwrap.dedent("""\
            # File to describe the licenses used by this registry.
            # Used to it's easy to automatically verify ebuilds are updated.
            # Each line is a license. Lines starting with # are comments.
            """)
        with open(license_shorthand_file, 'w', encoding='utf-8') as f:
            f.write(header)
            f.write('\n'.join(sorted_licenses))
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700543
544
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700545# TODO(abps) - This needs to be replaced with datalog later. We should compile
546# all crab files into datalog and query it with our requirements
547# instead.
class CrabManager:
    """Manage audit files."""

    def __init__(self, working_dir, crab_dir):
        """Remembers the metadata directory and the crab file directory.

        Args:
            working_dir: Directory `cargo metadata` is run from.
            crab_dir: Directory holding one `${name}-${version}.toml` crab
                file per crate.
        """
        self.working_dir = working_dir
        self.crab_dir = crab_dir

    def _check_bad_traits(self, crabdata):
        """Checks that a package's crab audit meets our requirements.

        Args:
            crabdata: Dict with crab keys in standard templated format. It
                must contain a 'common' table.

        Returns:
            True if the crate is NOT acceptable (currently: its 'common'
            table has a truthy 'deny' value). Otherwise False.
        """
        common = crabdata['common']
        # TODO(b/200578411) - Figure out what conditions we should enforce as
        # part of the audit.
        conditions = [
            common.get('deny', None),
        ]

        # If any conditions are true, this crate is not acceptable.
        return any(conditions)

    def verify_traits(self):
        """Verify that all required CRAB traits for this repository are met.

        Failures are printed, one line per crate; nothing is raised or
        returned for a failing audit.
        """
        metadata = load_metadata(self.working_dir)

        failing_crates = {}

        # Verify all packages have a CRAB file associated with it and they meet
        # all our required traits
        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if 'path+file:///' in package['id']:
                continue

            crabname = "{}-{}".format(package['name'], package['version'])
            filename = os.path.join(self.crab_dir, "{}.toml".format(crabname))

            # If crab file doesn't exist, the crate fails
            if not os.path.isfile(filename):
                failing_crates[crabname] = "No crab file: {}".format(filename)
                continue

            with open(filename, 'r', encoding='utf-8') as f:
                crabdata = toml.loads(f.read())

            # If crab file's crate_name and version keys don't match this
            # package, it also fails. This is just housekeeping... A missing
            # key counts as a mismatch rather than crashing the audit.
            name_matches = package['name'] == crabdata.get('crate_name')
            version_matches = package['version'] == crabdata.get('version')
            if not (name_matches and version_matches):
                failing_crates[crabname] = "Crate name or version don't match"
                continue

            if self._check_bad_traits(crabdata):
                failing_crates[crabname] = "Failed bad traits check"

        # If we had any failing crates, list them now
        if failing_crates:
            print('Failed CRAB audit:')
            for k, v in failing_crates.items():
                print(' {}: {}'.format(k, v))
611
612
def clean_features_in_place(cargo_toml):
    """Empties every feature in `cargo_toml` except `default`.

    Each key of the `features` table other than `default` is set to an empty
    list, so no feature enables anything. A manifest without a (non-empty)
    `features` table is left untouched.
    """
    features = cargo_toml.get('features')
    if features:
        for feature_name in features:
            if feature_name == 'default':
                continue
            features[feature_name] = []
622
623
def remove_all_target_dependencies_in_place(cargo_toml):
    """Removes all `target.*.dependencies` from `cargo_toml`.

    `target.*.dev-dependencies` are removed as well. Target tables that end up
    empty are deleted, and the whole `target` table is deleted when every
    entry ended up empty.
    """
    target = cargo_toml.get('target')
    if not target:
        return

    emptied = []
    for cfg, table in target.items():
        for section in ('dependencies', 'dev-dependencies'):
            table.pop(section, None)
        if not table:
            emptied.append(cfg)

    if len(emptied) == len(target):
        del cargo_toml['target']
        return

    for cfg in emptied:
        del target[cfg]
George Burgess IV0313d782022-08-15 23:45:44 -0700642
643
class CrateDestroyer():
    """Replaces vendored crates we never build with empty stub crates.

    A crate counts as unused when it appears in the unfiltered
    `cargo metadata` output but no package of the same name appears in the
    output filtered to the default platform.
    """

    # Sole content of the stub crate's src/lib.rs.
    LIB_RS_BODY = """compile_error!("This crate cannot be built for this configuration.");\n"""

    def __init__(self, working_dir, vendor_dir):
        """Remembers where metadata is loaded from and where crates live.

        Args:
            working_dir: Directory `cargo metadata` is run from.
            vendor_dir: Directory containing the `${name}-${version}` crates.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _modify_cargo_toml(self, pkg_path):
        """Rewrites pkg_path/Cargo.toml to describe an empty crate."""
        cargo_path = os.path.join(pkg_path, 'Cargo.toml')
        with open(cargo_path, 'r', encoding='utf-8') as cargo:
            contents = toml.load(cargo)

        package = contents['package']

        # Change description, license and delete license key
        package['description'] = 'Empty crate that should not build.'
        package['license'] = 'Apache-2.0'

        # The manifest key is spelled `license-file`; the underscore spelling
        # is dropped too in case a manifest uses it. The file it points at no
        # longer exists once the crate has been emptied.
        package.pop('license-file', None)
        package.pop('license_file', None)
        # If there's no build.rs but we specify `links = "foo"`, Cargo gets
        # upset.
        package.pop('links', None)

        # Some packages have cfg-specific dependencies. Remove them here; we
        # don't care about the dependencies of an empty package.
        #
        # This is a load-bearing optimization: `dev-python/toml` doesn't
        # always round-trip dumps(loads(x)) correctly when `x` has keys with
        # strings (b/242589711#comment3). The place this has bitten us so far
        # is target dependencies, which can be harmlessly removed for now.
        #
        # Cleaning features in-place is also necessary, since we're removing
        # dependencies, and a feature can enable features in dependencies.
        # Cargo errors out on `[features] foo = "bar/baz"` if `bar` isn't a
        # dependency.
        clean_features_in_place(contents)
        remove_all_target_dependencies_in_place(contents)

        with open(cargo_path, 'w', encoding='utf-8') as cargo:
            toml.dump(contents, cargo)

    def _replace_source_contents(self, package_path):
        """Deletes everything in the crate except its manifest and checksums.

        Afterwards the directory holds only Cargo.toml (byte-identical),
        .cargo-checksum.json (re-serialized) and a src/lib.rs containing
        LIB_RS_BODY.
        """
        # First load the checksum file before starting
        checksum_file = os.path.join(package_path, ".cargo-checksum.json")
        with open(checksum_file, 'r') as csum:
            checksum_contents = json.load(csum)

        # Also load the cargo.toml file which we need to write back
        cargo_file = os.path.join(package_path, "Cargo.toml")
        with open(cargo_file, 'rb') as cfile:
            cargo_contents = cfile.read()

        shutil.rmtree(package_path)

        # Make package and src dirs and replace lib.rs
        os.makedirs(os.path.join(package_path, "src"), exist_ok=True)
        with open(os.path.join(package_path, "src", "lib.rs"), "w") as librs:
            librs.write(self.LIB_RS_BODY)

        # Restore cargo.toml
        with open(cargo_file, 'wb') as cfile:
            cfile.write(cargo_contents)

        # Restore checksum
        with open(checksum_file, 'w') as csum:
            json.dump(checksum_contents, csum)

    def destroy_unused_crates(self):
        """Stubs out every vendored crate that is unused on our platform.

        Each such crate has its sources replaced, its Cargo.toml simplified
        and its checksums regenerated; the names are printed at the end.
        """
        metadata = load_metadata(self.working_dir, filter_platform=None)
        used_packages = {p["name"]
                         for p in load_metadata(self.working_dir)["packages"]}

        cleaned_packages = []
        # Since we're asking for _all_ metadata packages, we may see
        # duplication.
        for package in metadata["packages"]:
            # Skip used packages
            if package["name"] in used_packages:
                continue

            # Detect the correct package path to destroy
            pkg_path = os.path.join(
                self.vendor_dir,
                "{}-{}".format(package["name"], package["version"]))
            if not os.path.isdir(pkg_path):
                print(f'Crate {package["name"]} not found at {pkg_path}')
                continue

            self._replace_source_contents(pkg_path)
            self._modify_cargo_toml(pkg_path)
            _rerun_checksums(pkg_path)
            cleaned_packages.append(package["name"])

        for pkg in cleaned_packages:
            print("Removed unused crate", pkg)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700736
def main(args):
    """Re-vendors all crates next to this script and post-processes them.

    Args:
        args: Parsed command line; `skip_license_check` and `license_map` are
            read from it.
    """
    current_path = pathlib.Path(__file__).parent.absolute()
    vendor = os.path.join(current_path, "vendor")

    # First, actually run cargo vendor
    run_cargo_vendor(current_path)

    # Order matters here:
    # - Apply patches (also re-calculates checksums)
    # - Cleanup any owners files (otherwise, git check-in or checksums are
    #   unhappy)
    # - Destroy unused crates
    apply_patches(os.path.join(current_path, "patches"), vendor)
    cleanup_owners(vendor)
    CrateDestroyer(current_path, vendor).destroy_unused_crates()

    # Combine license file and check for any bad licenses
    license_manager = LicenseManager(current_path, vendor)
    license_manager.generate_license(
        args.skip_license_check,
        args.license_map,
        os.path.join(current_path, "licenses_used.txt"),
    )

    # Run crab audit on all packages
    crab_manager = CrabManager(
        current_path, os.path.join(current_path, "crab", "crates"))
    crab_manager.verify_traits()
765
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800766
if __name__ == '__main__':
    # Command line: an optional, repeatable list of packages to skip in the
    # license check, and an optional file to write the license map to.
    parser = argparse.ArgumentParser(description='Vendor packages properly')
    parser.add_argument(
        '--skip-license-check',
        '-s',
        action='append',
        help='Skip the license check on a specific package',
    )
    parser.add_argument(
        '--license-map',
        help='Write license map to this file',
    )
    main(parser.parse_args())
776 main(args)