#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2021 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
""" This script cleans up the vendor directory.
"""
import argparse
import collections
import hashlib
import json
import os
import pathlib
import re
import shutil
import subprocess
import textwrap
import toml

# We only care about crates we're actually going to use and that's usually
# limited to ones with cfg(linux). For running `cargo metadata`, limit results
# to only this platform
DEFAULT_PLATFORM_FILTER = "x86_64-unknown-linux-gnu"


def _rerun_checksums(package_path):
    """Re-run checksums for given package.

    Writes resulting checksums to $package_path/.cargo-checksum.json.
    """
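    # The checksum file is JSON shaped roughly like this (illustrative sketch):
    #   {"files": {"src/lib.rs": "<sha256 hex>", ...}, "package": "<sha256 hex>"}
    # Only the "files" map is regenerated below; any other keys are preserved.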
    hashes = dict()
    checksum_path = os.path.join(package_path, '.cargo-checksum.json')
    if not pathlib.Path(checksum_path).is_file():
        return False

    with open(checksum_path, 'r') as fread:
        contents = json.load(fread)

    for root, _, files in os.walk(package_path, topdown=True):
        for f in files:
            # Don't checksum an existing checksum file
            if f == ".cargo-checksum.json":
                continue

            file_path = os.path.join(root, f)
            with open(file_path, 'rb') as frb:
                m = hashlib.sha256()
                m.update(frb.read())
                d = m.hexdigest()

                # Key is relative to the package path so strip from beginning
                key = os.path.relpath(file_path, package_path)
                hashes[key] = d

    if hashes:
        print("{} regenerated {} hashes".format(package_path,
                                                len(hashes.keys())))
        contents['files'] = hashes
        with open(checksum_path, 'w') as fwrite:
            json.dump(contents, fwrite, sort_keys=True)

    return True


def _remove_OWNERS_checksum(root):
    """ Delete all OWNERS files from the checksum file.

    Args:
        root: Root directory for the vendored crate.

    Returns:
        True if OWNERS was found and cleaned up. Otherwise False.
    """
    checksum_path = os.path.join(root, '.cargo-checksum.json')
    if not pathlib.Path(checksum_path).is_file():
        return False

    with open(checksum_path, 'r') as fread:
        contents = json.load(fread)

    del_keys = []
    for cfile in contents['files']:
        if 'OWNERS' in cfile:
            del_keys.append(cfile)

    for key in del_keys:
        del contents['files'][key]

    if del_keys:
        print('{} deleted: {}'.format(root, del_keys))
        with open(checksum_path, 'w') as fwrite:
            json.dump(contents, fwrite, sort_keys=True)

    return bool(del_keys)


def cleanup_owners(vendor_path):
    """ Remove owners checksums from the vendor directory.

    We currently do not check in the OWNERS files from vendored crates because
    they interfere with the find-owners functionality in gerrit. This cleanup
    simply finds all instances of "OWNERS" in the checksum files within and
    removes them.

    Args:
        vendor_path: Absolute path to vendor directory.
    """
    deps_cleaned = []
    for root, dirs, _ in os.walk(vendor_path):
        for d in dirs:
            removed = _remove_OWNERS_checksum(os.path.join(root, d))
            if removed:
                deps_cleaned.append(d)

    if deps_cleaned:
        print('Cleanup owners:\n {}'.format("\n".join(deps_cleaned)))


def apply_single_patch(patch, workdir):
    """Apply a single patch and return whether it was successful.

    Returns:
        True if successful. False otherwise.
    """
    print(f"-- Applying {patch} to {workdir}")
    proc = subprocess.run(["patch", "-p1", "-i", patch], cwd=workdir)
    return proc.returncode == 0


def determine_vendor_crates(vendor_path):
    """Returns a map of {crate_name: [directory]} at the given vendor_path."""
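    # For example, a vendor/ tree containing the hypothetical directories
    # `libc-0.2.126` and `rand-0.8.5` would yield:
    #   {'libc': ['libc-0.2.126'], 'rand': ['rand-0.8.5']}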
    result = collections.defaultdict(list)
    for crate_name_plus_ver in os.listdir(vendor_path):
        name, _ = crate_name_plus_ver.rsplit('-', 1)
        result[name].append(crate_name_plus_ver)

    for crate_list in result.values():
        crate_list.sort()
    return result


def apply_patches(patches_path, vendor_path):
    """Finds patches and applies them to sub-folders in the vendored crates.

    Args:
        patches_path: Path to folder with patches. Expect all patches to be one
                      level down (matching the crate name).
        vendor_path: Root path to vendored crates directory.
    """
    checksums_for = {}

    # Don't bother running if patches directory is empty
    if not pathlib.Path(patches_path).is_dir():
        return

    vendor_crate_map = determine_vendor_crates(vendor_path)
    # Look for all patches and apply them
    for d in os.listdir(patches_path):
        dir_path = os.path.join(patches_path, d)

        # We don't process patches in root dir
        if not os.path.isdir(dir_path):
            continue

        for patch in os.listdir(dir_path):
            file_path = os.path.join(dir_path, patch)

            # Skip if not a patch file
            if not os.path.isfile(file_path) or not patch.endswith(".patch"):
                continue

            # We accept one of two forms here:
            # - direct targets (these name `${crate_name}-${version}`
            #   directories)
            # - simply the crate name (which applies to all versions of the
            #   crate)
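            # For instance (hypothetical paths), patches/foo-1.2.3/a.patch is
            # applied only to vendor/foo-1.2.3, while patches/foo/a.patch is
            # applied to every vendored foo-* directory.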
            direct_target = os.path.join(vendor_path, d)
            if os.path.isdir(direct_target):
                # If there are any patches, queue checksums for that folder.
                checksums_for[d] = True

                # Apply the patch. Exit from patch loop if patching failed.
                if not apply_single_patch(file_path, direct_target):
                    print("Failed to apply patch: {}".format(patch))
                    break
            elif d in vendor_crate_map:
                for crate in vendor_crate_map[d]:
                    checksums_for[crate] = True
                    target = os.path.join(vendor_path, crate)
                    if not apply_single_patch(file_path, target):
                        print(f'Failed to apply patch {patch} to {target}')
                        break
            else:
                raise RuntimeError(f'Unknown crate in {vendor_path}: {d}')

    # Re-run checksums for all modified packages since we applied patches.
    for key in checksums_for.keys():
        _rerun_checksums(os.path.join(vendor_path, key))


def run_cargo_vendor(working_dir):
    """Runs cargo vendor.

    Args:
        working_dir: Directory to run inside. This should be the directory where
                     Cargo.toml is kept.
    """
    # Cargo will refuse to revendor into versioned directories, which leads to
    # repeated `./vendor.py` invocations trying to apply patches to
    # already-patched sources. Remove the existing vendor directory to avoid
    # this.
    vendor_dir = working_dir / 'vendor'
    if vendor_dir.exists():
        shutil.rmtree(vendor_dir)
    subprocess.check_call(
        ['cargo', 'vendor', '--versioned-dirs', '-v'],
        cwd=working_dir,
    )
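    # Note: with --versioned-dirs, crates land in versioned paths like
    # vendor/<name>-<version> (e.g. vendor/libc-0.2.126, illustrative), which
    # is the layout determine_vendor_crates() and the patching code assume.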


def load_metadata(working_dir, filter_platform=DEFAULT_PLATFORM_FILTER):
    """Load metadata for manifest at given directory.

    Args:
        working_dir: Directory to run from.
        filter_platform: Filter packages to ones configured for this platform.
    """
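    # The returned dict mirrors the `cargo metadata` JSON. The parts used by
    # this script look roughly like (illustrative sketch):
    #   {"packages": [{"name": ..., "version": ..., "license": ..., ...}, ...]}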
    manifest_path = os.path.join(working_dir, 'Cargo.toml')
    cmd = [
        'cargo', 'metadata', '--format-version', '1', '--manifest-path',
        manifest_path
    ]

    # Conditionally add platform filter
    if filter_platform:
        cmd.append("--filter-platform")
        cmd.append(filter_platform)

    output = subprocess.check_output(cmd, cwd=working_dir)

    return json.loads(output)


class LicenseManager:
    """ Manage consolidating licenses for all packages."""

    # These are all the licenses we support. Keys are what is seen in metadata
    # and values are what is expected by the ebuild.
    SUPPORTED_LICENSES = {
        'Apache-2.0': 'Apache-2.0',
        'MIT': 'MIT',
        'BSD-3-Clause': 'BSD-3',
        'ISC': 'ISC'
    }

    # Prefer to take attribution licenses in this order. All these require that
    # we actually use the license file found in the package so they MUST have
    # a license file set.
    PREFERRED_ATTRIB_LICENSE_ORDER = ['MIT', 'BSD-3', 'ISC']

    # If Apache license is found, always prefer it (simplifies attribution)
    APACHE_LICENSE = 'Apache-2.0'

    # Regex for license files found in the vendored directories. Search for
    # these files with re.IGNORECASE.
    #
    # These will be searched in order with the earlier entries being preferred.
    LICENSE_NAMES_REGEX = [
        r'^license-mit$',
        r'^copyright$',
        r'^licen[cs]e.*$',
    ]
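    # e.g. these match file names such as LICENSE-MIT, COPYRIGHT, LICENSE,
    # LICENCE.txt or license.md (matching is case-insensitive).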

    # Some crates have their license file in other crates. This usually occurs
    # because multiple crates are published from the same git repository and the
    # license isn't updated in each sub-crate. In these cases, we can just
    # ignore these packages.
    MAP_LICENSE_TO_OTHER = {
        'failure_derive': 'failure',
        'grpcio-compiler': 'grpcio',
        'grpcio-sys': 'grpcio',
        'rustyline-derive': 'rustyline',
    }

    # Map a package to a specific license and license file. Only use this if
    # a package doesn't have an easily discoverable license or exports its
    # license in a weird way. Prefer to patch the project with a license and
    # upstream the patch instead.
    STATIC_LICENSE_MAP = {
        # "package name": ("license name", "license file relative location")
    }

    def __init__(self, working_dir, vendor_dir):
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _find_license_in_dir(self, search_dir):
        for p in os.listdir(search_dir):
            # Ignore anything that's not a file
            if not os.path.isfile(os.path.join(search_dir, p)):
                continue

            # Now check if the name matches any of the regexes.
            # We'll return the first matching file.
            for regex in self.LICENSE_NAMES_REGEX:
                if re.search(regex, p, re.IGNORECASE):
                    yield os.path.join(search_dir, p)
                    break

    def _guess_license_type(self, license_file):
        if '-MIT' in license_file:
            return 'MIT'
        elif '-APACHE' in license_file:
            return 'APACHE'
        elif '-BSD' in license_file:
            return 'BSD-3'

        with open(license_file, 'r') as f:
            lines = f.read()
            if 'MIT' in lines:
                return 'MIT'
            elif 'Apache' in lines:
                return 'APACHE'
            elif 'BSD 3-Clause' in lines:
                return 'BSD-3'

        return ''

    def generate_license(self, skip_license_check, print_map_to_file,
                         license_shorthand_file):
        """Generate single massive license file from metadata."""
        metadata = load_metadata(self.working_dir)

        has_license_types = set()
        bad_licenses = {}

        # Keep license map ordered so it generates a consistent license map
        license_map = {}

        skip_license_check = skip_license_check or []

        for package in metadata['packages']:
            pkg_name = package['name']

            # Skip vendor libs directly
            if pkg_name == "vendor_libs":
                continue

            if pkg_name in skip_license_check:
                print(
                    "Skipped license check on {}. Reason: Skipped from command line"
                    .format(pkg_name))
                continue

            if pkg_name in self.MAP_LICENSE_TO_OTHER:
                print(
                    'Skipped license check on {}. Reason: License already in {}'
                    .format(pkg_name, self.MAP_LICENSE_TO_OTHER[pkg_name]))
                continue

            # Check if we have a static license map for this package. Use the
            # static values if we have it already set.
            if pkg_name in self.STATIC_LICENSE_MAP:
                (license, license_file) = self.STATIC_LICENSE_MAP[pkg_name]
                license_map[pkg_name] = {
                    "license": license,
                    "license_file": license_file,
                }
                continue

            license_files = []
            # use `or ''` instead of get's default, since `package` may have a
            # None value for 'license'.
            license = package.get('license') or ''

            # We ignore the metadata for license file because most crates don't
            # have it set. Just scan the source for licenses.
            pkg_version = package['version']
            license_files = [
                x for x in self._find_license_in_dir(
                    os.path.join(self.vendor_dir, f'{pkg_name}-{pkg_version}'))
            ]

            # If there are multiple licenses, they are delimited with "OR" or "/"
            delim = ' OR ' if ' OR ' in license else '/'
            found = license.split(delim)
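            # e.g. 'MIT OR Apache-2.0' splits into ['MIT', 'Apache-2.0'], and
            # 'MIT/Apache-2.0' splits the same way.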

            # Filter licenses to ones we support
            licenses_or = [
                self.SUPPORTED_LICENSES[f] for f in found
                if f in self.SUPPORTED_LICENSES
            ]

            # If apache license is found, always prefer it because it simplifies
            # license attribution (we can use existing Apache notice)
            if self.APACHE_LICENSE in licenses_or:
                has_license_types.add(self.APACHE_LICENSE)
                license_map[pkg_name] = {'license': self.APACHE_LICENSE}

            # Handle single license that has at least one license file
            # We pick the first license file and the license
            elif len(licenses_or) == 1:
                if license_files:
                    l = licenses_or[0]
                    lf = license_files[0]

                    has_license_types.add(l)
                    license_map[pkg_name] = {
                        'license': l,
                        'license_file': os.path.relpath(lf, self.working_dir),
                    }
                else:
                    bad_licenses[pkg_name] = "{} missing license file".format(
                        licenses_or[0])
            # Handle multiple licenses
            elif len(licenses_or) > 1:
                # Check preferred licenses in order
                license_found = False
                for l in self.PREFERRED_ATTRIB_LICENSE_ORDER:
                    if l not in licenses_or:
                        continue

                    for f in license_files:
                        if self._guess_license_type(f) == l:
                            license_found = True
                            has_license_types.add(l)
                            license_map[pkg_name] = {
                                'license': l,
                                'license_file':
                                    os.path.relpath(f, self.working_dir),
                            }
                            break

                    # Break out of loop if license is found
                    if license_found:
                        break
            else:
                bad_licenses[pkg_name] = license

        # If we had any bad licenses, we need to abort
        if bad_licenses:
            for k in bad_licenses.keys():
                print("{} had no acceptable licenses: {}".format(
                    k, bad_licenses[k]))
            raise Exception("Bad licenses in vendored packages.")

        # Write license map to file
        if print_map_to_file:
            with open(os.path.join(self.working_dir, print_map_to_file),
                      'w') as lfile:
                json.dump(license_map, lfile, sort_keys=True)

        # Raise missing licenses unless we have a valid reason to ignore them
        raise_missing_license = False
        for name, v in license_map.items():
            if 'license_file' not in v and v.get('license',
                                                 '') != self.APACHE_LICENSE:
                raise_missing_license = True
                print("  {}: Missing license file. Fix or add to ignorelist.".
                      format(name))

        if raise_missing_license:
            raise Exception(
                "Unhandled missing license file. "
                "Make sure all are accounted for before continuing.")

        sorted_licenses = sorted(has_license_types)
        print("Add the following licenses to the ebuild:\n", sorted_licenses)
        header = textwrap.dedent("""\
            # File to describe the licenses used by this registry.
            # Used so it's easy to automatically verify ebuilds are updated.
            # Each line is a license. Lines starting with # are comments.
            """)
        with open(license_shorthand_file, 'w', encoding='utf-8') as f:
            f.write(header)
            f.write('\n'.join(sorted_licenses))


# TODO(abps) - This needs to be replaced with datalog later. We should compile
#              all crab files into datalog and query it with our requirements
#              instead.
class CrabManager:
    """Manage audit files."""

    def __init__(self, working_dir, crab_dir):
        self.working_dir = working_dir
        self.crab_dir = crab_dir

    def _check_bad_traits(self, crabdata):
        """Checks that a package's crab audit meets our requirements.

        Args:
            crabdata: Dict with crab keys in standard templated format.
        """
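        # A crab file is TOML; judging from how it is read here and in
        # verify_traits(), it looks roughly like (illustrative sketch):
        #   crate_name = "foo"
        #   version = "1.2.3"
        #   [common]
        #   deny = ...
        # Any truthy `deny` value marks the crate as unacceptable.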
        common = crabdata['common']
        # TODO(b/200578411) - Figure out what conditions we should enforce as
        #                     part of the audit.
        conditions = [
            common.get('deny', None),
        ]

        # If any conditions are true, this crate is not acceptable.
        return any(conditions)

    def verify_traits(self):
        """ Verify that all required CRAB traits for this repository are met.
        """
        metadata = load_metadata(self.working_dir)

        failing_crates = {}

        # Verify all packages have a CRAB file associated with it and they meet
        # all our required traits
        for package in metadata['packages']:
            # Skip vendor_libs
            if package['name'] == 'vendor_libs':
                continue

            crabname = "{}-{}".format(package['name'], package['version'])
            filename = os.path.join(self.crab_dir, "{}.toml".format(crabname))

            # If crab file doesn't exist, the crate fails
            if not os.path.isfile(filename):
                failing_crates[crabname] = "No crab file {}".format(filename)
                continue

            with open(filename, 'r') as f:
                crabdata = toml.loads(f.read())

            # If crab file's crate_name and version keys don't match this
            # package, it also fails. This is just housekeeping...
            if package['name'] != crabdata['crate_name'] or package[
                    'version'] != crabdata['version']:
                failing_crates[crabname] = "Crate name or version don't match"
                continue

            if self._check_bad_traits(crabdata):
                failing_crates[crabname] = "Failed bad traits check"

        # If we had any failing crates, list them now
        if failing_crates:
            print('Failed CRAB audit:')
            for k, v in failing_crates.items():
                print('  {}: {}'.format(k, v))


class CrateDestroyer:
    LIB_RS_BODY = """compile_error!("This crate cannot be built for this configuration.");\n"""

    def __init__(self, working_dir, vendor_dir):
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _modify_cargo_toml(self, pkg_path):
        with open(os.path.join(pkg_path, "Cargo.toml"), "r") as cargo:
            contents = toml.load(cargo)

        # Change the description and license, and delete the license_file key.
        contents["package"]["description"] = "Empty crate that should not build."
        contents["package"]["license"] = "Apache-2.0"
        if contents["package"].get("license_file"):
            del contents["package"]["license_file"]

        with open(os.path.join(pkg_path, "Cargo.toml"), "w") as cargo:
            toml.dump(contents, cargo)

    def _replace_source_contents(self, package_path):
        # First load the checksum file before starting
        checksum_file = os.path.join(package_path, ".cargo-checksum.json")
        with open(checksum_file, 'r') as csum:
            checksum_contents = json.load(csum)

        # Also load the Cargo.toml file, which we need to write back
        cargo_file = os.path.join(package_path, "Cargo.toml")
        with open(cargo_file, 'rb') as cfile:
            cargo_contents = cfile.read()

        shutil.rmtree(package_path)

        # Make package and src dirs and replace lib.rs
        os.makedirs(os.path.join(package_path, "src"), exist_ok=True)
        with open(os.path.join(package_path, "src", "lib.rs"), "w") as librs:
            librs.write(self.LIB_RS_BODY)

        # Restore Cargo.toml
        with open(cargo_file, 'wb') as cfile:
            cfile.write(cargo_contents)

        # Restore checksum
        with open(checksum_file, 'w') as csum:
            json.dump(checksum_contents, csum)
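        # At this point the package directory contains only the stub
        # src/lib.rs, the original Cargo.toml, and the original
        # .cargo-checksum.json; callers are expected to re-run checksums
        # afterwards (destroy_unused_crates() below does).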

    def destroy_unused_crates(self):
        all_packages = load_metadata(self.working_dir, filter_platform=None)
        used_packages = {
            p["name"] for p in load_metadata(self.working_dir)["packages"]
        }

        cleaned_packages = []
        for package in all_packages["packages"]:

            # Skip used packages
            if package["name"] in used_packages:
                continue

            # Detect the correct package path to destroy
            pkg_path = os.path.join(
                self.vendor_dir,
                "{}-{}".format(package["name"], package["version"]))
            if not os.path.isdir(pkg_path):
                print(f'Crate {package["name"]} not found at {pkg_path}')
                continue

            self._replace_source_contents(pkg_path)
            self._modify_cargo_toml(pkg_path)
            _rerun_checksums(pkg_path)
            cleaned_packages.append(package["name"])

        for pkg in cleaned_packages:
            print("Removed unused crate", pkg)


def main(args):
    current_path = pathlib.Path(__file__).parent.absolute()
    patches = os.path.join(current_path, "patches")
    vendor = os.path.join(current_path, "vendor")
    crab_dir = os.path.join(current_path, "crab", "crates")
    license_shorthand_file = os.path.join(current_path, "licenses_used.txt")

    # First, actually run cargo vendor
    run_cargo_vendor(current_path)

    # Order matters here:
    # - Apply patches (also re-calculates checksums)
    # - Cleanup any owners files (otherwise, git check-in or checksums are
    #   unhappy)
    # - Destroy unused crates
    apply_patches(patches, vendor)
    cleanup_owners(vendor)
    destroyer = CrateDestroyer(current_path, vendor)
    destroyer.destroy_unused_crates()

    # Combine license file and check for any bad licenses
    lm = LicenseManager(current_path, vendor)
    lm.generate_license(args.skip_license_check, args.license_map,
                        license_shorthand_file)

    # Run crab audit on all packages
    crab = CrabManager(current_path, crab_dir)
    crab.verify_traits()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Vendor packages properly')
    parser.add_argument('--skip-license-check',
                        '-s',
                        help='Skip the license check on a specific package',
                        action='append')
    parser.add_argument('--license-map', help='Write license map to this file')
    args = parser.parse_args()

    main(args)