blob: 5e5d4116b1b46794981dae6f097dc12fc43e4da3 [file] [log] [blame]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
George Burgess IV9e0cfde2022-09-27 15:08:15 -07003# Copyright 2021 The ChromiumOS Authors
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6""" This script cleans up the vendor directory.
7"""
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -07008import argparse
George Burgess IV635f7262022-08-09 21:32:20 -07009import collections
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000010import hashlib
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080011import json
12import os
13import pathlib
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070014import re
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -070015import shutil
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000016import subprocess
George Burgess IV04833702022-08-09 22:00:38 -070017import textwrap
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -070018import toml
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000019
# We only care about crates we're actually going to use and that's usually
# limited to ones with cfg(linux). For running `cargo metadata`, limit results
# to only this platform.
DEFAULT_PLATFORM_FILTER = "x86_64-unknown-linux-gnu"

# A series of crates which are to be made empty by having no (non-comment)
# contents in their `lib.rs`, rather than by inserting a compilation error.
NOP_EMPTY_CRATES = frozenset({"windows"})

# Rust source consisting of a single `compile_error!` invocation.
EMPTY_CRATE_BODY = """\
compile_error!("This crate cannot be built for this configuration.");
"""
# The same body turned into a Rust line comment by a leading `// `.
NOP_EMPTY_CRATE_BODY = "// " + EMPTY_CRATE_BODY
33
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000034
35def _rerun_checksums(package_path):
36 """Re-run checksums for given package.
37
38 Writes resulting checksums to $package_path/.cargo-checksum.json.
39 """
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070040 hashes = dict()
George Burgess IV7dffc252022-08-31 14:37:01 -070041 checksum_path = os.path.join(package_path, ".cargo-checksum.json")
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000042 if not pathlib.Path(checksum_path).is_file():
43 return False
44
George Burgess IV7dffc252022-08-31 14:37:01 -070045 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000046 contents = json.load(fread)
47
48 for root, _, files in os.walk(package_path, topdown=True):
49 for f in files:
50 # Don't checksum an existing checksum file
51 if f == ".cargo-checksum.json":
52 continue
53
54 file_path = os.path.join(root, f)
George Burgess IV7dffc252022-08-31 14:37:01 -070055 with open(file_path, "rb") as frb:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000056 m = hashlib.sha256()
57 m.update(frb.read())
58 d = m.hexdigest()
59
60 # Key is relative to the package path so strip from beginning
61 key = os.path.relpath(file_path, package_path)
62 hashes[key] = d
63
64 if hashes:
George Burgess IV7dffc252022-08-31 14:37:01 -070065 print(
66 "{} regenerated {} hashes".format(package_path, len(hashes.keys()))
67 )
68 contents["files"] = hashes
69 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070070 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000071
72 return True
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080073
74
75def _remove_OWNERS_checksum(root):
George Burgess IV7dffc252022-08-31 14:37:01 -070076 """Delete all OWNERS files from the checksum file.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080077
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000078 Args:
79 root: Root directory for the vendored crate.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080080
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000081 Returns:
82 True if OWNERS was found and cleaned up. Otherwise False.
83 """
George Burgess IV7dffc252022-08-31 14:37:01 -070084 checksum_path = os.path.join(root, ".cargo-checksum.json")
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080085 if not pathlib.Path(checksum_path).is_file():
86 return False
87
George Burgess IV7dffc252022-08-31 14:37:01 -070088 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080089 contents = json.load(fread)
90
91 del_keys = []
George Burgess IV7dffc252022-08-31 14:37:01 -070092 for cfile in contents["files"]:
93 if "OWNERS" in cfile:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080094 del_keys.append(cfile)
95
96 for key in del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -070097 del contents["files"][key]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080098
99 if del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -0700100 print("{} deleted: {}".format(root, del_keys))
101 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700102 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800103
104 return bool(del_keys)
105
106
def cleanup_owners(vendor_path):
    """Remove owners checksums from the vendor directory.

    We currently do not check in the OWNERS files from vendored crates because
    they interfere with the find-owners functionality in gerrit. Every
    directory below `vendor_path` is visited and any "OWNERS" entries in its
    checksum file are removed; the names of the directories that were changed
    are printed at the end.

    Args:
        vendor_path: Absolute path to vendor directory.
    """
    cleaned = [
        name
        for parent, subdirs, _ in os.walk(vendor_path)
        for name in subdirs
        if _remove_OWNERS_checksum(os.path.join(parent, name))
    ]

    if cleaned:
        print("Cleanup owners:\n {}".format("\n".join(cleaned)))
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800127
128
def apply_single_patch(patch, workdir):
    """Apply one patch file with `patch -p1` inside `workdir`.

    Args:
        patch: Path of the patch file, handed to `patch -i`.
        workdir: Directory the `patch` command is run in.

    Returns:
        True if successful. False otherwise.
    """
    cmd = ["patch", "-p1", "--no-backup-if-mismatch", "-i", patch]
    completed = subprocess.run(cmd, cwd=workdir)
    return completed.returncode == 0
146
147
def apply_patch_script(script, workdir):
    """Run the given patch script, returning whether it exited cleanly.

    Args:
        script: Path of the executable to run; it receives no arguments.
        workdir: Directory the script is run in.

    Returns:
        True if successful. False otherwise.
    """
    completed = subprocess.run([script], cwd=workdir)
    return completed.returncode == 0
155
156
def determine_vendor_crates(vendor_path):
    """Returns a map of {crate_name: [directory]} at the given vendor_path.

    Every entry of `vendor_path` is expected to be named
    `${crate_name}-${major}.${minor}.${patch}`, optionally followed by a
    `-...` or `+...` suffix. The crate name is everything before the version.

    Args:
        vendor_path: Directory containing the versioned crate directories.

    Returns:
        A defaultdict mapping each crate name to the sorted list of entry
        names that belong to it.

    Raises:
        AssertionError: If an entry name contains no recognizable version.
    """
    result = collections.defaultdict(list)
    # `(?:...)` is a non-capturing group: the version must be followed by a
    # pre-release/build separator or by the end of the name.
    crate_version_re = re.compile(r"-\d+\.\d+\.\d+(?:[+-]|$)")
    for crate_name_plus_ver in os.listdir(vendor_path):
        version = crate_version_re.search(crate_name_plus_ver)
        # Raised explicitly (rather than via `assert`) so the check still
        # runs when Python is started with -O.
        if not version:
            raise AssertionError(crate_name_plus_ver)
        name = crate_name_plus_ver[: version.start()]
        result[name].append(crate_name_plus_ver)

    for crate_list in result.values():
        crate_list.sort()
    return result
170
171
def apply_patches(patches_path, vendor_path):
    """Finds patches and applies them to sub-folders in the vendored crates.

    Each sub-directory of `patches_path` names its target either directly
    (`${crate_name}-${version}`, an existing directory in `vendor_path`) or by
    bare crate name, in which case every vendored version of that crate is
    patched. Files ending in `.patch` are applied with `patch`; other
    executable files are run as patch scripts; anything else is ignored.
    Checksums are regenerated for every crate that was targeted.

    Args:
        patches_path: Path to folder with patches. Expect all patches to be one
            level down (matching the crate name).
        vendor_path: Root path to vendored crates directory.

    Raises:
        RuntimeError: A patch directory matches no vendored crate.
        ValueError: At least one patch or patch script failed.
    """
    # Don't bother running if there is no patches directory.
    if not pathlib.Path(patches_path).is_dir():
        return

    crates_by_name = determine_vendor_crates(vendor_path)
    # Used as an insertion-ordered set of the crate directories we touched.
    touched = {}
    any_failure = False

    for entry in os.listdir(patches_path):
        entry_dir = os.path.join(patches_path, entry)

        # Patches sitting directly in the root dir are not processed.
        if not os.path.isdir(entry_dir):
            continue

        # A directory of exactly this name wins over a crate-name match.
        if os.path.isdir(os.path.join(vendor_path, entry)):
            targets = [entry]
        elif entry in crates_by_name:
            targets = crates_by_name[entry]
        else:
            raise RuntimeError(f"Unknown crate in {vendor_path}: {entry}")

        for patch_name in os.listdir(entry_dir):
            patch_file = os.path.join(entry_dir, patch_name)
            if not os.path.isfile(patch_file):
                continue

            if patch_name.endswith(".patch"):
                applier = apply_single_patch
            elif os.access(patch_file, os.X_OK):
                applier = apply_patch_script
            else:
                # Unrecognized. Skip it.
                continue

            for target_name in targets:
                touched[target_name] = True
                target_dir = os.path.join(vendor_path, target_name)
                print(f"-- Applying {patch_file} to {target_dir}")
                if applier(patch_file, target_dir):
                    continue
                print(f"Failed to apply {patch_file} to {target_dir}")
                any_failure = True

    # Report late, so every failing patch shows up in a single invocation.
    if any_failure:
        raise ValueError("Patches failed; please see above logs")

    # Patched sources no longer match their recorded checksums.
    for target_name in touched:
        _rerun_checksums(os.path.join(vendor_path, target_name))
239
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700240
def get_workspace_cargo_toml(working_dir):
    """Returns the workspace Cargo.toml paths for working_dir, as a list.

    The list holds a single entry, `working_dir / "projects" / "Cargo.toml"`,
    so `working_dir` must support the `/` operator (e.g. a pathlib.Path).
    """
    manifest = working_dir / "projects" / "Cargo.toml"
    return [manifest]
George Burgess IV40cc91c2022-08-15 13:07:40 -0700244
245
def run_cargo_vendor(working_dir):
    """Runs cargo vendor.

    Any existing `vendor` directory under `working_dir` is deleted first, then
    `cargo vendor --versioned-dirs` is run against projects/Cargo.toml with
    `vendor` as the output directory.

    Args:
        working_dir: Directory to run inside, as a path object supporting `/`
            and `.exists()`. It is used as the working directory for cargo.

    Raises:
        subprocess.CalledProcessError: cargo exited with a non-zero status.
    """
    # Cargo will refuse to revendor into versioned directories, which leads to
    # repeated `./vendor.py` invocations trying to apply patches to
    # already-patched sources. Start from a clean slate to avoid this.
    stale_vendor = working_dir / "vendor"
    if stale_vendor.exists():
        shutil.rmtree(stale_vendor)

    subprocess.check_call(
        [
            "cargo",
            "vendor",
            "--versioned-dirs",
            "-v",
            "--manifest-path=projects/Cargo.toml",
            "--",
            "vendor",
        ],
        cwd=working_dir,
    )
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +0000271
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700272
def load_metadata(working_dir, filter_platform=DEFAULT_PLATFORM_FILTER):
    """Load metadata for all projects under a given directory.

    Runs `cargo metadata --format-version=1` against projects/Cargo.toml and
    parses its JSON output.

    Args:
        working_dir: Base directory to run from.
        filter_platform: Filter packages to ones configured for this platform.
            A falsy value disables the filter.

    Returns:
        The decoded JSON document printed by `cargo metadata`.

    Raises:
        subprocess.CalledProcessError: cargo exited with a non-zero status.
    """
    cmd = [
        "cargo",
        "metadata",
        "--format-version=1",
        "--manifest-path=projects/Cargo.toml",
    ]
    # Conditionally add platform filter
    if filter_platform:
        cmd.extend(["--filter-platform", filter_platform])
    output = subprocess.check_output(cmd, cwd=working_dir)
    return json.loads(output)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700292
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700293
class LicenseManager:
    """Manage consolidating licenses for all packages."""

    # These are all the licenses we support. Keys are what is seen in metadata
    # and values are what is expected by ebuilds.
    SUPPORTED_LICENSES = {
        "0BSD": "0BSD",
        "Apache-2.0": "Apache-2.0",
        "BSD-2-Clause": "BSD-2",
        "BSD-3-Clause": "BSD-3",
        "ISC": "ISC",
        "MIT": "MIT",
        "MPL-2.0": "MPL-2.0",
        "unicode": "unicode",
        "Zlib": "ZLIB",
    }

    # Prefer to take attribution licenses in this order. All these require that
    # we actually use the license file found in the package so they MUST have
    # a license file set.
    PREFERRED_ATTRIB_LICENSE_ORDER = ["MIT", "BSD-3", "ISC"]

    # If Apache license is found, always prefer it (simplifies attribution)
    APACHE_LICENSE = "Apache-2.0"

    # Regex for license files found in the vendored directories. Search for
    # these files with re.IGNORECASE.
    #
    # These will be searched in order with the earlier entries being preferred.
    LICENSE_NAMES_REGEX = [
        r"^license-mit$",
        r"^copyright$",
        r"^licen[cs]e.*$",
    ]

    # Some crates have their license file in other crates. This usually occurs
    # because multiple crates are published from the same git repository and the
    # license isn't updated in each sub-crate. In these cases, we can just
    # ignore these packages.
    MAP_LICENSE_TO_OTHER = {
        "failure_derive": "failure",
        "grpcio-compiler": "grpcio",
        "grpcio-sys": "grpcio",
        "rustyline-derive": "rustyline",
        "uefi-macros": "uefi",
        "uefi-services": "uefi",
    }

    # Map a package to a specific license and license file. Only use this if
    # a package doesn't have an easily discoverable license or exports its
    # license in a weird way. Prefer to patch the project with a license and
    # upstream the patch instead.
    STATIC_LICENSE_MAP = {
        # "package name": ("license name", "license file relative location")
        # Patch for adding these are upstream, but the patch application
        # doesn't apply to `cargo metadata`. This is presumably because it
        # can't detect our vendor directory.
        # https://gitlab.freedesktop.org/slirp/libslirp-sys/-/merge_requests/6
        "libslirp-sys": ("MIT", "LICENSE"),
        # https://gitlab.freedesktop.org/anholt/deqp-runner/-/merge_requests/48
        "deqp-runner": ("MIT", "LICENSE"),
        # Upstream prefers to embed license text inside README.md:
        "riscv": ("ISC", "README.md"),
        "riscv-rt": ("ISC", "README.md"),
        "zerocopy": ("BSD-2", "LICENSE"),
        "zerocopy-derive": ("BSD-2", "LICENSE"),
    }

    def __init__(self, working_dir, vendor_dir):
        """Remember the directories license lookups are resolved against.

        Args:
            working_dir: Directory `cargo metadata` is run from; license file
                paths are recorded relative to it.
            vendor_dir: Directory holding the `${name}-${version}` crate
                directories.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _find_license_in_dir(self, search_dir):
        """Yield the license-like files found directly inside `search_dir`.

        Only regular files are considered. A file qualifies when its name
        matches one of LICENSE_NAMES_REGEX (case-insensitively). Results are
        yielded in preference order: by the index of the first matching
        pattern, then by file name. Sorting makes the order independent of
        the arbitrary order in which os.listdir reports entries.

        Args:
            search_dir: Directory to scan (not recursive).

        Yields:
            `search_dir` joined with each matching file name.
        """
        matches = []
        for p in os.listdir(search_dir):
            full_path = os.path.join(search_dir, p)
            # Ignore anything that's not a file
            if not os.path.isfile(full_path):
                continue

            # Record the first (i.e. most preferred) pattern that matches.
            for rank, regex in enumerate(self.LICENSE_NAMES_REGEX):
                if re.search(regex, p, re.IGNORECASE):
                    matches.append((rank, p, full_path))
                    break

        matches.sort()
        for _, _, full_path in matches:
            yield full_path

    def _guess_license_type(self, license_file):
        """Guess which license a file carries from its path, then its text.

        Args:
            license_file: Path of the candidate license file.

        Returns:
            "MIT", "APACHE" or "BSD-3" when a marker is found in the path or,
            failing that, in the file contents; "" otherwise.
        """
        if "-MIT" in license_file:
            return "MIT"
        elif "-APACHE" in license_file:
            return "APACHE"
        elif "-BSD" in license_file:
            return "BSD-3"

        # Decode explicitly and leniently: only plain substrings are searched
        # for, so a stray undecodable byte must not abort the run.
        with open(
            license_file, "r", encoding="utf-8", errors="replace"
        ) as f:
            lines = f.read()
            if "MIT" in lines:
                return "MIT"
            elif "Apache" in lines:
                return "APACHE"
            elif "BSD 3-Clause" in lines:
                return "BSD-3"

        return ""

    def generate_license(
        self, skip_license_check, print_map_to_file, license_shorthand_file
    ):
        """Generate the license map and license list from cargo metadata.

        For every package reported by `cargo metadata` a license (and, unless
        it is Apache-2.0, a license file) is selected. The resulting map can
        be dumped as JSON, and the sorted set of license names in use is
        written to `license_shorthand_file`.

        Args:
            skip_license_check: Package names to skip entirely, or None.
            print_map_to_file: If truthy, path (joined onto working_dir) that
                the package -> license map is written to as JSON.
            license_shorthand_file: Path the list of license names is written
                to, one per line after a comment header.

        Raises:
            ValueError: A package with the special unicode license has no
                LICENSE-UNICODE file, or that license is in use while
                Apache-2.0 is not.
            Exception: A package has no acceptable license, or a selected
                non-Apache license has no license file.
        """
        metadata = load_metadata(self.working_dir)

        special_unicode_license = "(MIT OR Apache-2.0) AND Unicode-DFS-2016"
        bad_licenses = {}

        # Keep license map ordered so it generates a consistent license map
        license_map = {}

        skip_license_check = skip_license_check or []
        has_unicode_license = False

        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            pkg_name = package["name"]
            if pkg_name in skip_license_check:
                print(
                    "Skipped license check on {}. Reason: Skipped from command line".format(
                        pkg_name
                    )
                )
                continue

            if pkg_name in self.MAP_LICENSE_TO_OTHER:
                print(
                    "Skipped license check on {}. Reason: License already in {}".format(
                        pkg_name, self.MAP_LICENSE_TO_OTHER[pkg_name]
                    )
                )
                continue

            # Check if we have a static license map for this package. Use the
            # static values if we have it already set.
            if pkg_name in self.STATIC_LICENSE_MAP:
                static_license, static_file = self.STATIC_LICENSE_MAP[pkg_name]
                license_map[pkg_name] = {
                    "license": static_license,
                    "license_file": static_file,
                }
                continue

            # use `or ''` instead of get's default, since `package` may have a
            # None value for 'license'.
            license_expr = package.get("license") or ""

            # We ignore the metadata for license file because most crates don't
            # have it set. Just scan the source for licenses.
            pkg_version = package["version"]
            license_files = list(
                self._find_license_in_dir(
                    os.path.join(self.vendor_dir, f"{pkg_name}-{pkg_version}")
                )
            )

            # FIXME(b/240953811): The code later in this loop is only
            # structured to handle ORs, not ANDs. Fortunately, this license in
            # particular is `AND`ed between a super common license (Apache) and
            # a more obscure one (unicode). This hack is specifically intended
            # for the `unicode-ident` crate, though no crate name check is
            # made, since it's OK other crates happen to have this license.
            if license_expr == special_unicode_license:
                has_unicode_license = True
                # We'll check later to be sure MIT or Apache-2.0 is represented
                # properly.
                for x in license_files:
                    if os.path.basename(x) == "LICENSE-UNICODE":
                        license_file = x
                        break
                else:
                    raise ValueError(
                        "No LICENSE-UNICODE found in " f"{license_files}"
                    )
                license_map[pkg_name] = {
                    "license": license_expr,
                    "license_file": license_file,
                }
                continue

            # If there are multiple licenses, they are delimited with "OR" or "/"
            delim = " OR " if " OR " in license_expr else "/"
            found = [x.strip() for x in license_expr.split(delim)]

            # Filter licenses to ones we support
            licenses_or = [
                self.SUPPORTED_LICENSES[f]
                for f in found
                if f in self.SUPPORTED_LICENSES
            ]

            # If apache license is found, always prefer it because it simplifies
            # license attribution (we can use existing Apache notice)
            if self.APACHE_LICENSE in licenses_or:
                license_map[pkg_name] = {"license": self.APACHE_LICENSE}

            # Handle single license that has at least one license file
            # We pick the first license file and the license
            elif len(licenses_or) == 1:
                if license_files:
                    l = licenses_or[0]
                    lf = license_files[0]

                    license_map[pkg_name] = {
                        "license": l,
                        "license_file": os.path.relpath(lf, self.working_dir),
                    }
                else:
                    bad_licenses[pkg_name] = "{} missing license file".format(
                        licenses_or[0]
                    )
            # Handle multiple licenses
            elif len(licenses_or) > 1:
                # Check preferred licenses in order
                license_found = False
                for l in self.PREFERRED_ATTRIB_LICENSE_ORDER:
                    if l not in licenses_or:
                        continue

                    for f in license_files:
                        if self._guess_license_type(f) == l:
                            license_found = True
                            license_map[pkg_name] = {
                                "license": l,
                                "license_file": os.path.relpath(
                                    f, self.working_dir
                                ),
                            }
                            break

                    # Break out of loop if license is found
                    if license_found:
                        break
            # NOTE(review): reconstructed as the last arm of the if/elif chain
            # (i.e. no supported license at all) - confirm against upstream.
            else:
                bad_licenses[pkg_name] = license_expr

        # If we had any bad licenses, we need to abort
        if bad_licenses:
            for k, reason in bad_licenses.items():
                print("{} had no acceptable licenses: {}".format(k, reason))
            raise Exception("Bad licenses in vendored packages.")

        # Write license map to file
        if print_map_to_file:
            with open(
                os.path.join(self.working_dir, print_map_to_file),
                "w",
                encoding="utf-8",
            ) as lfile:
                json.dump(license_map, lfile, sort_keys=True)

        # Raise missing licenses unless we have a valid reason to ignore them
        raise_missing_license = False
        for name, v in license_map.items():
            if (
                "license_file" not in v
                and v.get("license", "") != self.APACHE_LICENSE
            ):
                raise_missing_license = True
                print(
                    " {}: Missing license file. Fix or add to ignorelist.".format(
                        name
                    )
                )

        if raise_missing_license:
            raise Exception(
                "Unhandled missing license file. "
                "Make sure all are accounted for before continuing."
            )

        has_license_types = {x["license"] for x in license_map.values()}
        if has_unicode_license:
            # Replace this license with the actual SPDX license we plan to use.
            has_license_types.remove(special_unicode_license)
            has_license_types.add("unicode")
            if self.APACHE_LICENSE not in has_license_types:
                raise ValueError(
                    "Need the apache license; currently have: "
                    f"{sorted(has_license_types)}"
                )

        sorted_licenses = sorted(has_license_types)
        print("The following licenses are in use:", sorted_licenses)
        header = textwrap.dedent(
            """\
            # File to describe the licenses used by this registry.
            # Used so it's easy to automatically verify ebuilds are updated.
            # Each line is a license. Lines starting with # are comments.
            """
        )
        with open(license_shorthand_file, "w", encoding="utf-8") as f:
            f.write(header)
            f.write("\n".join(sorted_licenses))
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700600
601
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700602# TODO(abps) - This needs to be replaced with datalog later. We should compile
603# all crab files into datalog and query it with our requirements
604# instead.
class CrabManager:
    """Manage audit files."""

    def __init__(self, working_dir, crab_dir):
        """Remember where to run cargo and where the crab files live.

        Args:
            working_dir: Directory `cargo metadata` is run from.
            crab_dir: Directory holding one `${name}-${version}.toml` audit
                file per crate.
        """
        self.working_dir = working_dir
        self.crab_dir = crab_dir

    def _check_bad_traits(self, crabdata):
        """Checks that a package's crab audit meets our requirements.

        Args:
            crabdata: Dict with crab keys in standard templated format. It
                must contain a "common" table.

        Returns:
            True if the crate is NOT acceptable (currently: its "common"
            table has a truthy "deny" value), False otherwise.
        """
        common = crabdata["common"]
        # TODO(b/200578411) - Figure out what conditions we should enforce as
        # part of the audit.
        conditions = [
            common.get("deny", None),
        ]

        # If any conditions are true, this crate is not acceptable.
        return any(conditions)

    def verify_traits(self):
        """Verify that all required CRAB traits for this repository are met.

        Raises:
            ValueError: At least one package has no crab file, has a crab file
                whose crate_name/version do not match, or fails the bad-traits
                check. The failures are printed before raising.
        """
        metadata = load_metadata(self.working_dir)

        failing_crates = {}

        # Verify all packages have a CRAB file associated with it and they meet
        # all our required traits
        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            crabname = "{}-{}".format(package["name"], package["version"])
            filename = os.path.join(self.crab_dir, "{}.toml".format(crabname))

            # If crab file doesn't exist, the crate fails
            if not os.path.isfile(filename):
                failing_crates[crabname] = "No crab file"
                continue

            # TOML documents are UTF-8; don't depend on the locale's encoding.
            with open(filename, "r", encoding="utf-8") as f:
                crabdata = toml.loads(f.read())

            # If crab file's crate_name and version keys don't match this
            # package, it also fails. This is just housekeeping...
            if (
                package["name"] != crabdata["crate_name"]
                or package["version"] != crabdata["version"]
            ):
                failing_crates[crabname] = "Crate name or version don't match"
                continue

            if self._check_bad_traits(crabdata):
                failing_crates[crabname] = "Failed bad traits check"

        # If we had any failing crates, list them now, and exit with an error.
        if failing_crates:
            print("Failed CRAB audit:")
            for k, v in failing_crates.items():
                print(f" {k}: {v}")
            raise ValueError("CRAB audit did not complete successfully.")
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700671
672
def clean_source_related_lines_in_place(cargo_toml):
    """Removes all [[bin]] (and similar) sections in `cargo_toml`.

    Also drops `lib.path` and the `build`, `default-run` and `include` keys
    of the `package` table. Missing tables and keys are ignored.

    Args:
        cargo_toml: Parsed Cargo.toml contents as a dict; modified in place.
    """
    # Cargo spells the example target table `[[example]]`. The plural
    # "examples" is still removed so manifests cleaned by earlier versions of
    # this function keep getting the same treatment.
    for table in ("bench", "bin", "example", "examples", "test"):
        cargo_toml.pop(table, None)

    lib = cargo_toml.get("lib")
    if lib:
        lib.pop("path", None)

    package = cargo_toml.get("package")
    if package:
        for key in ("build", "default-run", "include"):
            package.pop(key, None)
689
690
def clean_features_in_place(cargo_toml):
    """Makes every feature in `cargo_toml` enable nothing.

    Feature names are kept; each one is mapped to its own empty list. A
    missing or empty `features` table is left untouched.

    Args:
        cargo_toml: Parsed Cargo.toml contents as a dict; modified in place.
    """
    feature_table = cargo_toml.get("features")
    if feature_table:
        # Snapshot the names up front; only values are reassigned below.
        for feature_name in tuple(feature_table):
            feature_table[feature_name] = []
George Burgess IVd4ff0502022-08-14 23:27:57 -0700699
700
def remove_all_dependencies_in_place(cargo_toml):
    """Strips every dependency table out of `cargo_toml`.

    This covers the top-level `dependencies`, `dev-dependencies` and
    `build-dependencies` tables as well as the same tables under each
    `target.*` entry. Target entries left empty are dropped; if that empties
    every entry, the whole `target` table is dropped.

    Args:
        cargo_toml: Parsed Cargo.toml contents as a dict; modified in place.
    """
    dependency_tables = ("build-dependencies", "dependencies", "dev-dependencies")

    for table in dependency_tables:
        cargo_toml.pop(table, None)

    per_target = cargo_toml.get("target")
    if not per_target:
        return

    for target_tables in per_target.values():
        for table in dependency_tables:
            target_tables.pop(table, None)

    emptied = [name for name, tables in per_target.items() if not tables]
    if len(emptied) == len(per_target):
        # Nothing but dependencies lived under `target`; remove it wholesale.
        del cargo_toml["target"]
        return

    for name in emptied:
        del per_target[name]
George Burgess IV0313d782022-08-15 23:45:44 -0700724
725
class CrateDestroyer:
    """Replaces vendored crates that aren't used on our platform with stubs.

    A destroyed crate keeps its directory and a reduced Cargo.toml, but its
    sources are replaced by a single `src/lib.rs`.
    """

    def __init__(self, working_dir, vendor_dir):
        """Stores the directories used by `destroy_unused_crates`.

        Args:
            working_dir: Directory passed to `load_metadata`.
            vendor_dir: Directory containing `<name>-<version>` crate dirs.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    @staticmethod
    def _scrub_package_table(package):
        """Rewrites the `[package]` table of a crate that is being emptied.

        Args:
            package: The parsed `package` table as a dict; modified in place.
        """
        # Change description, license and delete license key
        package["description"] = "Empty crate that should not build."
        package["license"] = "Apache-2.0"

        # Cargo.toml spells the key `license-file`. The underscore spelling
        # is removed too, since that is what this code always popped.
        package.pop("license-file", None)
        package.pop("license_file", None)
        # If there's no build.rs but we specify `links = "foo"`, Cargo gets
        # upset.
        package.pop("links", None)

    def _modify_cargo_toml(self, pkg_path):
        """Rewrites `pkg_path`/Cargo.toml to describe an empty crate.

        Args:
            pkg_path: Directory of the vendored crate.
        """
        cargo_toml_path = os.path.join(pkg_path, "Cargo.toml")
        with open(cargo_toml_path, "r", encoding="utf-8") as cargo:
            contents = toml.load(cargo)

        self._scrub_package_table(contents["package"])

        # Some packages have cfg-specific dependencies. Remove them here; we
        # don't care about the dependencies of an empty package.
        #
        # This is a load-bearing optimization: `dev-python/toml` doesn't
        # always round-trip dumps(loads(x)) correctly when `x` has keys with
        # strings (b/242589711#comment3). The place this has bitten us so far
        # is target dependencies, which can be harmlessly removed for now.
        #
        # Cleaning features in-place is also necessary, since we're removing
        # dependencies, and a feature can enable features in dependencies.
        # Cargo errors out on `[features] foo = "bar/baz"` if `bar` isn't a
        # dependency.
        clean_features_in_place(contents)
        remove_all_dependencies_in_place(contents)

        # Since we're removing all source files, also be sure to remove
        # source-related keys.
        clean_source_related_lines_in_place(contents)

        with open(cargo_toml_path, "w", encoding="utf-8") as cargo:
            toml.dump(contents, cargo)

    def _replace_source_contents(self, package_path, compile_error):
        """Deletes everything in `package_path` except a stub `src/lib.rs`.

        Cargo.toml and .cargo-checksum.json are read beforehand and written
        back unchanged afterwards.

        Args:
            package_path: Directory of the vendored crate.
            compile_error: If true, the stub lib.rs is `EMPTY_CRATE_BODY`;
                otherwise it is `NOP_EMPTY_CRATE_BODY`.
        """
        # First load the checksum file before starting
        checksum_file = os.path.join(package_path, ".cargo-checksum.json")
        with open(checksum_file, "r", encoding="utf-8") as csum:
            checksum_contents = json.load(csum)

        # Also load the cargo.toml file which we need to write back
        cargo_file = os.path.join(package_path, "Cargo.toml")
        with open(cargo_file, "rb") as cfile:
            cargo_contents = cfile.read()

        shutil.rmtree(package_path)

        # Make package and src dirs and replace lib.rs
        os.makedirs(os.path.join(package_path, "src"), exist_ok=True)
        librs_path = os.path.join(package_path, "src", "lib.rs")
        with open(librs_path, "w", encoding="utf-8") as librs:
            librs.write(
                EMPTY_CRATE_BODY if compile_error else NOP_EMPTY_CRATE_BODY
            )

        # Restore cargo.toml
        with open(cargo_file, "wb") as cfile:
            cfile.write(cargo_contents)

        # Restore checksum
        with open(checksum_file, "w", encoding="utf-8") as csum:
            json.dump(checksum_contents, csum)

    def destroy_unused_crates(self):
        """Empties every vendored crate whose name isn't used by our platform.

        Crates whose directory can't be found under `vendor_dir` are reported
        and skipped. The names of emptied crates are printed at the end.
        """
        metadata = load_metadata(self.working_dir, filter_platform=None)
        # Matching is by name only: if any version of a crate shows up in the
        # default-filtered metadata, every version of it is kept.
        used_packages = {
            p["name"] for p in load_metadata(self.working_dir)["packages"]
        }

        cleaned_packages = []
        # Since we're asking for _all_ metadata packages, we may see
        # duplication.
        for package in metadata["packages"]:
            # Skip used packages
            package_name = package["name"]
            if package_name in used_packages:
                continue

            # Detect the correct package path to destroy
            pkg_path = os.path.join(
                self.vendor_dir,
                "{}-{}".format(package_name, package["version"]),
            )
            if not os.path.isdir(pkg_path):
                print(f"Crate {package_name} not found at {pkg_path}")
                continue

            self._replace_source_contents(
                pkg_path, compile_error=package_name not in NOP_EMPTY_CRATES
            )
            self._modify_cargo_toml(pkg_path)
            _rerun_checksums(pkg_path)
            cleaned_packages.append(package_name)

        for pkg in cleaned_packages:
            print("Removed unused crate", pkg)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700829
George Burgess IV7dffc252022-08-31 14:37:01 -0700830
def main(args):
    """Vendors crates, then patches, prunes, licenses and audits them.

    All paths are resolved relative to the directory containing this script.

    Args:
        args: Parsed command-line arguments; `skip_license_check` and
            `license_map` are read from it.
    """
    repo_root = pathlib.Path(__file__).parent.absolute()
    vendor = os.path.join(repo_root, "vendor")

    # Populate the vendor directory before touching anything inside it.
    run_cargo_vendor(repo_root)

    # The next three steps must stay in this order:
    # - Apply patches (also re-calculates checksums)
    # - Cleanup any owners files (otherwise, git check-in or checksums are
    #   unhappy)
    # - Destroy unused crates
    apply_patches(os.path.join(repo_root, "patches"), vendor)
    cleanup_owners(vendor)
    CrateDestroyer(repo_root, vendor).destroy_unused_crates()

    # Combine license file and check for any bad licenses
    license_manager = LicenseManager(repo_root, vendor)
    license_manager.generate_license(
        args.skip_license_check,
        args.license_map,
        os.path.join(repo_root, "licenses_used.txt"),
    )

    # Run crab audit on all packages
    crab_manager = CrabManager(
        repo_root, os.path.join(repo_root, "crab", "crates")
    )
    crab_manager.verify_traits()
860
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800861
if __name__ == "__main__":
    # Build the command-line interface, then hand the parsed options to main.
    parser = argparse.ArgumentParser(description="Vendor packages properly")

    # May be given several times; each use appends one more package name.
    parser.add_argument(
        "--skip-license-check",
        "-s",
        action="append",
        help="Skip the license check on a specific package",
    )
    parser.add_argument(
        "--license-map",
        help="Write license map to this file",
    )

    args = parser.parse_args()
    main(args)