blob: af2b339b32bcd6c9330d214c7f382bdff88fdc02 [file] [log] [blame]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
George Burgess IV9e0cfde2022-09-27 15:08:15 -07003# Copyright 2021 The ChromiumOS Authors
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6""" This script cleans up the vendor directory.
7"""
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -07008import argparse
George Burgess IV635f7262022-08-09 21:32:20 -07009import collections
George Burgess IVfb0a1c42022-11-15 13:47:19 -070010import functools
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000011import hashlib
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080012import json
13import os
14import pathlib
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070015import re
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -070016import shutil
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000017import subprocess
George Burgess IV04833702022-08-09 22:00:38 -070018import textwrap
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -070019import toml
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000020
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070021# We only care about crates we're actually going to use and that's usually
22# limited to ones with cfg(linux). For running `cargo metadata`, limit results
George Burgess IVfb0a1c42022-11-15 13:47:19 -070023# to only these platforms.
ALL_SUPPORTED_PLATFORMS = (
    # Main targets.
    "x86_64-cros-linux-gnu",
    "armv7a-cros-linux-gnueabihf",
    "aarch64-cros-linux-gnu",
    # As far as we care, this is the same as x86_64-cros-linux-gnu.
    # "x86_64-pc-linux-gnu",
    # Baremetal targets.
    "thumbv6m-none-eabi",
    "thumbv7m-none-eabi",
    "thumbv7em-none-eabihf",
    "i686-unknown-uefi",
    "x86_64-unknown-uefi",
)

# Crates named here are emptied by leaving their `lib.rs` without any
# (non-comment) contents, instead of by inserting a compilation error.
NOP_EMPTY_CRATES = frozenset(("windows",))

# Body written into a crate that must never be compiled.
EMPTY_CRATE_BODY = (
    'compile_error!("This crate cannot be built for this configuration.");\n'
)
# Same text, commented out, for the crates in NOP_EMPTY_CRATES.
NOP_EMPTY_CRATE_BODY = f"// {EMPTY_CRATE_BODY}"
47
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000048
49def _rerun_checksums(package_path):
50 """Re-run checksums for given package.
51
52 Writes resulting checksums to $package_path/.cargo-checksum.json.
53 """
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070054 hashes = dict()
George Burgess IV7dffc252022-08-31 14:37:01 -070055 checksum_path = os.path.join(package_path, ".cargo-checksum.json")
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000056 if not pathlib.Path(checksum_path).is_file():
57 return False
58
George Burgess IV7dffc252022-08-31 14:37:01 -070059 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000060 contents = json.load(fread)
61
62 for root, _, files in os.walk(package_path, topdown=True):
63 for f in files:
64 # Don't checksum an existing checksum file
65 if f == ".cargo-checksum.json":
66 continue
67
68 file_path = os.path.join(root, f)
George Burgess IV7dffc252022-08-31 14:37:01 -070069 with open(file_path, "rb") as frb:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000070 m = hashlib.sha256()
71 m.update(frb.read())
72 d = m.hexdigest()
73
74 # Key is relative to the package path so strip from beginning
75 key = os.path.relpath(file_path, package_path)
76 hashes[key] = d
77
78 if hashes:
George Burgess IV7dffc252022-08-31 14:37:01 -070079 print(
80 "{} regenerated {} hashes".format(package_path, len(hashes.keys()))
81 )
82 contents["files"] = hashes
83 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070084 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000085
86 return True
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080087
88
89def _remove_OWNERS_checksum(root):
George Burgess IV7dffc252022-08-31 14:37:01 -070090 """Delete all OWNERS files from the checksum file.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080091
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000092 Args:
93 root: Root directory for the vendored crate.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080094
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000095 Returns:
96 True if OWNERS was found and cleaned up. Otherwise False.
97 """
George Burgess IV7dffc252022-08-31 14:37:01 -070098 checksum_path = os.path.join(root, ".cargo-checksum.json")
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080099 if not pathlib.Path(checksum_path).is_file():
100 return False
101
George Burgess IV7dffc252022-08-31 14:37:01 -0700102 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800103 contents = json.load(fread)
104
105 del_keys = []
George Burgess IV7dffc252022-08-31 14:37:01 -0700106 for cfile in contents["files"]:
107 if "OWNERS" in cfile:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800108 del_keys.append(cfile)
109
110 for key in del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -0700111 del contents["files"][key]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800112
113 if del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -0700114 print("{} deleted: {}".format(root, del_keys))
115 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700116 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800117
118 return bool(del_keys)
119
120
def cleanup_owners(vendor_path):
    """Strip OWNERS entries from the checksum files in the vendor directory.

    OWNERS files from vendored crates are not checked in because they
    interfere with the find-owners functionality in gerrit. This walks every
    directory below `vendor_path` and removes all "OWNERS" entries from any
    checksum file found there.

    Args:
        vendor_path: Absolute path to vendor directory.
    """
    deps_cleaned = [
        dir_name
        for parent, dir_names, _ in os.walk(vendor_path)
        for dir_name in dir_names
        if _remove_OWNERS_checksum(os.path.join(parent, dir_name))
    ]

    if deps_cleaned:
        print("Cleanup owners:\n {}".format("\n".join(deps_cleaned)))
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800141
142
def apply_single_patch(patch, workdir):
    """Apply one patch file with `patch -p1` inside `workdir`.

    Args:
        patch: Path of the patch file to apply.
        workdir: Directory the `patch` command is run from.

    Returns:
        True if successful. False otherwise.
    """
    cmd = ["patch", "-p1", "--no-backup-if-mismatch", "-i", patch]
    return subprocess.run(cmd, cwd=workdir).returncode == 0
160
161
def apply_patch_script(script, workdir):
    """Execute a patch script from `workdir` and report whether it succeeded.

    Args:
        script: Path of the executable to run; it is given no arguments.
        workdir: Directory the script is run from.

    Returns:
        True if successful. False otherwise.
    """
    finished = subprocess.run([script], cwd=workdir)
    return finished.returncode == 0
169
170
def determine_vendor_crates(vendor_path):
    """Returns a map of {crate_name: [directory]} at the given vendor_path.

    Every entry of `vendor_path` is expected to be named
    `${crate_name}-${major}.${minor}.${patch}`, optionally followed by a
    `-prerelease` and/or `+build` suffix. The crate name is everything before
    the version.

    Args:
        vendor_path: Directory holding the versioned vendored crates.

    Returns:
        A defaultdict mapping each crate name to the sorted list of directory
        names (not full paths) that hold a version of that crate.

    Raises:
        AssertionError: An entry's name has no recognizable version suffix.
    """
    result = collections.defaultdict(list)
    # The version must be followed by a pre-release/build separator or the end
    # of the name. `(?:...)` is a plain non-capturing group.
    crate_version_re = re.compile(r"-\d+\.\d+\.\d+(?:[+-]|$)")
    # Sorted so that the key order of the result does not depend on the
    # arbitrary order in which the OS lists the directory.
    for crate_name_plus_ver in sorted(os.listdir(vendor_path)):
        version = crate_version_re.search(crate_name_plus_ver)
        assert version, crate_name_plus_ver
        name = crate_name_plus_ver[: version.start()]
        result[name].append(crate_name_plus_ver)

    for crate_list in result.values():
        crate_list.sort()
    return result
184
185
def apply_patches(patches_path, vendor_path):
    """Finds patches and applies them to sub-folders in the vendored crates.

    Each sub-directory of `patches_path` names its target, either as
    `${crate_name}-${version}` (one vendored directory) or as `${crate_name}`
    (every vendored version of that crate). Inside it, `*.patch` files are
    applied with `patch -p1` and any other executable file is run as a patch
    script; everything else is ignored. Directories and patches are processed
    in sorted name order so that runs are reproducible.

    All patches are attempted before failures are reported. When everything
    applied cleanly, the checksums of each patched crate are regenerated.

    Args:
        patches_path: Path to folder with patches. Expect all patches to be one
            level down (matching the crate name).
        vendor_path: Root path to vendored crates directory.

    Raises:
        RuntimeError: A patch directory matches no vendored crate.
        ValueError: At least one patch or patch script failed.
    """
    # Nothing to do when there is no patches directory at all.
    if not pathlib.Path(patches_path).is_dir():
        return

    # Crate directories that were patched; a dict is used as an ordered set.
    patched_targets = {}
    patches_failed = False
    vendor_crate_map = determine_vendor_crates(vendor_path)

    # Look for all patches and apply them. os.listdir() returns entries in
    # arbitrary order, so sort to apply patches in a stable order.
    for d in sorted(os.listdir(patches_path)):
        dir_path = os.path.join(patches_path, d)

        # We don't process patches in root dir
        if not os.path.isdir(dir_path):
            continue

        # We accept one of two forms here:
        # - direct targets (these are named `${crate_name}-${version}`)
        # - simply the crate name (which applies to all versions of the
        #   crate)
        direct_target = os.path.join(vendor_path, d)
        if os.path.isdir(direct_target):
            patch_targets = [d]
        elif d in vendor_crate_map:
            patch_targets = vendor_crate_map[d]
        else:
            raise RuntimeError(f"Unknown crate in {vendor_path}: {d}")

        for patch in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, patch)

            # Skip anything that is not a regular file.
            if not os.path.isfile(file_path):
                continue

            if patch.endswith(".patch"):
                apply = apply_single_patch
            elif os.access(file_path, os.X_OK):
                apply = apply_patch_script
            else:
                # Unrecognized. Skip it.
                continue

            for target_name in patch_targets:
                patched_targets[target_name] = True
                target = os.path.join(vendor_path, target_name)
                print(f"-- Applying {file_path} to {target}")
                if not apply(file_path, target):
                    print(f"Failed to apply {file_path} to {target}")
                    patches_failed = True

    # Do this late, so we can report all of the failing patches in one
    # invocation.
    if patches_failed:
        raise ValueError("Patches failed; please see above logs")

    # Re-run checksums for all modified packages since we applied patches.
    for target_name in patched_targets:
        _rerun_checksums(os.path.join(vendor_path, target_name))
253
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700254
def get_workspace_cargo_toml(working_dir):
    """Returns the list of workspace Cargo.toml paths for working_dir.

    This is a one-element list holding `working_dir/projects/Cargo.toml`;
    the file is not checked for existence.
    """
    manifest = working_dir / "projects" / "Cargo.toml"
    return [manifest]
George Burgess IV40cc91c2022-08-15 13:07:40 -0700258
259
def run_cargo_vendor(working_dir):
    """Re-creates `working_dir/vendor` from scratch with `cargo vendor`.

    Args:
        working_dir: Directory to run inside, as a pathlib path. It must
            contain `projects/Cargo.toml`; crates are vendored into its
            `vendor` sub-directory.

    Raises:
        subprocess.CalledProcessError: `cargo vendor` exited with an error.
    """
    # `cargo vendor` may update dependencies (which may update metadata), so
    # any cached metadata is dropped before running it.
    load_all_package_metadata.cache_clear()

    # Cargo will refuse to revendor into versioned directories, which leads to
    # repeated `./vendor.py` invocations trying to apply patches to
    # already-patched sources. Start from a clean slate to avoid this.
    vendor_dir = working_dir / "vendor"
    if vendor_dir.exists():
        shutil.rmtree(vendor_dir)

    subprocess.check_call(
        [
            "cargo",
            "vendor",
            "--versioned-dirs",
            "-v",
            "--manifest-path=projects/Cargo.toml",
            "--",
            "vendor",
        ],
        cwd=working_dir,
    )
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +0000288
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700289
def load_single_metadata(working_dir, filter_platform):
    """Load `cargo metadata` for the workspace under a given directory.

    Runs `cargo metadata --format-version=1` against `projects/Cargo.toml`
    (relative to `working_dir`) and parses its JSON output.

    Args:
        working_dir: Base directory to run from.
        filter_platform: Filter packages to ones configured for this platform.
            A false value (e.g. None or "") disables the filter.

    Returns:
        The decoded JSON document printed by `cargo metadata`.

    Raises:
        subprocess.CalledProcessError: `cargo metadata` exited with an error.
    """
    cmd = [
        "cargo",
        "metadata",
        "--format-version=1",
        "--manifest-path=projects/Cargo.toml",
    ]
    # Conditionally add platform filter
    if filter_platform:
        cmd.extend(["--filter-platform", filter_platform])
    output = subprocess.check_output(cmd, cwd=working_dir)
    return json.loads(output)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700309
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700310
George Burgess IVfb0a1c42022-11-15 13:47:19 -0700311# Calls to this are somewhat expensive, and repeated a fair few times
312# throughout `./vendor.py`. Measuring locally, having a cache here speeds this
313# script up by 1.4x.
@functools.lru_cache()
def load_all_package_metadata(working_dir, platforms=ALL_SUPPORTED_PLATFORMS):
    """Loads and merges metadata for all platforms in `platforms`.

    `cargo metadata` is queried once per platform. Packages are de-duplicated
    by their "id", keeping the first occurrence, and reduced to the keys "id",
    "license", "license_file", "name" and "version". The rest of the metadata
    is dropped: some of it is hard to merge, other bits of it just aren't
    worth keeping at the moment.

    Results are memoized on the arguments, so both must be hashable and the
    returned list is shared between callers.

    Args:
        working_dir: Base directory to run `cargo metadata` from.
        platforms: Non-empty sequence of platforms to collect packages for.

    Returns:
        A list of package dicts, in order of first appearance.
    """
    assert platforms, f"`platforms` should have things; has {platforms}"

    # Insertion-ordered map of package id -> trimmed package entry.
    merged = {}
    for platform in platforms:
        packages = load_single_metadata(working_dir, platform)["packages"]
        for package in packages:
            package_id = package["id"]
            if package_id in merged:
                continue

            merged[package_id] = {
                "id": package["id"],
                "license": package["license"],
                "license_file": package["license_file"],
                "name": package["name"],
                "version": package["version"],
            }

    return list(merged.values())
343 return results
344
345
class LicenseManager:
    """Manage consolidating licenses for all packages."""

    # These are all the licenses we support. Keys are what is seen in metadata
    # and values are what is expected by ebuilds.
    SUPPORTED_LICENSES = {
        "0BSD": "0BSD",
        "Apache-2.0": "Apache-2.0",
        "BSD-2-Clause": "BSD-2",
        "BSD-3-Clause": "BSD-3",
        "ISC": "ISC",
        "MIT": "MIT",
        "MPL-2.0": "MPL-2.0",
        "unicode": "unicode",
        "Zlib": "ZLIB",
    }

    # Prefer to take attribution licenses in this order. All these require that
    # we actually use the license file found in the package so they MUST have
    # a license file set.
    PREFERRED_ATTRIB_LICENSE_ORDER = ["MIT", "BSD-3", "ISC"]

    # If Apache license is found, always prefer it (simplifies attribution)
    APACHE_LICENSE = "Apache-2.0"

    # Regex for license files found in the vendored directories. Search for
    # these files with re.IGNORECASE.
    #
    # These will be searched in order with the earlier entries being preferred.
    LICENSE_NAMES_REGEX = [
        r"^license-mit$",
        r"^copyright$",
        r"^licen[cs]e.*$",
    ]

    # Some crates have their license file in other crates. This usually occurs
    # because multiple crates are published from the same git repository and the
    # license isn't updated in each sub-crate. In these cases, we can just
    # ignore these packages.
    MAP_LICENSE_TO_OTHER = {
        "failure_derive": "failure",
        "grpcio-compiler": "grpcio",
        "grpcio-sys": "grpcio",
        "protobuf-codegen": "protobuf",
        "protobuf-parse": "protobuf",
        "protobuf-support": "protobuf",
        "rustyline-derive": "rustyline",
    }

    # Map a package to a specific license and license file. Only use this if
    # a package doesn't have an easily discoverable license or exports its
    # license in a weird way. Prefer to patch the project with a license and
    # upstream the patch instead.
    STATIC_LICENSE_MAP = {
        # "package name": ("license name", "license file relative location")
        # Patch for adding these are upstream, but the patch application
        # doesn't apply to `cargo metadata`. This is presumably because it
        # can't detect our vendor directory.
        # https://gitlab.freedesktop.org/slirp/libslirp-sys/-/merge_requests/6
        "libslirp-sys": ("MIT", "LICENSE"),
        # https://gitlab.freedesktop.org/anholt/deqp-runner/-/merge_requests/48
        "deqp-runner": ("MIT", "LICENSE"),
        # Upstream prefers to embed license text inside README.md:
        "riscv": ("ISC", "README.md"),
        "riscv-rt": ("ISC", "README.md"),
        "zerocopy": ("BSD-2", "LICENSE"),
        "zerocopy-derive": ("BSD-2", "LICENSE"),
    }

    def __init__(self, working_dir, vendor_dir):
        """Remembers the directories that licenses are resolved against.

        Args:
            working_dir: Base directory; metadata is loaded from it and
                license file paths are reported relative to it.
            vendor_dir: Directory that holds the `${name}-${version}` crates.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _find_license_in_dir(self, search_dir):
        """Yields the paths of the license-like files directly in a directory.

        A file qualifies when its name matches one of LICENSE_NAMES_REGEX
        (case-insensitively). Files are yielded in order of the first regex
        they match, so earlier LICENSE_NAMES_REGEX entries are preferred; ties
        are broken by file name. This keeps the result independent of the
        arbitrary order in which the OS lists the directory.

        Args:
            search_dir: Directory to scan (not recursive).
        """
        candidates = []
        for entry in os.listdir(search_dir):
            path = os.path.join(search_dir, entry)
            # Ignore anything that's not a file
            if not os.path.isfile(path):
                continue

            # Rank the file by the first regex it matches.
            for rank, regex in enumerate(self.LICENSE_NAMES_REGEX):
                if re.search(regex, entry, re.IGNORECASE):
                    candidates.append((rank, entry, path))
                    break

        for _, _, path in sorted(candidates):
            yield path

    def _guess_license_type(self, license_file):
        """Guesses which license a license file holds.

        The path is checked first for "-MIT", "-APACHE" and "-BSD"; failing
        that, the file's text is searched for "MIT", "Apache" and
        "BSD 3-Clause", in that order.

        Args:
            license_file: Path of the license file.

        Returns:
            "MIT", "APACHE", "BSD-3", or "" when nothing matched.
        """
        if "-MIT" in license_file:
            return "MIT"
        if "-APACHE" in license_file:
            return "APACHE"
        if "-BSD" in license_file:
            return "BSD-3"

        # Only ASCII markers are searched for, so undecodable bytes are
        # replaced rather than allowed to abort the whole run.
        with open(
            license_file, "r", encoding="utf-8", errors="replace"
        ) as f:
            text = f.read()

        if "MIT" in text:
            return "MIT"
        if "Apache" in text:
            return "APACHE"
        if "BSD 3-Clause" in text:
            return "BSD-3"

        return ""

    def _pick_preferred_license(self, licenses_or, license_files):
        """Picks the most preferred license that has a matching license file.

        Args:
            licenses_or: Ebuild names of the licenses a crate may be used under.
            license_files: Candidate license file paths for the crate.

        Returns:
            A (license, license_file) tuple for the first entry of
            PREFERRED_ATTRIB_LICENSE_ORDER that is in `licenses_or` and whose
            type is guessed for one of `license_files`; None if there is none.
        """
        for preferred in self.PREFERRED_ATTRIB_LICENSE_ORDER:
            if preferred not in licenses_or:
                continue

            for path in license_files:
                if self._guess_license_type(path) == preferred:
                    return preferred, path
        return None

    def generate_license(
        self, skip_license_check, print_map_to_file, license_shorthand_file
    ):
        """Generate single massive license file from metadata.

        Builds a map of package name -> chosen license (and license file) for
        every vendored package, optionally dumps it as JSON, and writes the
        sorted list of licenses in use to `license_shorthand_file`.

        Args:
            skip_license_check: Package names to skip entirely; may be None.
            print_map_to_file: If truthy, a path (relative to the working
                directory) that the license map is written to as JSON.
            license_shorthand_file: Path the list of used licenses is written
                to, one license per line after a comment header.

        Raises:
            ValueError: A crate with the special unicode license has no
                LICENSE-UNICODE file, or that license is in use without any
                crate using the Apache license.
            Exception: A package has no acceptable license, or a chosen
                non-Apache license has no license file.
        """
        metadata = load_all_package_metadata(self.working_dir)

        special_unicode_license = "(MIT OR Apache-2.0) AND Unicode-DFS-2016"
        bad_licenses = {}

        # Keep license map ordered so it generates a consistent license map
        license_map = {}

        skip_license_check = skip_license_check or []
        has_unicode_license = False

        for package in metadata:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            pkg_name = package["name"]
            if pkg_name in skip_license_check:
                print(
                    "Skipped license check on {}. Reason: Skipped from command line".format(
                        pkg_name
                    )
                )
                continue

            if pkg_name in self.MAP_LICENSE_TO_OTHER:
                print(
                    "Skipped license check on {}. Reason: License already in {}".format(
                        pkg_name, self.MAP_LICENSE_TO_OTHER[pkg_name]
                    )
                )
                continue

            # Check if we have a static license map for this package. Use the
            # static values if we have it already set.
            if pkg_name in self.STATIC_LICENSE_MAP:
                static_license, static_file = self.STATIC_LICENSE_MAP[pkg_name]
                license_map[pkg_name] = {
                    "license": static_license,
                    "license_file": static_file,
                }
                continue

            # use `or ''` instead of get's default, since `package` may have a
            # None value for 'license'.
            license_expr = package.get("license") or ""

            # We ignore the metadata for license file because most crates don't
            # have it set. Just scan the source for licenses.
            pkg_version = package["version"]
            license_files = list(
                self._find_license_in_dir(
                    os.path.join(self.vendor_dir, f"{pkg_name}-{pkg_version}")
                )
            )

            # FIXME(b/240953811): The code later in this loop is only
            # structured to handle ORs, not ANDs. Fortunately, this license in
            # particular is `AND`ed between a super common license (Apache) and
            # a more obscure one (unicode). This hack is specifically intended
            # for the `unicode-ident` crate, though no crate name check is
            # made, since it's OK other crates happen to have this license.
            if license_expr == special_unicode_license:
                has_unicode_license = True
                # We'll check later to be sure MIT or Apache-2.0 is represented
                # properly.
                for candidate in license_files:
                    if os.path.basename(candidate) == "LICENSE-UNICODE":
                        license_file = candidate
                        break
                else:
                    raise ValueError(
                        "No LICENSE-UNICODE found in " f"{license_files}"
                    )
                license_map[pkg_name] = {
                    "license": license_expr,
                    "license_file": license_file,
                }
                continue

            # If there are multiple licenses, they are delimited with "OR" or "/"
            delim = " OR " if " OR " in license_expr else "/"
            found = [x.strip() for x in license_expr.split(delim)]

            # Filter licenses to ones we support
            licenses_or = [
                self.SUPPORTED_LICENSES[f]
                for f in found
                if f in self.SUPPORTED_LICENSES
            ]

            # NOTE(review): when none of the crate's licenses is supported,
            # `licenses_or` is empty and the crate ends up in neither
            # `license_map` nor `bad_licenses` -- confirm that silently
            # skipping it is intended.

            # If apache license is found, always prefer it because it simplifies
            # license attribution (we can use existing Apache notice)
            if self.APACHE_LICENSE in licenses_or:
                license_map[pkg_name] = {"license": self.APACHE_LICENSE}

            # Handle single license that has at least one license file
            # We pick the first license file and the license
            elif len(licenses_or) == 1:
                if license_files:
                    license_map[pkg_name] = {
                        "license": licenses_or[0],
                        "license_file": os.path.relpath(
                            license_files[0], self.working_dir
                        ),
                    }
                else:
                    bad_licenses[pkg_name] = "{} missing license file".format(
                        licenses_or[0]
                    )
            # Handle multiple licenses
            elif len(licenses_or) > 1:
                # Check preferred licenses in order
                picked = self._pick_preferred_license(
                    licenses_or, license_files
                )
                if picked is None:
                    bad_licenses[pkg_name] = license_expr
                else:
                    picked_license, picked_file = picked
                    license_map[pkg_name] = {
                        "license": picked_license,
                        "license_file": os.path.relpath(
                            picked_file, self.working_dir
                        ),
                    }

        # If we had any bad licenses, we need to abort
        if bad_licenses:
            for name, reason in bad_licenses.items():
                print(
                    "{} had no acceptable licenses: {}".format(name, reason)
                )
            raise Exception("Bad licenses in vendored packages.")

        # Write license map to file
        if print_map_to_file:
            with open(
                os.path.join(self.working_dir, print_map_to_file), "w"
            ) as lfile:
                json.dump(license_map, lfile, sort_keys=True)

        # Raise missing licenses unless we have a valid reason to ignore them
        raise_missing_license = False
        for name, v in license_map.items():
            if (
                "license_file" not in v
                and v.get("license", "") != self.APACHE_LICENSE
            ):
                raise_missing_license = True
                print(
                    " {}: Missing license file. Fix or add to ignorelist.".format(
                        name
                    )
                )

        if raise_missing_license:
            raise Exception(
                "Unhandled missing license file. "
                "Make sure all are accounted for before continuing."
            )

        has_license_types = {x["license"] for x in license_map.values()}
        if has_unicode_license:
            # Replace this license with the actual SPDX license we plan to use.
            has_license_types.remove(special_unicode_license)
            has_license_types.add("unicode")
            if self.APACHE_LICENSE not in has_license_types:
                raise ValueError(
                    "Need the apache license; currently have: "
                    f"{sorted(has_license_types)}"
                )

        sorted_licenses = sorted(has_license_types)
        print("The following licenses are in use:", sorted_licenses)
        header = textwrap.dedent(
            """\
            # File to describe the licenses used by this registry.
            # Used so it's easy to automatically verify ebuilds are updated.
            # Each line is a license. Lines starting with # are comments.
            """
        )
        with open(license_shorthand_file, "w", encoding="utf-8") as f:
            f.write(header)
            f.write("\n".join(sorted_licenses))
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700653
654
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700655# TODO(abps) - This needs to be replaced with datalog later. We should compile
656# all crab files into datalog and query it with our requirements
657# instead.
class CrabManager:
    """Manage audit files."""

    def __init__(self, working_dir, crab_dir):
        """Remembers the directories used by the audit.

        Args:
            working_dir: Directory passed to `load_all_package_metadata`.
            crab_dir: Directory searched for `<name>-<version>.toml` crab
                files.
        """
        self.working_dir = working_dir
        self.crab_dir = crab_dir

    def _check_bad_traits(self, crabdata):
        """Checks that a package's crab audit meets our requirements.

        Args:
            crabdata: Dict with crab keys in standard templated format.

        Returns:
            True if any disqualifying condition holds, False otherwise.

        Raises:
            KeyError: if `crabdata` has no "common" entry.
        """
        common = crabdata["common"]
        # TODO(b/200578411) - Figure out what conditions we should enforce as
        # part of the audit.
        conditions = [
            common.get("deny", None),
        ]

        # If any conditions are true, this crate is not acceptable.
        return any(conditions)

    def _audit_failure(self, package, crabname):
        """Returns why `package` fails the audit, or None if it passes.

        Args:
            package: Dict with at least "name" and "version" keys.
            crabname: The `<name>-<version>` string naming the crab file.
        """
        filename = os.path.join(self.crab_dir, f"{crabname}.toml")

        # If crab file doesn't exist, the crate fails. Include the path we
        # looked at so the report is actionable.
        if not os.path.isfile(filename):
            return f"No crab file at {filename}"

        with open(filename, "r", encoding="utf-8") as f:
            crabdata = toml.load(f)

        # If crab file's crate_name and version keys don't match this
        # package, it also fails. This is just housekeeping... `.get()` makes
        # a crab file that lacks either key count as a mismatch instead of
        # aborting the whole audit with a KeyError.
        if (
            package["name"] != crabdata.get("crate_name")
            or package["version"] != crabdata.get("version")
        ):
            return "Crate name or version don't match"

        if self._check_bad_traits(crabdata):
            return "Failed bad traits check"

        return None

    def verify_traits(self):
        """Verify that all required CRAB traits for this repository are met.

        Raises:
            ValueError: if at least one package fails the audit. The failing
                packages and reasons are printed first.
        """
        failing_crates = {}

        # Verify all packages have a CRAB file associated with it and they meet
        # all our required traits
        for package in load_all_package_metadata(self.working_dir):
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            crabname = "{}-{}".format(package["name"], package["version"])
            reason = self._audit_failure(package, crabname)
            if reason is not None:
                failing_crates[crabname] = reason

        # If we had any failing crates, list them now, and exit with an error.
        if failing_crates:
            print("Failed CRAB audit:")
            for k, v in failing_crates.items():
                print(f"  {k}: {v}")
            raise ValueError("CRAB audit did not complete successfully.")
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700724
725
def clean_source_related_lines_in_place(cargo_toml):
    """Removes all [[bin]] (and similar) sections in `cargo_toml`.

    Also drops `lib.path` and the `build`, `default-run` and `include` keys
    of `[package]`.

    Args:
        cargo_toml: Parsed Cargo.toml as a dict. Modified in place.
    """
    # Cargo spells the example target table `[[example]]` (singular). The
    # plural spelling is still popped so nothing that used to be removed is
    # left behind.
    for target_table in ("bench", "bin", "example", "examples", "test"):
        cargo_toml.pop(target_table, None)

    lib = cargo_toml.get("lib")
    if lib:
        lib.pop("path", None)

    package = cargo_toml.get("package")
    if package:
        for key in ("build", "default-run", "include"):
            package.pop(key, None)
742
743
def clean_features_in_place(cargo_toml):
    """Empties every feature's list in `cargo_toml`, keeping the names.

    Args:
        cargo_toml: Parsed Cargo.toml as a dict. Its "features" table, if
            present and non-empty, is modified in place.
    """
    feature_table = cargo_toml.get("features")
    if feature_table:
        # Each feature gets its own fresh list; the set of keys is unchanged.
        feature_table.update({feature: [] for feature in feature_table})
George Burgess IVd4ff0502022-08-14 23:27:57 -0700752
753
def remove_all_dependencies_in_place(cargo_toml):
    """Strips every dependency table out of `cargo_toml`.

    Covers the top-level `dependencies`, `build-dependencies` and
    `dev-dependencies` tables as well as the same three tables under each
    `target.*` entry. Target entries left empty are dropped; if that empties
    all of them, the whole `target` table is removed.

    Args:
        cargo_toml: Parsed Cargo.toml as a dict. Modified in place.
    """
    dep_tables = ("build-dependencies", "dependencies", "dev-dependencies")

    for table in dep_tables:
        cargo_toml.pop(table, None)

    per_target = cargo_toml.get("target")
    if not per_target:
        return

    for target_table in per_target.values():
        for table in dep_tables:
            target_table.pop(table, None)

    now_empty = [
        cfg for cfg, target_table in per_target.items() if not target_table
    ]

    # Nothing survived under any target: remove the `target` table itself.
    if len(now_empty) == len(per_target):
        del cargo_toml["target"]
        return

    for cfg in now_empty:
        del per_target[cfg]
George Burgess IV0313d782022-08-15 23:45:44 -0700777
778
class CrateDestroyer:
    """Replaces vendored crates that are not in use with empty stubs.

    A crate counts as unused when `load_single_metadata(...,
    filter_platform=None)` lists it but `load_all_package_metadata(...)` does
    not.
    """

    def __init__(self, working_dir, vendor_dir):
        """Remembers the directories to operate on.

        Args:
            working_dir: Directory passed to the metadata loaders.
            vendor_dir: Directory holding the `<name>-<version>` crate
                directories.
        """
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    @staticmethod
    def _neuter_package_table(package):
        """Rewrites the `[package]` table of a crate that is being emptied.

        Args:
            package: The parsed `[package]` table. Modified in place.
        """
        # Change description, license and delete license key
        package["description"] = "Empty crate that should not build."
        package["license"] = "Apache-2.0"

        # The manifest key is `license-file`; `license_file` is how
        # `cargo metadata` spells it. Drop both so no reference to a deleted
        # license file survives.
        package.pop("license-file", None)
        package.pop("license_file", None)
        # If there's no build.rs but we specify `links = "foo"`, Cargo gets
        # upset.
        package.pop("links", None)

    def _modify_cargo_toml(self, pkg_path):
        """Rewrites `pkg_path`/Cargo.toml to describe an empty crate.

        Args:
            pkg_path: Directory of the vendored crate.
        """
        cargo_toml_path = os.path.join(pkg_path, "Cargo.toml")
        with open(cargo_toml_path, "r", encoding="utf-8") as cargo:
            contents = toml.load(cargo)

        self._neuter_package_table(contents["package"])

        # Some packages have cfg-specific dependencies. Remove them here; we
        # don't care about the dependencies of an empty package.
        #
        # This is a load-bearing optimization: `dev-python/toml` doesn't
        # always round-trip dumps(loads(x)) correctly when `x` has keys with
        # strings (b/242589711#comment3). The place this has bitten us so far
        # is target dependencies, which can be harmlessly removed for now.
        #
        # Cleaning features in-place is also necessary, since we're removing
        # dependencies, and a feature can enable features in dependencies.
        # Cargo errors out on `[features] foo = "bar/baz"` if `bar` isn't a
        # dependency.
        clean_features_in_place(contents)
        remove_all_dependencies_in_place(contents)

        # Since we're removing all source files, also be sure to remove
        # source-related keys.
        clean_source_related_lines_in_place(contents)

        with open(cargo_toml_path, "w", encoding="utf-8") as cargo:
            toml.dump(contents, cargo)

    def _replace_source_contents(self, package_path, compile_error):
        """Deletes everything in a crate except its manifest and checksums.

        The crate directory is removed and recreated with the original
        `Cargo.toml` bytes, the original `.cargo-checksum.json` contents and
        a fresh `src/lib.rs`.

        Args:
            package_path: Directory of the vendored crate.
            compile_error: If true, `src/lib.rs` gets `EMPTY_CRATE_BODY`;
                otherwise it gets `NOP_EMPTY_CRATE_BODY`.
        """
        # First load the checksum file before starting
        checksum_file = os.path.join(package_path, ".cargo-checksum.json")
        with open(checksum_file, "r", encoding="utf-8") as csum:
            checksum_contents = json.load(csum)

        # Also load the cargo.toml file which we need to write back
        cargo_file = os.path.join(package_path, "Cargo.toml")
        with open(cargo_file, "rb") as cfile:
            cargo_contents = cfile.read()

        shutil.rmtree(package_path)

        # Make package and src dirs and replace lib.rs
        src_dir = os.path.join(package_path, "src")
        os.makedirs(src_dir, exist_ok=True)
        lib_rs = os.path.join(src_dir, "lib.rs")
        with open(lib_rs, "w", encoding="utf-8") as librs:
            librs.write(
                EMPTY_CRATE_BODY if compile_error else NOP_EMPTY_CRATE_BODY
            )

        # Restore cargo.toml
        with open(cargo_file, "wb") as cfile:
            cfile.write(cargo_contents)

        # Restore checksum
        with open(checksum_file, "w", encoding="utf-8") as csum:
            json.dump(checksum_contents, csum)

    def destroy_unused_crates(self, destroyed_crates_file: pathlib.Path):
        """Empties every unused crate and records which ones were emptied.

        Args:
            destroyed_crates_file: File that receives a header line followed
                by one `<name> <version>` line per emptied crate.
        """
        metadata = [
            (x["name"], x["version"])
            for x in load_single_metadata(
                self.working_dir, filter_platform=None
            )["packages"]
        ]
        used_packages = {
            (x["name"], x["version"])
            for x in load_all_package_metadata(self.working_dir)
        }

        cleaned_packages = []
        # Since we're asking for _all_ metadata packages, we may see
        # duplication. `dict.fromkeys` drops repeats while keeping order, so
        # each crate is processed and reported at most once.
        for package_desc in dict.fromkeys(metadata):
            # Skip used packages
            if package_desc in used_packages:
                continue

            package_name, package_version = package_desc
            # Detect the correct package path to destroy
            pkg_path = os.path.join(
                self.vendor_dir,
                "{}-{}".format(package_name, package_version),
            )
            if not os.path.isdir(pkg_path):
                print(f"Crate {package_name} not found at {pkg_path}")
                continue

            self._replace_source_contents(
                pkg_path, compile_error=package_name not in NOP_EMPTY_CRATES
            )
            self._modify_cargo_toml(pkg_path)
            _rerun_checksums(pkg_path)
            cleaned_packages.append((package_name, package_version))

        for pkg, ver in cleaned_packages:
            print(f"Removed unused crate {pkg}@{ver}")

        # Write a list of crates that've been destroyed. This is used by
        # `scripts/cargo-vet.py`.
        file_header = "# List of destroyed crates autogenerated by vendor.py."
        file_lines = [f"{pkg} {ver}" for pkg, ver in cleaned_packages]
        destroyed_crates_file.write_text(
            "\n".join([file_header] + file_lines), encoding="utf-8"
        )
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700896
George Burgess IV7dffc252022-08-31 14:37:01 -0700897
def main(args):
    """Vendors crates, then patches, prunes, licenses and audits them.

    Args:
        args: Parsed command-line namespace; `skip_license_check` and
            `license_map` are read from it.
    """
    repo_root = pathlib.Path(__file__).parent.absolute()
    artifacts_dir = repo_root / "vendor_artifacts"
    vendor_dir = os.path.join(repo_root, "vendor")

    # First, actually run cargo vendor
    run_cargo_vendor(repo_root)

    # Order matters here:
    # - Apply patches (also re-calculates checksums)
    # - Cleanup any owners files (otherwise, git check-in or checksums are
    #   unhappy)
    # - Destroy unused crates
    apply_patches(os.path.join(repo_root, "patches"), vendor_dir)
    cleanup_owners(vendor_dir)
    CrateDestroyer(repo_root, vendor_dir).destroy_unused_crates(
        artifacts_dir / "destroyed_crates.txt"
    )

    # Combine license file and check for any bad licenses
    license_manager = LicenseManager(repo_root, vendor_dir)
    license_manager.generate_license(
        args.skip_license_check,
        args.license_map,
        os.path.join(artifacts_dir, "licenses_used.txt"),
    )

    # Run crab audit on all packages
    crab_crates_dir = os.path.join(repo_root, "crab", "crates")
    CrabManager(repo_root, crab_crates_dir).verify_traits()
929
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800930
if __name__ == "__main__":
    # Script entry point: build the command line, parse it, and hand the
    # resulting namespace to main().
    parser = argparse.ArgumentParser(description="Vendor packages properly")

    # May be given several times; every value is collected into one list.
    parser.add_argument(
        "--skip-license-check",
        "-s",
        action="append",
        help="Skip the license check on a specific package",
    )
    parser.add_argument(
        "--license-map",
        help="Write license map to this file",
    )

    args = parser.parse_args()
    main(args)
942 main(args)