blob: 511007257848575ae82f3d8f6d77c620cd81c3cc [file] [log] [blame]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
George Burgess IV9e0cfde2022-09-27 15:08:15 -07003# Copyright 2021 The ChromiumOS Authors
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6""" This script cleans up the vendor directory.
7"""
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -07008import argparse
George Burgess IV635f7262022-08-09 21:32:20 -07009import collections
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000010import hashlib
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080011import json
12import os
13import pathlib
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070014import re
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -070015import shutil
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000016import subprocess
George Burgess IV04833702022-08-09 22:00:38 -070017import textwrap
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -070018import toml
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000019
# We only care about crates we're actually going to use and that's usually
# limited to ones with cfg(linux). For running `cargo metadata`, limit results
# to only this platform.
DEFAULT_PLATFORM_FILTER = "x86_64-unknown-linux-gnu"
24
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000025
26def _rerun_checksums(package_path):
27 """Re-run checksums for given package.
28
29 Writes resulting checksums to $package_path/.cargo-checksum.json.
30 """
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070031 hashes = dict()
George Burgess IV7dffc252022-08-31 14:37:01 -070032 checksum_path = os.path.join(package_path, ".cargo-checksum.json")
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000033 if not pathlib.Path(checksum_path).is_file():
34 return False
35
George Burgess IV7dffc252022-08-31 14:37:01 -070036 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000037 contents = json.load(fread)
38
39 for root, _, files in os.walk(package_path, topdown=True):
40 for f in files:
41 # Don't checksum an existing checksum file
42 if f == ".cargo-checksum.json":
43 continue
44
45 file_path = os.path.join(root, f)
George Burgess IV7dffc252022-08-31 14:37:01 -070046 with open(file_path, "rb") as frb:
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000047 m = hashlib.sha256()
48 m.update(frb.read())
49 d = m.hexdigest()
50
51 # Key is relative to the package path so strip from beginning
52 key = os.path.relpath(file_path, package_path)
53 hashes[key] = d
54
55 if hashes:
George Burgess IV7dffc252022-08-31 14:37:01 -070056 print(
57 "{} regenerated {} hashes".format(package_path, len(hashes.keys()))
58 )
59 contents["files"] = hashes
60 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070061 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000062
63 return True
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080064
65
66def _remove_OWNERS_checksum(root):
George Burgess IV7dffc252022-08-31 14:37:01 -070067 """Delete all OWNERS files from the checksum file.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080068
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000069 Args:
70 root: Root directory for the vendored crate.
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080071
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +000072 Returns:
73 True if OWNERS was found and cleaned up. Otherwise False.
74 """
George Burgess IV7dffc252022-08-31 14:37:01 -070075 checksum_path = os.path.join(root, ".cargo-checksum.json")
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080076 if not pathlib.Path(checksum_path).is_file():
77 return False
78
George Burgess IV7dffc252022-08-31 14:37:01 -070079 with open(checksum_path, "r") as fread:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080080 contents = json.load(fread)
81
82 del_keys = []
George Burgess IV7dffc252022-08-31 14:37:01 -070083 for cfile in contents["files"]:
84 if "OWNERS" in cfile:
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080085 del_keys.append(cfile)
86
87 for key in del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -070088 del contents["files"][key]
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080089
90 if del_keys:
George Burgess IV7dffc252022-08-31 14:37:01 -070091 print("{} deleted: {}".format(root, del_keys))
92 with open(checksum_path, "w") as fwrite:
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -070093 json.dump(contents, fwrite, sort_keys=True)
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -080094
95 return bool(del_keys)
96
97
def cleanup_owners(vendor_path):
    """Strip OWNERS entries from every checksum file in the vendor directory.

    OWNERS files from vendored crates are not checked in because they
    interfere with the find-owners functionality in gerrit. This walks every
    directory below `vendor_path` and removes all "OWNERS" entries from the
    checksum file found there, if any.

    Args:
        vendor_path: Absolute path to vendor directory.
    """
    cleaned = [
        subdir
        for parent, subdirs, _ in os.walk(vendor_path)
        for subdir in subdirs
        if _remove_OWNERS_checksum(os.path.join(parent, subdir))
    ]

    if cleaned:
        print("Cleanup owners:\n {}".format("\n".join(cleaned)))
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800118
119
def apply_single_patch(patch, workdir):
    """Apply one patch file with `patch -p1` inside `workdir`.

    Args:
        patch: Path to the patch file.
        workdir: Directory to run `patch` in.

    Returns:
        True if successful. False otherwise.
    """
    cmd = ["patch", "-p1", "--no-backup-if-mismatch", "-i", patch]
    completed = subprocess.run(cmd, cwd=workdir)
    return completed.returncode == 0
137
138
def apply_patch_script(script, workdir):
    """Execute a patch script inside `workdir` and report whether it passed.

    Args:
        script: Path to the executable to run; it receives no arguments.
        workdir: Directory to run the script in.

    Returns:
        True if successful. False otherwise.
    """
    exit_code = subprocess.run([script], cwd=workdir).returncode
    return exit_code == 0
146
147
def determine_vendor_crates(vendor_path):
    """Returns a map of {crate_name: [directory]} at the given vendor_path.

    Entries of `vendor_path` are expected to be named
    `${crate_name}-${version}`. Versions may carry a pre-release suffix that
    itself contains dashes (e.g. `clap-3.0.0-beta.2`), so the crate name is
    everything before the first `-<major>.<minor>.<patch>` rather than
    everything before the last dash.

    Args:
        vendor_path: Root path to vendored crates directory.

    Returns:
        A defaultdict mapping each crate name to the sorted list of directory
        names holding a version of that crate.
    """
    # Crate names cannot contain ".", so the first dash that is followed by
    # `N.N.N` is where the version starts.
    version_start = re.compile(r"-\d+\.\d+\.\d+")

    result = collections.defaultdict(list)
    for crate_name_plus_ver in os.listdir(vendor_path):
        match = version_start.search(crate_name_plus_ver)
        if match and match.start() > 0:
            name = crate_name_plus_ver[: match.start()]
        else:
            # Not a recognizable version; fall back to splitting on the last
            # dash. Entries without any dash can't name a crate, so skip
            # them instead of failing with an opaque unpacking error.
            name, sep, _ = crate_name_plus_ver.rpartition("-")
            if not sep:
                continue
        result[name].append(crate_name_plus_ver)

    for crate_list in result.values():
        crate_list.sort()
    return result
158
159
def apply_patches(patches_path, vendor_path):
    """Finds patches and applies them to sub-folders in the vendored crates.

    Patch directories and the patches inside them are processed in sorted
    order, so that patches stacked on the same crate always apply in the same
    sequence regardless of filesystem listing order.

    Args:
        patches_path: Path to folder with patches. Expect all patches to be one
            level down (matching the crate name).
        vendor_path: Root path to vendored crates directory.

    Raises:
        RuntimeError: A patch directory matches no vendored crate.
        ValueError: At least one patch or patch script failed to apply.
    """
    # Don't bother running if patches directory is empty
    if not pathlib.Path(patches_path).is_dir():
        return

    # Names of vendored directories that were touched and need new checksums.
    patched_targets = set()
    patches_failed = False
    vendor_crate_map = determine_vendor_crates(vendor_path)

    # Look for all patches and apply them
    for d in sorted(os.listdir(patches_path)):
        dir_path = os.path.join(patches_path, d)

        # We don't process patches in root dir
        if not os.path.isdir(dir_path):
            continue

        # We accept one of two forms here:
        # - direct targets (these are named `${crate_name}-${version}`)
        # - simply the crate name (which applies to all versions of the
        #   crate)
        direct_target = os.path.join(vendor_path, d)
        if os.path.isdir(direct_target):
            patch_targets = [d]
        elif d in vendor_crate_map:
            patch_targets = vendor_crate_map[d]
        else:
            raise RuntimeError(f"Unknown crate in {vendor_path}: {d}")

        for patch in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, patch)

            # Skip if not a patch file
            if not os.path.isfile(file_path):
                continue

            if patch.endswith(".patch"):
                apply = apply_single_patch
            elif os.access(file_path, os.X_OK):
                apply = apply_patch_script
            else:
                # Unrecognized. Skip it.
                continue

            for target_name in patch_targets:
                patched_targets.add(target_name)
                target = os.path.join(vendor_path, target_name)
                print(f"-- Applying {file_path} to {target}")
                if not apply(file_path, target):
                    print(f"Failed to apply {file_path} to {target}")
                    patches_failed = True

    # Do this late, so we can report all of the failing patches in one
    # invocation.
    if patches_failed:
        raise ValueError("Patches failed; please see above logs")

    # Re-run checksums for all modified packages since we applied patches.
    for target_name in sorted(patched_targets):
        _rerun_checksums(os.path.join(vendor_path, target_name))
227
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700228
def get_workspace_cargo_toml(working_dir):
    """Returns the workspace Cargo.toml paths for working_dir, as a list.

    The list holds a single entry: `projects/Cargo.toml` below working_dir.
    """
    workspace_manifest = working_dir / "projects" / "Cargo.toml"
    return [workspace_manifest]
George Burgess IV40cc91c2022-08-15 13:07:40 -0700232
233
def run_cargo_vendor(working_dir):
    """Re-creates the `vendor` directory by running `cargo vendor`.

    Args:
        working_dir: Directory to run inside. `projects/Cargo.toml` is
            resolved relative to it, and crates are vendored into its
            `vendor` sub-directory.
    """
    # Cargo will refuse to revendor into versioned directories, which leads to
    # repeated `./vendor.py` invocations trying to apply patches to
    # already-patched sources. Start from a clean slate to avoid this.
    existing_vendor = working_dir / "vendor"
    if existing_vendor.exists():
        shutil.rmtree(existing_vendor)

    subprocess.check_call(
        [
            "cargo",
            "vendor",
            "--versioned-dirs",
            "-v",
            "--manifest-path=projects/Cargo.toml",
            "--",
            "vendor",
        ],
        cwd=working_dir,
    )
Abhishek Pandit-Subedi5065a0f2021-06-13 20:38:55 +0000259
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700260
def load_metadata(working_dir, filter_platform=DEFAULT_PLATFORM_FILTER):
    """Load metadata for all projects under a given directory.

    Runs `cargo metadata` against `projects/Cargo.toml`, resolved relative to
    `working_dir`.

    Args:
        working_dir: Base directory to run from.
        filter_platform: Filter packages to ones configured for this platform.
            Pass a falsy value to disable filtering.

    Returns:
        The parsed JSON output of `cargo metadata`.

    Raises:
        subprocess.CalledProcessError: `cargo metadata` exited with an error.
    """
    cmd = [
        "cargo",
        "metadata",
        "--format-version=1",
        "--manifest-path=projects/Cargo.toml",
    ]
    # Conditionally add platform filter
    if filter_platform:
        cmd += ["--filter-platform", filter_platform]
    output = subprocess.check_output(cmd, cwd=working_dir)
    return json.loads(output)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700280
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700281
class LicenseManager:
    """Manage consolidating licenses for all packages."""

    # These are all the licenses we support. Keys are what is seen in metadata
    # and values are what is expected by ebuilds.
    SUPPORTED_LICENSES = {
        "0BSD": "0BSD",
        "Apache-2.0": "Apache-2.0",
        "BSD-3-Clause": "BSD-3",
        "ISC": "ISC",
        "MIT": "MIT",
        "MPL-2.0": "MPL-2.0",
        "unicode": "unicode",
        "Zlib": "ZLIB",
    }

    # Prefer to take attribution licenses in this order. All these require that
    # we actually use the license file found in the package so they MUST have
    # a license file set.
    PREFERRED_ATTRIB_LICENSE_ORDER = ["MIT", "BSD-3", "ISC"]

    # If Apache license is found, always prefer it (simplifies attribution)
    APACHE_LICENSE = "Apache-2.0"

    # Regex for license files found in the vendored directories. Search for
    # these files with re.IGNORECASE.
    #
    # These will be searched in order with the earlier entries being preferred.
    LICENSE_NAMES_REGEX = [
        r"^license-mit$",
        r"^copyright$",
        r"^licen[cs]e.*$",
    ]

    # Some crates have their license file in other crates. This usually occurs
    # because multiple crates are published from the same git repository and the
    # license isn't updated in each sub-crate. In these cases, we can just
    # ignore these packages.
    MAP_LICENSE_TO_OTHER = {
        "failure_derive": "failure",
        "grpcio-compiler": "grpcio",
        "grpcio-sys": "grpcio",
        "rustyline-derive": "rustyline",
    }

    # Map a package to a specific license and license file. Only use this if
    # a package doesn't have an easily discoverable license or exports its
    # license in a weird way. Prefer to patch the project with a license and
    # upstream the patch instead.
    STATIC_LICENSE_MAP = {
        # "package name": ( "license name", "license file relative location")
        # Patch for adding this is upstream, but the patch application doesn't
        # apply to `cargo metadata`. This is presumably because it can't detect
        # our vendor directory.
        # https://gitlab.freedesktop.org/slirp/libslirp-sys/-/merge_requests/6
        "libslirp-sys": ("MIT", "LICENSE"),
        # Upstream prefers to embed license text inside README.md:
        "riscv": ("ISC", "README.md"),
        "riscv-rt": ("ISC", "README.md"),
    }

    def __init__(self, working_dir, vendor_dir):
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    def _find_license_in_dir(self, search_dir):
        """Yields the paths of license files directly inside `search_dir`.

        Files are yielded in the preference order of LICENSE_NAMES_REGEX
        (files matching an earlier regex come first), and alphabetically
        among files matching the same regex, so the result does not depend
        on directory listing order. Each file is yielded at most once.

        Args:
            search_dir: Directory to scan; sub-directories are not entered.
        """
        # Ignore anything that's not a file
        candidates = sorted(
            name
            for name in os.listdir(search_dir)
            if os.path.isfile(os.path.join(search_dir, name))
        )

        already_yielded = set()
        for regex in self.LICENSE_NAMES_REGEX:
            for name in candidates:
                if name in already_yielded:
                    continue
                if re.search(regex, name, re.IGNORECASE):
                    already_yielded.add(name)
                    yield os.path.join(search_dir, name)

    def _guess_license_type(self, license_file):
        """Guesses which license a license file holds.

        The path is checked for a `-MIT`/`-APACHE`/`-BSD` marker first; if
        there is none, the file's text is searched for a license name.

        Args:
            license_file: Path to the license file.

        Returns:
            "MIT", "APACHE", "BSD-3", or "" if no guess could be made.
        """
        if "-MIT" in license_file:
            return "MIT"
        if "-APACHE" in license_file:
            return "APACHE"
        if "-BSD" in license_file:
            return "BSD-3"

        # Only ASCII markers are searched for, so undecodable bytes are
        # replaced rather than allowed to abort the whole run.
        with open(license_file, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()

        if "MIT" in text:
            return "MIT"
        if "Apache" in text:
            return "APACHE"
        if "BSD 3-Clause" in text:
            return "BSD-3"

        return ""

    def generate_license(
        self, skip_license_check, print_map_to_file, license_shorthand_file
    ):
        """Generate single massive license file from metadata.

        Args:
            skip_license_check: Package names to exclude from the license
                check. May be None.
            print_map_to_file: If set, path (relative to the working
                directory) to dump the package -> license map to, as JSON.
            license_shorthand_file: Path of the file that receives the sorted
                list of license names in use.

        Raises:
            ValueError: A unicode-licensed package has no LICENSE-UNICODE
                file, or the unicode license is in use without Apache-2.0.
            Exception: A package has no acceptable license, or is missing a
                required license file.
        """
        metadata = load_metadata(self.working_dir)

        has_license_types = set()
        bad_licenses = {}

        # Keep license map ordered so it generates a consistent license map
        license_map = {}

        skip_license_check = skip_license_check or []
        has_unicode_license = False

        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            pkg_name = package["name"]
            if pkg_name in skip_license_check:
                print(
                    "Skipped license check on {}. Reason: Skipped from command line".format(
                        pkg_name
                    )
                )
                continue

            if pkg_name in self.MAP_LICENSE_TO_OTHER:
                print(
                    "Skipped license check on {}. Reason: License already in {}".format(
                        pkg_name, self.MAP_LICENSE_TO_OTHER[pkg_name]
                    )
                )
                continue

            # Check if we have a static license map for this package. Use the
            # static values if we have it already set.
            if pkg_name in self.STATIC_LICENSE_MAP:
                (static_license, static_file) = self.STATIC_LICENSE_MAP[
                    pkg_name
                ]
                license_map[pkg_name] = {
                    "license": static_license,
                    "license_file": static_file,
                }
                continue

            # use `or ''` instead of get's default, since `package` may have a
            # None value for 'license'.
            license_expr = package.get("license") or ""

            # We ignore the metadata for license file because most crates don't
            # have it set. Just scan the source for licenses.
            pkg_version = package["version"]
            license_files = list(
                self._find_license_in_dir(
                    os.path.join(self.vendor_dir, f"{pkg_name}-{pkg_version}")
                )
            )

            # FIXME(b/240953811): The code later in this loop is only
            # structured to handle ORs, not ANDs. Fortunately, this license in
            # particular is `AND`ed between a super common license (Apache) and
            # a more obscure one (unicode). This hack is specifically intended
            # for the `unicode-ident` crate, though no crate name check is
            # made, since it's OK other crates happen to have this license.
            if license_expr == "(MIT OR Apache-2.0) AND Unicode-DFS-2016":
                has_unicode_license = True
                # We'll check later to be sure MIT or Apache-2.0 is represented
                # properly.
                for x in license_files:
                    if os.path.basename(x) == "LICENSE-UNICODE":
                        license_file = x
                        break
                else:
                    raise ValueError(
                        "No LICENSE-UNICODE found in " f"{license_files}"
                    )
                license_map[pkg_name] = {
                    "license": license_expr,
                    "license_file": license_file,
                }
                has_license_types.add("unicode")
                continue

            # If there are multiple licenses, they are delimited with "OR" or "/"
            delim = " OR " if " OR " in license_expr else "/"
            found = [x.strip() for x in license_expr.split(delim)]

            # Filter licenses to ones we support
            licenses_or = [
                self.SUPPORTED_LICENSES[f]
                for f in found
                if f in self.SUPPORTED_LICENSES
            ]

            # If apache license is found, always prefer it because it simplifies
            # license attribution (we can use existing Apache notice)
            if self.APACHE_LICENSE in licenses_or:
                has_license_types.add(self.APACHE_LICENSE)
                license_map[pkg_name] = {"license": self.APACHE_LICENSE}

            # Handle single license that has at least one license file
            # We pick the first license file and the license
            elif len(licenses_or) == 1:
                if license_files:
                    only_license = licenses_or[0]
                    first_file = license_files[0]

                    has_license_types.add(only_license)
                    license_map[pkg_name] = {
                        "license": only_license,
                        "license_file": os.path.relpath(
                            first_file, self.working_dir
                        ),
                    }
                else:
                    bad_licenses[pkg_name] = "{} missing license file".format(
                        licenses_or[0]
                    )
            # Handle multiple licenses
            elif len(licenses_or) > 1:
                # Check preferred licenses in order
                # NOTE(review): if no preferred license can be matched to a
                # license file, the package ends up in neither license_map
                # nor bad_licenses -- confirm this is intended.
                license_found = False
                for preferred in self.PREFERRED_ATTRIB_LICENSE_ORDER:
                    if preferred not in licenses_or:
                        continue

                    for f in license_files:
                        if self._guess_license_type(f) == preferred:
                            license_found = True
                            has_license_types.add(preferred)
                            license_map[pkg_name] = {
                                "license": preferred,
                                "license_file": os.path.relpath(
                                    f, self.working_dir
                                ),
                            }
                            break

                    # Break out of loop if license is found
                    if license_found:
                        break
            else:
                # None of the package's licenses is one we support.
                bad_licenses[pkg_name] = license_expr

        # If we had any bad licenses, we need to abort
        if bad_licenses:
            for pkg_name, reason in bad_licenses.items():
                print(
                    "{} had no acceptable licenses: {}".format(
                        pkg_name, reason
                    )
                )
            raise Exception("Bad licenses in vendored packages.")

        # Write license map to file
        if print_map_to_file:
            with open(
                os.path.join(self.working_dir, print_map_to_file), "w"
            ) as lfile:
                json.dump(license_map, lfile, sort_keys=True)

        # Raise missing licenses unless we have a valid reason to ignore them
        raise_missing_license = False
        for name, v in license_map.items():
            if (
                "license_file" not in v
                and v.get("license", "") != self.APACHE_LICENSE
            ):
                raise_missing_license = True
                print(
                    " {}: Missing license file. Fix or add to ignorelist.".format(
                        name
                    )
                )

        if raise_missing_license:
            raise Exception(
                "Unhandled missing license file. "
                "Make sure all are accounted for before continuing."
            )

        if has_unicode_license:
            if self.APACHE_LICENSE not in has_license_types:
                raise ValueError(
                    "Need the apache license; currently have: "
                    f"{sorted(has_license_types)}"
                )

        sorted_licenses = sorted(has_license_types)
        print("The following licenses are in use:", sorted_licenses)
        header = textwrap.dedent(
            """\
            # File to describe the licenses used by this registry.
            # Used so it's easy to automatically verify ebuilds are updated.
            # Each line is a license. Lines starting with # are comments.
            """
        )
        with open(license_shorthand_file, "w", encoding="utf-8") as f:
            f.write(header)
            f.write("\n".join(sorted_licenses))
Abhishek Pandit-Subedie393cb72021-08-22 10:41:13 -0700581
582
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700583# TODO(abps) - This needs to be replaced with datalog later. We should compile
584# all crab files into datalog and query it with our requirements
585# instead.
class CrabManager:
    """Manage audit files."""

    def __init__(self, working_dir, crab_dir):
        self.working_dir = working_dir
        self.crab_dir = crab_dir

    def _check_bad_traits(self, crabdata):
        """Checks that a package's crab audit meets our requirements.

        Args:
            crabdata: Dict with crab keys in standard templated format.

        Returns:
            True if the crate is not acceptable. False otherwise.
        """
        common = crabdata["common"]
        # TODO(b/200578411) - Figure out what conditions we should enforce as
        # part of the audit.
        conditions = [
            common.get("deny", None),
        ]

        # If any conditions are true, this crate is not acceptable.
        return any(conditions)

    def verify_traits(self):
        """Verify that all required CRAB traits for this repository are met.

        Raises:
            ValueError: At least one package has a missing, mismatching, or
                failing crab file.
        """
        metadata = load_metadata(self.working_dir)

        failing_crates = {}

        # Verify all packages have a CRAB file associated with it and they meet
        # all our required traits
        for package in metadata["packages"]:
            # Skip the synthesized Cargo.toml packages that exist solely to
            # list dependencies.
            if "path+file:///" in package["id"]:
                continue

            crabname = "{}-{}".format(package["name"], package["version"])
            filename = os.path.join(self.crab_dir, "{}.toml".format(crabname))

            # If crab file doesn't exist, the crate fails
            if not os.path.isfile(filename):
                # Name the expected path so the missing file is easy to add.
                failing_crates[crabname] = f"No crab file at {filename}"
                continue

            with open(filename, "r") as f:
                crabdata = toml.loads(f.read())

            # If crab file's crate_name and version keys don't match this
            # package, it also fails. This is just housekeeping...
            if (
                package["name"] != crabdata["crate_name"]
                or package["version"] != crabdata["version"]
            ):
                failing_crates[crabname] = "Crate name or version don't match"
                continue

            if self._check_bad_traits(crabdata):
                failing_crates[crabname] = "Failed bad traits check"

        # If we had any failing crates, list them now, and exit with an error.
        if failing_crates:
            print("Failed CRAB audit:")
            for k, v in failing_crates.items():
                print(f" {k}: {v}")
            raise ValueError("CRAB audit did not complete successfully.")
Abhishek Pandit-Subedice0f5b22021-09-10 15:50:08 -0700652
653
def clean_features_in_place(cargo_toml):
    """Empties every feature of `cargo_toml` except `default`.

    Each entry of the `features` table other than `default` is replaced with
    an empty list; `default` is left as it is. The dict is modified in place.
    """
    features = cargo_toml.get("features")
    if features:
        for feature_name in features:
            if feature_name != "default":
                features[feature_name] = []
663
664
def remove_all_target_dependencies_in_place(cargo_toml):
    """Strips `target.*.dependencies` tables out of `cargo_toml`.

    Args:
        cargo_toml: parsed Cargo.toml contents (a dict), modified in place.

    For every entry under "target", the "dependencies" and
    "dev-dependencies" tables are dropped. Entries left empty by that are
    deleted, and if every entry ends up empty the whole "target" table is
    removed. A missing or empty "target" table is a no-op.
    """
    target = cargo_toml.get("target")
    if not target:
        return

    # NOTE(review): "build-dependencies" tables are left in place here.
    emptied = []
    for cfg, tables in target.items():
        for section in ("dependencies", "dev-dependencies"):
            tables.pop(section, None)
        if not tables:
            emptied.append(cfg)

    if len(emptied) != len(target):
        # Something survives under "target": prune only the empty entries.
        for cfg in emptied:
            del target[cfg]
        return

    # Nothing is left at all, so drop the table itself.
    del cargo_toml["target"]
George Burgess IV0313d782022-08-15 23:45:44 -0700683
684
class CrateDestroyer:
    """Replaces vendored crates that are not used with empty stub crates.

    A "destroyed" crate keeps its directory, a rewritten Cargo.toml and a
    refreshed .cargo-checksum.json, but its only source file is a lib.rs
    that fails to compile with an explanatory error.

    Attributes:
        working_dir: directory handed to `load_metadata`.
        vendor_dir: directory holding the vendored `<name>-<version>` crates.
    """

    LIB_RS_BODY = """compile_error!("This crate cannot be built for this configuration.");\n"""

    def __init__(self, working_dir, vendor_dir):
        self.working_dir = working_dir
        self.vendor_dir = vendor_dir

    @staticmethod
    def _neuter_package_table(package):
        """Rewrites a `[package]` table (a dict) in place for a stub crate.

        Overwrites the description and license, and drops the keys that
        refer to files or build scripts the stub no longer ships.
        """
        package["description"] = "Empty crate that should not build."
        package["license"] = "Apache-2.0"

        # Cargo spells the manifest key `license-file`; the underscore
        # spelling is removed as well in case a manifest carries it. The
        # license file itself is deleted with the rest of the crate, so the
        # key must not survive.
        package.pop("license-file", None)
        package.pop("license_file", None)
        # If there's no build.rs but we specify `links = "foo"`, Cargo gets
        # upset.
        package.pop("links", None)

    def _modify_cargo_toml(self, pkg_path):
        """Rewrites `pkg_path`/Cargo.toml so it describes an empty crate.

        Args:
            pkg_path: directory of the vendored crate.

        Raises:
            KeyError: if the manifest has no `[package]` table.
        """
        cargo_toml_path = os.path.join(pkg_path, "Cargo.toml")
        # TOML files are UTF-8 by specification; don't depend on the locale.
        with open(cargo_toml_path, "r", encoding="utf-8") as cargo:
            contents = toml.load(cargo)

        # Change description, license and delete license key
        self._neuter_package_table(contents["package"])

        # Some packages have cfg-specific dependencies. Remove them here; we
        # don't care about the dependencies of an empty package.
        #
        # This is a load-bearing optimization: `dev-python/toml` doesn't
        # always round-trip dumps(loads(x)) correctly when `x` has keys with
        # strings (b/242589711#comment3). The place this has bitten us so far
        # is target dependencies, which can be harmlessly removed for now.
        #
        # Cleaning features in-place is also necessary, since we're removing
        # dependencies, and a feature can enable features in dependencies.
        # Cargo errors out on `[features] foo = "bar/baz"` if `bar` isn't a
        # dependency.
        clean_features_in_place(contents)
        remove_all_target_dependencies_in_place(contents)

        with open(cargo_toml_path, "w", encoding="utf-8") as cargo:
            toml.dump(contents, cargo)

    def _replace_source_contents(self, package_path):
        """Wipes `package_path`, leaving only a stub crate skeleton.

        After this call the directory contains exactly:
          * Cargo.toml, byte-for-byte as it was before,
          * .cargo-checksum.json, with the same JSON content as before,
          * src/lib.rs, containing `LIB_RS_BODY`.

        Args:
            package_path: directory of the vendored crate. It must already
                contain Cargo.toml and .cargo-checksum.json.
        """
        # First load the checksum file before starting
        checksum_file = os.path.join(package_path, ".cargo-checksum.json")
        with open(checksum_file, "r", encoding="utf-8") as csum:
            checksum_contents = json.load(csum)

        # Also load the cargo.toml file which we need to write back. It is
        # kept as raw bytes so that it is restored unmodified.
        cargo_file = os.path.join(package_path, "Cargo.toml")
        with open(cargo_file, "rb") as cfile:
            cargo_contents = cfile.read()

        shutil.rmtree(package_path)

        # Make package and src dirs and replace lib.rs
        os.makedirs(os.path.join(package_path, "src"), exist_ok=True)
        librs_path = os.path.join(package_path, "src", "lib.rs")
        with open(librs_path, "w", encoding="utf-8") as librs:
            librs.write(self.LIB_RS_BODY)

        # Restore cargo.toml
        with open(cargo_file, "wb") as cfile:
            cfile.write(cargo_contents)

        # Restore checksum
        with open(checksum_file, "w", encoding="utf-8") as csum:
            json.dump(checksum_contents, csum)

    def destroy_unused_crates(self):
        """Stubs out every vendored crate whose name is not in use.

        A crate counts as used when its name appears in the packages from
        `load_metadata(self.working_dir)`. Candidates come from the same
        call made with `filter_platform=None`.
        NOTE(review): presumably the default call restricts results to
        DEFAULT_PLATFORM_FILTER -- confirm against `load_metadata`.

        Matching is by name only, so any version of a used crate is kept.
        Crates whose `<name>-<version>` directory is missing from
        `vendor_dir` are reported and skipped.
        """
        metadata = load_metadata(self.working_dir, filter_platform=None)
        used_packages = {
            p["name"] for p in load_metadata(self.working_dir)["packages"]
        }

        cleaned_packages = []
        # Since we're asking for _all_ metadata packages, we may see
        # duplication.
        for package in metadata["packages"]:
            # Skip used packages
            if package["name"] in used_packages:
                continue

            # Detect the correct package path to destroy
            pkg_path = os.path.join(
                self.vendor_dir,
                "{}-{}".format(package["name"], package["version"]),
            )
            if not os.path.isdir(pkg_path):
                print(f'Crate {package["name"]} not found at {pkg_path}')
                continue

            # Order matters: wipe the sources first, then rewrite the
            # manifest, and only then recompute checksums over the result.
            self._replace_source_contents(pkg_path)
            self._modify_cargo_toml(pkg_path)
            _rerun_checksums(pkg_path)
            cleaned_packages.append(package["name"])

        for pkg in cleaned_packages:
            print("Removed unused crate", pkg)
Abhishek Pandit-Subedif0eb6e02021-09-24 16:36:12 -0700781
George Burgess IV7dffc252022-08-31 14:37:01 -0700782
def main(args):
    """Vendors crates, then patches, prunes, licenses and audits them.

    Args:
        args: parsed command-line namespace; `skip_license_check` and
            `license_map` are read from it.
    """
    base_dir = pathlib.Path(__file__).parent.absolute()
    patches_dir, vendor_dir, license_shorthand_file = (
        os.path.join(base_dir, leaf)
        for leaf in ("patches", "vendor", "licenses_used.txt")
    )
    crab_dir = os.path.join(base_dir, "crab", "crates")

    # First, actually run cargo vendor
    run_cargo_vendor(base_dir)

    # Order matters here:
    # - Apply patches (also re-calculates checksums)
    # - Cleanup any owners files (otherwise, git check-in or checksums are
    #   unhappy)
    # - Destroy unused crates
    apply_patches(patches_dir, vendor_dir)
    cleanup_owners(vendor_dir)
    CrateDestroyer(base_dir, vendor_dir).destroy_unused_crates()

    # Combine license file and check for any bad licenses
    LicenseManager(base_dir, vendor_dir).generate_license(
        args.skip_license_check, args.license_map, license_shorthand_file
    )

    # Run crab audit on all packages
    CrabManager(base_dir, crab_dir).verify_traits()
812
Abhishek Pandit-Subedib75bd562021-02-25 15:32:22 -0800813
if __name__ == "__main__":
    # Script entry point: parse the command line, then hand off to main().
    parser = argparse.ArgumentParser(description="Vendor packages properly")
    # This flag may be repeated; argparse collects every value into a list
    # and leaves the attribute as None when the flag is never given.
    parser.add_argument(
        "--skip-license-check",
        "-s",
        action="append",
        help="Skip the license check on a specific package",
    )
    parser.add_argument(
        "--license-map",
        help="Write license map to this file",
    )
    args = parser.parse_args()

    main(args)