# Copyright 2012 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Compares the packages between 2 images by parsing the license file output."""
6
7import re
8
9from chromite.lib import commandline
10
11
def GetPackagesLicensesFromHtml(html_file):
    """Extract package versions and license names from a license html file.

    The file is read line by line and each line is tested against a handful
    of regular expressions; no actual HTML parsing is performed.

    Args:
        html_file: path of the html license file to scan.

    Returns:
        A (packages, licenses) tuple. packages is a dict mapping each package
        name to its version string; licenses is a set of license description
        strings.

    Raises:
        OSError: if html_file cannot be opened.
    """
    # Turns
    #   <span class="title">ath6k-34</span>
    # into the pair ("ath6k", "34"). Both groups are greedy, so the split
    # lands on the last dash of the title.
    title_re = re.compile(r'<span class="title">(.+)-(.+)</span>')

    # Do not add <pre> to this pattern or it would only show the first entry
    # on a package that has multiple hits.
    scanned_re = re.compile(r"Scanned (Source License .+):", re.IGNORECASE)

    # Drops the "-<version>" suffix from the first path component, e.g.
    #   Source license simplejson-2.5.0/LICENSE.txt
    # becomes
    #   Source license simplejson/LICENSE.txt
    # so that version bumps alone do not show up as license differences.
    versioned_dir_re = re.compile(r"(.+)-([^/]+)/(.+)")

    # Patterns whose first group is recorded exactly as matched. The "Stock
    # License" one is looser than the rest because it has to cope with both
    #   Gentoo Package Stock License BZIP2:
    # and the anchor form
    #   <a ... class="title">Gentoo Package Stock License public-domain</a>
    # (as well as the "Provided Stock License" variant of the latter).
    verbatim_res = (
        re.compile(r"(Custom License .+):", re.IGNORECASE),
        re.compile(r"(Copyright Attribution .+):", re.IGNORECASE),
        re.compile(r"(Stock License [^<:]+)", re.IGNORECASE),
        re.compile(r'class="title">(Custom License .+)</a>', re.IGNORECASE),
    )

    versions = {}
    found_licenses = set()

    with open(html_file, "r", encoding="utf-8") as html:
        for text in html:
            title = title_re.search(text)
            if title:
                versions[title.group(1)] = title.group(2)

            scanned = scanned_re.search(text)
            if scanned:
                name = versioned_dir_re.sub(r"\1/\3", scanned.group(1))
                # Old files had this lowercased.
                name = name.replace("Source license", "Source License")
                found_licenses.add(name)

            for pattern in verbatim_res:
                hit = pattern.search(text)
                if hit:
                    found_licenses.add(hit.group(1))

    return (versions, found_licenses)
Marc MERLIN0a621942013-09-30 15:22:38 -070070
71
def ComparePkgLists(pkg_list1, pkg_list2):
    """Print the packages removed, added or updated between 2 dictionaries.

    Three sections are written to stdout in a fixed order: packages only in
    pkg_list1 ("removed"), packages only in pkg_list2 ("added"), and packages
    present in both whose versions differ ("updated"). Each section is sorted
    by package name and an empty line is printed after the first two.

    Args:
        pkg_list1: dict of package name to version, as returned by
            GetPackagesLicensesFromHtml.
        pkg_list2: dict of package name to version, as returned by
            GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    old_names = set(pkg_list1)
    new_names = set(pkg_list2)

    for name in sorted(old_names - new_names):
        print("Package removed: %s-%s" % (name, pkg_list1[name]))
    print()

    for name in sorted(new_names - old_names):
        print("Package added: %s-%s" % (name, pkg_list2[name]))
    print()

    for name in sorted(old_names & new_names):
        old_version = pkg_list1[name]
        new_version = pkg_list2[name]
        if old_version == new_version:
            # Same version on both sides: nothing to report.
            continue
        print(
            "Package updated: %s from %s to %s"
            % (name, old_version, new_version)
        )
Marc MERLIN0a621942013-09-30 15:22:38 -0700104
105
def CompareLicenseSets(set1, set2):
    """Print the licenses that appear in only one of the 2 sets.

    Licenses found only in set1 are reported as removed, then an empty line
    is printed, then licenses found only in set2 are reported as added. Both
    groups are printed in sorted order.

    Args:
        set1: set of licenses, as returned by GetPackagesLicensesFromHtml.
        set2: set of licenses, as returned by GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    for gone in sorted(set1 - set2):
        print("License removed: %s" % gone)
    print()

    for new in sorted(set2 - set1):
        print("License added: %s" % new)
Marc MERLINea95f202013-10-02 17:07:00 -0700123
124
def main(args):
    """Report package and license differences between 2 license html files.

    Args:
        args: list of command line arguments, handed to the argument parser.
            Two positionals are expected: the old html file, then the new
            one.
    """
    parser = commandline.ArgumentParser(description=__doc__)
    for dest, metavar, help_text in (
        ("html1", "license1.html", "old html file"),
        ("html2", "license2.html", "new html file"),
    ):
        parser.add_argument(dest, metavar=metavar, type="path", help=help_text)
    opts = parser.parse_args(args)

    # Each scan returns a (packages dict, licenses set) tuple.
    old_packages, old_licenses = GetPackagesLicensesFromHtml(opts.html1)
    new_packages, new_licenses = GetPackagesLicensesFromHtml(opts.html2)

    ComparePkgLists(old_packages, new_packages)
    print()
    CompareLicenseSets(old_licenses, new_licenses)
139 CompareLicenseSets(pkg_list1[1], pkg_list2[1])