blob: 7da574a4cded99b96a084efcbbb0385f4367d163 [file] [log] [blame]
# Copyright 2012 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Marc MERLIN0a621942013-09-30 15:22:38 -07004
5"""Compares the packages between 2 images by parsing the license file output."""
6
7import re
8
9from chromite.lib import commandline
10
11
def GetPackagesLicensesFromHtml(html_file):
    """Get the list of packages and licenses in a ChromeOS license file.

    The file is scanned line by line. A single line may contribute a package
    entry as well as any number of license entries.

    Args:
        html_file: which html license file to scan for packages.

    Returns:
        Tuple (packages, licenses): packages is a dict mapping package name to
        version string, licenses is a set of license description strings.

    Raises:
        OSError: if html_file cannot be opened.
        UnicodeDecodeError: if html_file is not valid UTF-8.
    """
    packages = {}
    licenses = set()

    # Both groups are greedy, so the name/version split happens at the last
    # dash inside the span: "foo-1.0-r1" yields ("foo-1.0", "r1").
    pkg_rgx = re.compile(r'<span class="title">(.+)-(.+)</span>')
    # Do not add <pre> in the regex or it would only show the first entry on
    # a package that has multiple hits.
    license_rgx1 = re.compile(r"Scanned (Source License .+):", re.IGNORECASE)
    license_rgx2 = re.compile(r"(Custom License .+):", re.IGNORECASE)
    license_rgx3 = re.compile(r"(Copyright Attribution .+):", re.IGNORECASE)
    # This regex isn't as tight because it has to match these:
    # Gentoo Package Stock License BZIP2:
    # <a ... class="title">Gentoo Package Provided Stock License public-domain</a>
    # <a ... class="title">Gentoo Package Stock License public-domain</a>
    license_rgx4 = re.compile(r"(Stock License [^<:]+)", re.IGNORECASE)
    license_rgx5 = re.compile(
        r'class="title">(Custom License .+)</a>', re.IGNORECASE
    )
    # Strips the "-<version>" suffix from the directory part of a scanned
    # source license path. Compiled once here rather than on every match.
    version_rgx = re.compile(r"(.+)-([^/]+)/(.+)")
    verbatim_rgxs = (license_rgx2, license_rgx3, license_rgx4, license_rgx5)

    # Decode explicitly as UTF-8 so parsing does not depend on the locale of
    # the process running the script.
    with open(html_file, "r", encoding="utf-8") as f:
        for line in f:
            # Grep and turn
            # <span class="title">ath6k-34</span>
            # into
            # ath6k 34
            match = pkg_rgx.search(line)
            if match:
                packages[match.group(1)] = match.group(2)

            match = license_rgx1.search(line)
            if match:
                # Turn Source license simplejson-2.5.0/LICENSE.txt
                # into Source license simplejson/LICENSE.txt
                # (we don't want to create diffs based on version numbers)
                lic = version_rgx.sub(r"\1/\3", match.group(1))
                # Old files had this lowercased.
                lic = lic.replace("Source license", "Source License")
                licenses.add(lic)

            # These license kinds are recorded exactly as captured.
            for rgx in verbatim_rgxs:
                match = rgx.search(line)
                if match:
                    licenses.add(match.group(1))

    return (packages, licenses)
Marc MERLIN0a621942013-09-30 15:22:38 -070068
69
def ComparePkgLists(pkg_list1, pkg_list2):
    """Print the package differences between two package dictionaries.

    Three sections are printed, separated by a blank line: packages only in
    pkg_list1 (removed), packages only in pkg_list2 (added), and packages in
    both whose version differs (updated). Each section is sorted by name.

    Args:
        pkg_list1: dict from GetPackagesLicensesFromHtml.
        pkg_list2: dict from GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    old_names = set(pkg_list1)
    new_names = set(pkg_list2)

    for name in sorted(old_names.difference(new_names)):
        print("Package removed: %s-%s" % (name, pkg_list1[name]))

    print()
    for name in sorted(new_names.difference(old_names)):
        print("Package added: %s-%s" % (name, pkg_list2[name]))

    print()
    for name in sorted(old_names.intersection(new_names)):
        old_version = pkg_list1[name]
        new_version = pkg_list2[name]
        if old_version == new_version:
            # Unchanged packages are not reported.
            continue
        print(
            "Package updated: %s from %s to %s"
            % (name, old_version, new_version)
        )
Marc MERLIN0a621942013-09-30 15:22:38 -0700102
103
def CompareLicenseSets(set1, set2):
    """Print the license differences between two license sets.

    Licenses only in set1 are reported as removed and licenses only in set2
    as added. The two sections are sorted and separated by a blank line.

    Args:
        set1: set from GetPackagesLicensesFromHtml.
        set2: set from GetPackagesLicensesFromHtml.

    Returns:
        N/A (outputs result on stdout).
    """
    only_in_old = sorted(set1 - set2)
    only_in_new = sorted(set2 - set1)

    for name in only_in_old:
        print("License removed: %s" % name)

    print()
    for name in only_in_new:
        print("License added: %s" % name)
Marc MERLINea95f202013-10-02 17:07:00 -0700121
122
def main(args):
    """Diff the packages and licenses listed in two license html files.

    Args:
        args: command line arguments; two positional html file paths, the old
            file first and the new file second.

    Returns:
        N/A (outputs result on stdout).
    """
    parser = commandline.ArgumentParser(description=__doc__)
    for dest, metavar, help_text in (
        ("html1", "license1.html", "old html file"),
        ("html2", "license2.html", "new html file"),
    ):
        parser.add_argument(dest, metavar=metavar, type="path", help=help_text)
    opts = parser.parse_args(args)

    # Parse both files before printing anything.
    old_packages, old_licenses = GetPackagesLicensesFromHtml(opts.html1)
    new_packages, new_licenses = GetPackagesLicensesFromHtml(opts.html2)

    ComparePkgLists(old_packages, new_packages)
    print()
    CompareLicenseSets(old_licenses, new_licenses)
137 CompareLicenseSets(pkg_list1[1], pkg_list2[1])