blob: d0d31f470d2d00b35ac97028780670f5ce292001 [file] [log] [blame]
Marc MERLIN0a621942013-09-30 15:22:38 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Marc MERLIN0a621942013-09-30 15:22:38 -07004
5"""Compares the packages between 2 images by parsing the license file output."""
6
7import re
8
9from chromite.lib import commandline
10
11
def GetPackagesLicensesFromHtml(html_file):
  """Get the list of packages and licenses in a ChromeOS license file.

  The file is scanned line by line with regular expressions; it is not parsed
  as real HTML.

  Args:
    html_file: path of the html license file to scan for packages.

  Returns:
    A (packages, licenses) tuple. packages is a dict mapping each package name
    to its version string; licenses is a set of license description strings.

  Raises:
    OSError: if html_file cannot be opened or read.
    UnicodeDecodeError: if html_file is not valid UTF-8.
  """
  # Grep and turn
  #   <span class="title">ath6k-34</span>
  # into
  #   ath6k 34
  # The first group is greedy, so with a single title span on the line the
  # name/version split falls on the last '-' before the closing tag.
  pkg_rgx = re.compile(r'<span class="title">(.+)-(.+)</span>')
  # Do not add <pre> in the regex or it would only show the first entry on
  # a package that has multiple hits.
  license_rgx1 = re.compile(r'Scanned (Source License .+):', re.IGNORECASE)
  license_rgx2 = re.compile(r'(Custom License .+):', re.IGNORECASE)
  license_rgx3 = re.compile(r'(Copyright Attribution .+):', re.IGNORECASE)
  # This regex isn't as tight because it has to match these:
  # Gentoo Package Stock License BZIP2:
  # <a ... class="title">Gentoo Package Provided Stock License public-domain</a>
  # <a ... class="title">Gentoo Package Stock License public-domain</a>
  license_rgx4 = re.compile(r'(Stock License [^<:]+)', re.IGNORECASE)
  license_rgx5 = re.compile(r'class="title">(Custom License .+)</a>',
                            re.IGNORECASE)
  # Turn Source license simplejson-2.5.0/LICENSE.txt
  # into Source license simplejson/LICENSE.txt
  # (we don't want to create diffs based on version numbers)
  version_rgx = re.compile(r'(.+)-([^/]+)/(.+)')
  # These patterns capture the license text verbatim, unlike license_rgx1
  # whose capture is normalized before being recorded.
  verbatim_rgxs = (license_rgx2, license_rgx3, license_rgx4, license_rgx5)

  packages = {}
  licenses = set()

  # Decode explicitly as UTF-8 so the result does not depend on the locale of
  # the machine running the comparison.
  with open(html_file, 'r', encoding='utf-8') as f:
    for line in f:
      match = pkg_rgx.search(line)
      if match:
        packages[match.group(1)] = match.group(2)

      match = license_rgx1.search(line)
      if match:
        lic = version_rgx.sub(r'\1/\3', match.group(1))
        # Old files had this lowercased.
        lic = lic.replace('Source license', 'Source License')
        licenses.add(lic)

      for rgx in verbatim_rgxs:
        match = rgx.search(line)
        if match:
          licenses.add(match.group(1))

  return (packages, licenses)
Marc MERLIN0a621942013-09-30 15:22:38 -070067
68
def ComparePkgLists(pkg_list1, pkg_list2):
  """Print the package differences between two name -> version dictionaries.

  Three groups are written to stdout, each sorted by package name and
  separated from the next by an empty line: packages only in pkg_list1
  (removed), packages only in pkg_list2 (added), and packages present in both
  whose versions differ (updated).

  Args:
    pkg_list1: dict from GetPackagesLicensesFromHtml (old image).
    pkg_list2: dict from GetPackagesLicensesFromHtml (new image).

  Returns:
    N/A (outputs result on stdout).
  """
  old_names = set(pkg_list1)
  new_names = set(pkg_list2)

  for name in sorted(old_names - new_names):
    print('Package removed: %s-%s' % (name, pkg_list1[name]))
  print()

  for name in sorted(new_names - old_names):
    print('Package added: %s-%s' % (name, pkg_list2[name]))
  print()

  for name in sorted(old_names & new_names):
    old_ver, new_ver = pkg_list1[name], pkg_list2[name]
    if old_ver == new_ver:
      # Same version on both sides: nothing to report.
      continue
    print('Package updated: %s from %s to %s' % (name, old_ver, new_ver))
Marc MERLIN0a621942013-09-30 15:22:38 -070095
96
def CompareLicenseSets(set1, set2):
  """Print the licenses that differ between two license sets.

  Licenses only in set1 are listed as removed, then an empty line is written,
  then licenses only in set2 are listed as added. Each group is sorted.

  Args:
    set1: set from GetPackagesLicensesFromHtml (old image).
    set2: set from GetPackagesLicensesFromHtml (new image).

  Returns:
    N/A (outputs result on stdout).
  """
  only_in_old = sorted(set1 - set2)
  only_in_new = sorted(set2 - set1)

  for lic in only_in_old:
    print('License removed: %s' % lic)

  print()

  for lic in only_in_new:
    print('License added: %s' % lic)
Marc MERLINea95f202013-10-02 17:07:00 -0700114
115
def main(args):
  """Compare two license html files and print what changed between them.

  Package differences are printed first, then an empty line, then license
  differences.

  Args:
    args: command line arguments; two positionals, the old and the new html
      license file.
  """
  parser = commandline.ArgumentParser(description=__doc__)
  # NOTE(review): type='path' is presumably a chromite commandline extension
  # that normalizes the path -- confirm against chromite.lib.commandline.
  parser.add_argument('html1', metavar='license1.html', type='path',
                      help='old html file')
  parser.add_argument('html2', metavar='license2.html', type='path',
                      help='new html file')
  opts = parser.parse_args(args)

  old_packages, old_licenses = GetPackagesLicensesFromHtml(opts.html1)
  new_packages, new_licenses = GetPackagesLicensesFromHtml(opts.html2)

  ComparePkgLists(old_packages, new_packages)
  print()
  CompareLicenseSets(old_licenses, new_licenses)