blob: a95a59359c589b37296f9a9147684b3fe1865bb5 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Marc MERLIN0a621942013-09-30 15:22:38 -07004
5"""Compares the packages between 2 images by parsing the license file output."""
6
Mike Frysinger383367e2014-09-16 15:06:17 -04007from __future__ import print_function
8
Marc MERLIN0a621942013-09-30 15:22:38 -07009import re
10
11from chromite.lib import commandline
12
13
def GetPackagesLicensesFromHtml(html_file):
  """Collect package versions and license labels from a license html file.

  The file is scanned line by line with regular expressions; it is not
  parsed as real HTML.

  Args:
    html_file: path of the html license file to scan.

  Returns:
    A (packages, licenses) tuple. packages is a dict mapping each package
    name to its version string; licenses is a set of license label strings.
    Errors from opening or reading the file are not caught here.
  """
  # Matches <span class="title">ath6k-34</span> and yields ('ath6k', '34').
  # Both groups are greedy, so the split happens at the last dash.
  title_re = re.compile(r'<span class="title">(.+)-(.+)</span>')

  # Do not add <pre> in the regexes below or they would only show the first
  # entry on a package that has multiple hits.
  scanned_re = re.compile(r'Scanned (Source License .+):', re.IGNORECASE)

  # Patterns whose first group is recorded as-is.
  verbatim_res = (
      re.compile(r'(Custom License .+):', re.IGNORECASE),
      re.compile(r'(Copyright Attribution .+):', re.IGNORECASE),
      # This one isn't as tight because it has to match all of these:
      #   Gentoo Package Stock License BZIP2:
      #   <a ... class="title">Gentoo Package Provided Stock License public-domain</a>
      #   <a ... class="title">Gentoo Package Stock License public-domain</a>
      re.compile(r'(Stock License [^<:]+)', re.IGNORECASE),
      re.compile(r'class="title">(Custom License .+)</a>', re.IGNORECASE),
  )

  # Rewrites simplejson-2.5.0/LICENSE.txt as simplejson/LICENSE.txt so that a
  # version bump alone does not show up as a license difference.
  versioned_dir_re = re.compile(r'(.+)-([^/]+)/(.+)')
  # Old files had "Source license" with a lowercase l.
  old_spelling_re = re.compile(r'Source license')

  versions = {}
  labels = set()

  with open(html_file, 'r') as handle:
    for text in handle:
      hit = title_re.search(text)
      if hit:
        name, version = hit.groups()
        versions[name] = version

      hit = scanned_re.search(text)
      if hit:
        label = versioned_dir_re.sub(r'\1/\3', hit.group(1))
        labels.add(old_spelling_re.sub(r'Source License', label))

      for pattern in verbatim_res:
        hit = pattern.search(text)
        if hit:
          labels.add(hit.group(1))

  return (versions, labels)
Marc MERLIN0a621942013-09-30 15:22:38 -070069
70
def ComparePkgLists(pkg_list1, pkg_list2):
  """Print the package differences between two name -> version dicts.

  Three sections are written to stdout, separated by a blank line: packages
  only in pkg_list1 ("removed"), packages only in pkg_list2 ("added"), and
  packages present in both whose versions differ ("updated"). Each section
  is sorted by package name.

  Args:
    pkg_list1: dict of package name to version for the old image (the first
      element of the tuple returned by GetPackagesLicensesFromHtml).
    pkg_list2: dict of package name to version for the new image.

  Returns:
    N/A (outputs result on stdout).
  """
  old_names = set(pkg_list1)
  new_names = set(pkg_list2)

  for name in sorted(old_names - new_names):
    print('Package removed: %s-%s' % (name, pkg_list1[name]))
  print()

  for name in sorted(new_names - old_names):
    print('Package added: %s-%s' % (name, pkg_list2[name]))
  print()

  for name in sorted(old_names & new_names):
    old_ver = pkg_list1[name]
    new_ver = pkg_list2[name]
    if old_ver != new_ver:
      print('Package updated: %s from %s to %s' % (name, old_ver, new_ver))
Marc MERLIN0a621942013-09-30 15:22:38 -070097
98
def CompareLicenseSets(set1, set2):
  """Print the licenses that differ between two sets.

  Licenses only in set1 are reported as removed, then a blank line is
  printed, then licenses only in set2 are reported as added. Both groups
  are printed in sorted order.

  Args:
    set1: set of license labels for the old image (the second element of the
      tuple returned by GetPackagesLicensesFromHtml).
    set2: set of license labels for the new image.

  Returns:
    N/A (outputs result on stdout).
  """
  only_in_old = sorted(set1 - set2)
  only_in_new = sorted(set2 - set1)

  for label in only_in_old:
    print('License removed: %s' % label)

  print()
  for label in only_in_new:
    print('License added: %s' % label)
Marc MERLINea95f202013-10-02 17:07:00 -0700116
117
def main(args):
  """Diff the packages and licenses listed in two license html files.

  Args:
    args: command line arguments; two positionals are expected, the old
      html file followed by the new html file.
  """
  parser = commandline.ArgumentParser(usage=__doc__)
  # Both positionals use the parser's 'path' type (registered by
  # commandline.ArgumentParser, not visible in this file).
  for dest, metavar, description in (
      ('html1', 'license1.html', 'old html file'),
      ('html2', 'license2.html', 'new html file'),
  ):
    parser.add_argument(dest, metavar=metavar, type='path', help=description)
  opts = parser.parse_args(args)

  # Parse both files before printing anything.
  old_packages, old_licenses = GetPackagesLicensesFromHtml(opts.html1)
  new_packages, new_licenses = GetPackagesLicensesFromHtml(opts.html2)

  ComparePkgLists(old_packages, new_packages)
  print()
  CompareLicenseSets(old_licenses, new_licenses)