blob: 83bd138828d6c9aae038761fe284452b3c817249 [file] [log] [blame]
#!/usr/bin/python
#
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#

"""Compares the packages between 2 images by parsing the license file output."""
9
10import re
11
12from chromite.lib import commandline
13
14
def _ParseLicenseLines(lines):
  """Extract package versions and license names from license file lines.

  Args:
    lines: iterable of text lines from a ChromeOS html license file.

  Returns:
    Tuple (packages, licenses): packages is a dict mapping package name to
    version string, licenses is a set of license description strings.
  """
  packages = {}
  licenses = set()

  # Turns <span class="title">ath6k-34</span> into ('ath6k', '34').
  # The first group is greedy, so the split happens at the last '-'.
  pkg_rgx = re.compile(r'<span class="title">(.+)-(.+)</span>')
  # Scanned source licenses need their version number stripped (see below).
  scanned_rgx = re.compile(r'Scanned (Source License .+):', re.IGNORECASE)
  # Patterns whose first group is recorded verbatim as a license name.
  verbatim_rgxs = (
      re.compile(r'<pre>(Custom License .+):', re.IGNORECASE),
      # Gentoo Package Stock License BZIP2:
      # <a ... class="title">Gentoo Package Stock License public-domain</a>
      re.compile(r'Gentoo Package (Stock License [^<:]+)', re.IGNORECASE),
      re.compile(r'class="title">(Custom License .+)</a>', re.IGNORECASE),
  )

  for line in lines:
    match = pkg_rgx.search(line)
    if match:
      packages[match.group(1)] = match.group(2)

    match = scanned_rgx.search(line)
    if match:
      # Turn Source license simplejson-2.5.0/LICENSE.txt
      # into Source license simplejson/LICENSE.txt
      # (we don't want to create diffs based on version numbers)
      lic = re.sub(r'(.+)-([^/]+)/(.+)', r'\1/\3', match.group(1))
      # Old files had this lowercased.
      lic = re.sub(r'Source license', r'Source License', lic)
      licenses.add(lic)

    for rgx in verbatim_rgxs:
      match = rgx.search(line)
      if match:
        licenses.add(match.group(1))

  return (packages, licenses)


def GetPackagesLicensesFromHtml(html_file):
  """Get the list of packages and licenses in a ChromeOS license file.

  The file is scanned line by line with regular expressions; it is not parsed
  as HTML. Lines that match none of the patterns are skipped.

  Args:
    html_file: which html license file to scan for packages.

  Returns:
    Tuple of a dictionary mapping package names to version strings and a set
    of license description strings.

  Raises:
    Nothing from the parsing itself; errors raised by open() on html_file
    propagate to the caller.
  """
  with open(html_file, 'r') as f:
    return _ParseLicenseLines(f)
Marc MERLIN0a621942013-09-30 15:22:38 -070066
67
def ComparePkgLists(pkg_list1, pkg_list2):
  """Compare the package list in 2 dictionaries and output the differences.

  Three sections are printed, separated by an empty line: packages only in
  pkg_list1 (removed), packages only in pkg_list2 (added), and packages
  present in both whose version strings differ (updated). Each section is
  sorted by package name.

  Args:
    pkg_list1: dict of package name to version, for the old image.
    pkg_list2: dict of package name to version, for the new image.

  Returns:
    N/A (outputs result on stdout).
  """
  old_names = set(pkg_list1)
  new_names = set(pkg_list2)

  # print() is called with a single pre-formatted string so the output is
  # identical whether print is a statement (Python 2) or a function.
  for name in sorted(old_names - new_names):
    print('Package removed: %s-%s' % (name, pkg_list1[name]))

  print('')
  for name in sorted(new_names - old_names):
    print('Package added: %s-%s' % (name, pkg_list2[name]))

  print('')
  for name in sorted(old_names & new_names):
    ver1 = pkg_list1[name]
    ver2 = pkg_list2[name]
    if ver1 != ver2:
      print('Package updated: %s from %s to %s' % (name, ver1, ver2))
94
95
def CompareLicenseSets(set1, set2):
  """Compare the license list in 2 sets and output the differences.

  Licenses only in set1 are printed as removed, then an empty line, then
  licenses only in set2 are printed as added. Each section is sorted.

  Args:
    set1: set of license strings for the old image.
    set2: set of license strings for the new image.

  Returns:
    N/A (outputs result on stdout).
  """
  # print() is called with a single pre-formatted string so the output is
  # identical whether print is a statement (Python 2) or a function.
  for removed_license in sorted(set1 - set2):
    print('License removed: %s' % removed_license)

  print('')
  for added_license in sorted(set2 - set1):
    print('License added: %s' % added_license)
113
114
def main(args):
  """Print package and license differences between two html license files.

  Args:
    args: command line arguments: the old html file path followed by the new
      html file path.
  """
  parser = commandline.ArgumentParser(usage=__doc__)
  parser.add_argument('html1', metavar='license1.html', type='path',
                      help='old html file')
  parser.add_argument('html2', metavar='license2.html', type='path',
                      help='new html file')
  opts = parser.parse_args(args)

  # Each scan returns a (packages dict, licenses set) tuple.
  packages1, licenses1 = GetPackagesLicensesFromHtml(opts.html1)
  packages2, licenses2 = GetPackagesLicensesFromHtml(opts.html2)

  ComparePkgLists(packages1, packages2)
  # Empty line between the package report and the license report; written as
  # print('') so it behaves the same as a statement or a function call.
  print('')
  CompareLicenseSets(licenses1, licenses2)