blob: acdeab2a9ab562de20aae71983aa2d5702a06880 [file] [log] [blame]
Marc MERLIN0a621942013-09-30 15:22:38 -07001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6#
7
8"""Compares the packages between 2 images by parsing the license file output."""
9
10import re
11
12from chromite.lib import commandline
13
14
def GetTreePackages(html_file):
  """Collect the packages and licenses listed in an html license file.

  The file is scanned line by line. Package titles and license references
  are each expected to fit on a single line.

  Args:
    html_file: which html license file to scan for packages.

  Returns:
    tuple of dictionary of packages and version numbers and set of licenses.

  Raises:
    AssertionError: if a license line matched but produced an empty license.
  """

  packages = {}
  licenses = set()

  # Both groups are greedy, so the name/version split happens at the LAST
  # '-' found in the title.
  pkg_rgx = re.compile(r'<span class="title">(.+)-(.+)</span>')
  license_rgx = re.compile(
      r'(?:Gentoo Package (Stock License .+)</a>|Scanned (Source license .+):)')
  # Matches "<name>-<version>/<path>" so the version can be dropped.
  versioned_path_rgx = re.compile(r'(.+)-([^/]+)/(.+)')

  with open(html_file, 'r') as f:
    for line in f:
      # Grep and turn
      # <span class="title">ath6k-34</span>
      # into
      # ath6k 34
      match = pkg_rgx.search(line)
      if match:
        packages[match.group(1)] = match.group(2)

      match = license_rgx.search(line)
      if not match:
        continue

      if match.group(1):
        # Stock license: used verbatim.
        lic = match.group(1)
      else:
        # Turn Source license simplejson-2.5.0/LICENSE.txt
        # into Source license simplejson/LICENSE.txt
        # (we don't want to create diffs based on version numbers)
        lic = versioned_path_rgx.sub(r'\1/\3', match.group(2))

      # Validate before recording so an empty value never lands in the set,
      # and report which input line caused the problem.
      if not lic:
        raise AssertionError('License for %s came up empty' % line.strip())
      licenses.add(lic)

  return (packages, licenses)
Marc MERLIN0a621942013-09-30 15:22:38 -070060
61
def ComparePkgLists(pkg_list1, pkg_list2):
  """Compare the package list in 2 dictionaries and output the differences.

  Prints three sections separated by blank lines: packages only in
  pkg_list1 (removed), packages only in pkg_list2 (added), and packages
  present in both whose versions differ (updated). Each section is sorted
  by package name.

  Args:
    pkg_list1: dict of package name to version (old image).
    pkg_list2: dict of package name to version (new image).

  Returns:
    N/A (outputs result on stdout).
  """

  old_names = set(pkg_list1)
  new_names = set(pkg_list2)

  # Single-argument parenthesised print behaves the same as the print
  # statement on Python 2 and is also valid on Python 3.
  for removed_package in sorted(old_names - new_names):
    print('Package removed: %s-%s' % (
        removed_package, pkg_list1[removed_package]))

  print('')
  for added_package in sorted(new_names - old_names):
    print('Package added: %s-%s' % (
        added_package, pkg_list2[added_package]))

  print('')
  for changed_package in sorted(old_names & new_names):
    ver1 = pkg_list1[changed_package]
    ver2 = pkg_list2[changed_package]
    if ver1 != ver2:
      print('Package updated: %s from %s to %s' % (
          changed_package, ver1, ver2))
88
89
def CompareLicenseSets(set1, set2):
  """Compare the license list in 2 sets and output the differences.

  Prints the licenses only in set1 (removed), a blank line, then the
  licenses only in set2 (added). Each group is sorted.

  Args:
    set1: set of license names (old image).
    set2: set of license names (new image).

  Returns:
    N/A (outputs result on stdout).
  """

  # Single-argument parenthesised print behaves the same as the print
  # statement on Python 2 and is also valid on Python 3.
  for removed_license in sorted(set1 - set2):
    print('License removed: %s' % (removed_license))

  print('')
  for added_license in sorted(set2 - set1):
    print('License added: %s' % (added_license))
107
108
def main(args):
  """Parse two license html files and print their package/license diffs.

  Args:
    args: command line arguments; two positionals, the old and the new
      html license file.
  """
  parser = commandline.ArgumentParser(usage=__doc__)
  # NOTE(review): type='path' is handled by chromite's ArgumentParser;
  # presumably it normalizes the path -- confirm in chromite.lib.commandline.
  parser.add_argument('html1', metavar='license1.html', type='path',
                      help='old html file')
  parser.add_argument('html2', metavar='license2.html', type='path',
                      help='new html file')
  opts = parser.parse_args(args)

  # GetTreePackages returns (packages dict, licenses set).
  packages1, licenses1 = GetTreePackages(opts.html1)
  packages2, licenses2 = GetTreePackages(opts.html2)

  ComparePkgLists(packages1, packages2)
  # Blank separator line; print('') works on both Python 2 and Python 3.
  print('')
  CompareLicenseSets(licenses1, licenses2)