licensing: script to show review diff of 2 license files.

This script parses 2 credits.html files and shows an easy-to-review
diff between them (packages removed, added, and updated).

TEST=hand tested
polgara:/usr/local/google2/gerrit-int/chromium-os/chromite/license-generation$ ../bin/diff_license_html output.html output-new.html
Package removed: Coreboot-2013.04
Package removed: U-Boot-2013.06
Package removed: alsa-headers-1.0.25
Package removed: argparse-1.2.1
Package removed: autoconf-2.68
Package removed: autoconf-wrapper-10
Package removed: automake-1.11.1
Package removed: automake-wrapper-5
Package removed: binutils-2.22
Package removed: binutils-config-3
Package removed: busybox-1.21.0
Package removed: diffutils-3.2
Package removed: docbook-xml-dtd-4.1.2
Package removed: eselect-opengl-1.2.4
Package removed: flex-2.5.35_p10
Package removed: font-util-1.2.0
Package removed: gdbm-1.9.1
Package removed: glproto-1.4.14
Package removed: gmock-1.6.0
Package removed: gtest-1.6.0
Package removed: gtk-doc-am-1.18
Package removed: htpdate-1.0.4
Package removed: intltool-0.41.0
Package removed: jsonrpclib-0_pre20110820
Package removed: kbd-1.15.3
Package removed: ladspa-sdk-1.13
Package removed: less-441
Package removed: leveldb-0.0.1
Package removed: libaio-0.3.109
Package removed: libatomic_ops-7.2d
Package removed: libtool-2.4
Package removed: llvm-3.2
Package removed: m2crypto-0.21.1
Package removed: make-3.82
Package removed: mime-types-8
Package removed: netifaces-0.8
Package removed: pax-utils-0.4
Package removed: popt-1.16
Package removed: portage-2.1.10.11
Package removed: python-2.7.3
Package removed: python-evdev-0.3.1
Package removed: python-updater-0.10
Package removed: pyyaml-3.09
Package removed: ragel-6.7
Package removed: rsync-3.0.8
Package removed: sandbox-2.6
Package removed: scons-2.0.1
Package removed: setproctitle-1.1.6
Package removed: setuptools-0.6.14
Package removed: sgml-common-0.6.3
Package removed: simplejson-2.5.0
Package removed: stressapptest-1.0.4
Package removed: swig-2.0.4
Package removed: unittest2-0.5.1
Package removed: xcb-proto-1.7.1
Package removed: xxd-1.10

Package added: avahi-0.6.31
Package added: brltty-4.5
Package added: coreboot-2013.04
Package added: crosextrafonts-carlito-20130920
Package added: gcc-libs-0.0.1
Package added: libdaemon-0.14
Package added: u-boot-2013.06

Package updated: curl from 7.23.1 to 7.31.0
Package updated: dbus-glib from 0.100 to 0.100.2
Package updated: libxml2 from 2.7.8 to 2.9.1
Package updated: timezone-data from 2012j to 2013d

BUG=chromium:197970 chromium:271812

Change-Id: I236c65c44990de89495a04da9bb39211d4babe96
Reviewed-on: https://chromium-review.googlesource.com/171143
Reviewed-by: Marc MERLIN <merlin@chromium.org>
Tested-by: Marc MERLIN <merlin@chromium.org>
Commit-Queue: Marc MERLIN <merlin@chromium.org>
diff --git a/scripts/diff_license_html.py b/scripts/diff_license_html.py
new file mode 100644
index 0000000..c465113
--- /dev/null
+++ b/scripts/diff_license_html.py
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+
+"""Compares the packages between 2 images by parsing the license file output."""
+
+import re
+
+from chromite.lib import commandline
+
+
+def GetTreePackages(html_file):
+  """Get the packages listed in an html license file, keyed by name.
+
+  Args:
+    html_file: which html license file to scan for packages.
+
+  Returns:
+    dictionary mapping each package name to its version string.
+  """
+
+  packages = {}
+
+  # Turn <span class="title">ath6k-34</span> into packages['ath6k'] = '34'.
+  # The first (.+) is greedy, so name and version are split at the last '-'
+  # of the match. Lines without a title span are skipped, and a name that
+  # appears again later in the file overwrites the earlier version.
+  exp = re.compile(r'<span class="title">(.+)-(.+)</span>')
+  with open(html_file, 'r') as f:
+    for line in f:
+      match = exp.search(line)
+      if match:
+        packages[match.group(1)] = match.group(2)
+
+  return packages
+
+
+def ComparePkgLists(pkg_list1, pkg_list2):
+  """Print the differences between 2 package dictionaries on stdout.
+
+  Args:
+    pkg_list1: dict from GetTreePackages for the old license file.
+    pkg_list2: dict from GetTreePackages for the new license file.
+
+  Returns:
+    None. Prints removed, added, then updated packages, sorted by name.
+  """
+
+  for removed_package in sorted(set(pkg_list1) - set(pkg_list2)):
+    print 'Package removed: %s-%s' % (
+        removed_package, pkg_list1[removed_package])
+
+  print
+  for added_package in sorted(set(pkg_list2) - set(pkg_list1)):
+    print 'Package added: %s-%s' % (
+        added_package, pkg_list2[added_package])
+
+  print
+  for changed_package in sorted(set(pkg_list1) & set(pkg_list2)):
+    ver1 = pkg_list1[changed_package]
+    ver2 = pkg_list2[changed_package]
+    if ver1 != ver2:
+      print 'Package updated: %s from %s to %s' % (changed_package, ver1, ver2)
+
+
+def main(args):
+  parser = commandline.ArgumentParser(usage=__doc__)
+  parser.add_argument('html1', metavar='license1.html', type='path',
+                      help='old html file')
+  parser.add_argument('html2', metavar='license2.html', type='path',
+                      help='new html file')
+  opts = parser.parse_args(args)
+  # Scan the old file, then the new one, and print what changed on stdout.
+  pkg_list1 = GetTreePackages(opts.html1)
+  pkg_list2 = GetTreePackages(opts.html2)
+  ComparePkgLists(pkg_list1, pkg_list2)