vendor.py: add unicode license parsing support

This addition is hacky and I don't like it, but there's a very specific
license expression we have to special-case here, and blocking this
project on implementing complete license parsing seems like a not-great
trade-off. If anyone wants to volunteer to write a proper parse tree,
please feel free :)

BUG=b:240953811
TEST=./vendor.py

Change-Id: I1d57ce78349e8538b3eac77623cc4431212fc0d4
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/rust_crates/+/3832588
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@google.com>
Tested-by: George Burgess <gbiv@chromium.org>
Commit-Queue: George Burgess <gbiv@chromium.org>
diff --git a/vendor.py b/vendor.py
index dbaf634..4786819 100755
--- a/vendor.py
+++ b/vendor.py
@@ -290,7 +290,8 @@
         'Apache-2.0': 'Apache-2.0',
         'MIT': 'MIT',
         'BSD-3-Clause': 'BSD-3',
-        'ISC': 'ISC'
+        'ISC': 'ISC',
+        'unicode': 'unicode',
     }
 
     # Prefer to take attribution licenses in this order. All these require that
@@ -378,6 +379,7 @@
         license_map = {}
 
         skip_license_check = skip_license_check or []
+        has_unicode_license = False
 
         for package in all_packages:
             # Skip the synthesized Cargo.toml packages that exist solely to
@@ -419,6 +421,30 @@
             license_files = list(self._find_license_in_dir(
                 os.path.join(self.vendor_dir, f'{pkg_name}-{pkg_version}')))
 
+            # FIXME(b/240953811): The code later in this loop is only
+            # structured to handle ORs, not ANDs. Fortunately, this license in
+            # particular is `AND`ed between a super common license (Apache) and
+            # a more obscure one (unicode). This hack is specifically intended
+            # for the `unicode-ident` crate, though no crate name check is
+            # made, since it's OK if other crates happen to have this license.
+            if license == '(MIT OR Apache-2.0) AND Unicode-DFS-2016':
+                has_unicode_license = True
+                # We'll check later to be sure Apache-2.0 is represented
+                # properly; MIT alone does not satisfy that check.
+                for x in license_files:
+                    if os.path.basename(x) == 'LICENSE-UNICODE':
+                        license_file = x
+                        break
+                else:
+                    raise ValueError('No LICENSE-UNICODE found in '
+                                     f'{license_files}')
+                license_map[pkg_name] = {
+                    "license": license,
+                    "license_file": license_file,
+                }
+                has_license_types.add('unicode')
+                continue
+
             # If there are multiple licenses, they are delimited with "OR" or "/"
             delim = ' OR ' if ' OR ' in license else '/'
             found = [x.strip() for x in license.split(delim)]
@@ -503,6 +529,11 @@
                 "Unhandled missing license file. "
                 "Make sure all are accounted for before continuing.")
 
+        if has_unicode_license:
+            if self.APACHE_LICENSE not in has_license_types:
+                raise ValueError('Need the apache license; currently have: '
+                                 f'{sorted(has_license_types)}')
+
         sorted_licenses = sorted(has_license_types)
         print("Add the following licenses to the ebuild:\n",
               sorted_licenses)