pam@chromium.org | f46aed9 | 2012-03-08 09:18:17 +0000 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
dpranke@chromium.org | 2a00962 | 2011-03-01 02:43:31 +0000 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """A database of OWNERS files.""" |
| 6 | |
dpranke@chromium.org | fdecfb7 | 2011-03-16 23:27:23 +0000 | [diff] [blame] | 7 | import collections |
dpranke@chromium.org | 6dada4e | 2011-03-08 22:32:40 +0000 | [diff] [blame] | 8 | import re |
| 9 | |
| 10 | |
# Wildcard owner: if this is present by itself on a line of an OWNERS file,
# it means that everyone can review.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: each side of the '@' must
# be one or more word characters, '-', '+', '%' or '.', and the pattern is
# anchored at both ends.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
dpranke@chromium.org | 2a00962 | 2011-03-01 02:43:31 +0000 | [diff] [blame] | 17 | |
dpranke@chromium.org | 2a00962 | 2011-03-01 02:43:31 +0000 | [diff] [blame] | 18 | |
def _assert_is_collection(obj):
    """Asserts that obj is a sized, iterable container and not a string.

    A single string is rejected explicitly because it is itself iterable and
    sized, so it would otherwise pass the container check.

    Args:
      obj: the object to validate.

    Raises:
      AssertionError: if obj is a string, or is not both iterable and sized.
    """
    # 'basestring' only exists on Python 2; fall back to the Python 3 text and
    # byte string types instead of dying with a NameError there.
    try:
        string_types = (basestring,)  # pylint: disable=undefined-variable
    except NameError:
        string_types = (str, bytes)
    assert not isinstance(obj, string_types)

    # The ABCs live in collections.abc on Python 3 (the aliases in
    # 'collections' were removed in 3.10); Python 2 only has them in
    # 'collections' itself.
    try:
        from collections import abc as collections_abc
    except ImportError:
        import collections as collections_abc

    # Very old interpreters have no ABCs at all; skip the check there.
    if (hasattr(collections_abc, 'Iterable') and
            hasattr(collections_abc, 'Sized')):
        assert (isinstance(obj, collections_abc.Iterable) and
                isinstance(obj, collections_abc.Sized))
dpranke@chromium.org | 923950f | 2011-03-17 23:40:00 +0000 | [diff] [blame] | 26 | |
| 27 | |
class SyntaxErrorInOwnersFile(Exception):
    """Error raised for a line of an OWNERS file that cannot be parsed.

    Attributes:
      path: path of the offending OWNERS file.
      lineno: number of the offending line.
      msg: description of the problem.
    """

    def __init__(self, path, lineno, msg):
        details = (path, lineno, msg)
        # The base class receives the triple as a single argument, so
        # self.args is a one-element tuple holding it.
        super(SyntaxErrorInOwnersFile, self).__init__(details)
        self.path, self.lineno, self.msg = details

    def __str__(self):
        template = "%s:%d syntax error: %s"
        return template % (self.path, self.lineno, self.msg)
dpranke@chromium.org | 898a10e | 2011-03-04 21:54:43 +0000 | [diff] [blame] | 37 | |
| 38 | |
class Database(object):
    """A database of OWNERS files for a repository.

    This class allows you to find a suggested set of reviewers for a list
    of changed files, and see if a list of changed files is covered by a
    list of reviewers.

    OWNERS files are read on demand and the parsed results are kept in
    owned_by, owners_for and stop_looking.
    """

    def __init__(self, root, fopen, os_path):
        """Args:
          root: the path to the root of the Repository
          fopen: function callback to open a text file for reading
          os_path: module/object callback with fields for 'abspath',
              'dirname', 'exists', and 'join'
        """
        self.root = root
        self.fopen = fopen
        self.os_path = os_path

        # Pick a default email regexp to use; callers can override as desired.
        self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

        # Mapping of owners to the paths they own.
        self.owned_by = {EVERYONE: set()}

        # Mapping of paths to authorized owners.
        self.owners_for = {}

        # Set of paths that stop us from looking above them for owners.
        # (This is implicitly true for the root directory).
        self.stop_looking = set([''])

    def reviewers_for(self, files):
        """Returns a suggested set of reviewers that will cover the files.

        files is a sequence of paths relative to (and under) self.root.
        """
        self._check_paths(files)
        self._load_data_needed_for(files)
        return self._covering_set_of_owners_for(files)

    def directories_not_covered_by(self, files, reviewers):
        """Returns the set of directories that are not owned by a reviewer.

        Determines which of the given files are not owned by at least one of
        the reviewers, then returns a set containing the applicable enclosing
        directories, i.e. the ones upward from the files that have OWNERS
        files.

        Args:
          files is a sequence of paths relative to (and under) self.root.
          reviewers is a sequence of strings matching self.email_regexp.
        """
        self._check_paths(files)
        self._check_reviewers(reviewers)
        self._load_data_needed_for(files)

        dirs = set(self.os_path.dirname(f) for f in files)
        covered_dirs = self._dirs_covered_by(reviewers)
        return set(self._enclosing_dir_with_owners(d) for d in dirs
                   if not self._is_dir_covered_by(d, covered_dirs))

    def _check_paths(self, files):
        """Asserts that files is a collection of paths under self.root."""
        def _is_under(f, pfx):
            # NOTE(review): this is a plain string-prefix test, so a sibling
            # such as '<root>foo' also passes; os_path offers no separator
            # to tighten it with.
            joined = self.os_path.join(pfx, f)
            return self.os_path.abspath(joined).startswith(pfx)

        _assert_is_collection(files)
        root = self.os_path.abspath(self.root)
        assert all(_is_under(f, root) for f in files)

    def _check_reviewers(self, reviewers):
        """Asserts that reviewers is a collection of valid email addresses."""
        _assert_is_collection(reviewers)
        assert all(self.email_regexp.match(r) for r in reviewers)

    def _dirs_covered_by(self, reviewers):
        """Returns the directories owned by everyone or by any reviewer."""
        # Start from a copy so the cached set is never handed out or mutated.
        dirs = set(self.owned_by[EVERYONE])
        for r in reviewers:
            dirs |= self.owned_by.get(r, set())
        return dirs

    def _stop_looking(self, dirname):
        """Returns True if the upward search for owners must end at dirname.

        That is the case for directories recorded in self.stop_looking and
        for any path that is its own parent (e.g. '' or a filesystem root).
        The latter guarantees that the upward walks terminate even when they
        are handed an absolute path, which never reaches ''.
        """
        return (dirname in self.stop_looking or
                self.os_path.dirname(dirname) == dirname)

    def _is_dir_covered_by(self, dirname, covered_dirs):
        """Returns True if dirname or an applicable ancestor is covered."""
        while (dirname not in covered_dirs and
               not self._stop_looking(dirname)):
            dirname = self.os_path.dirname(dirname)
        return dirname in covered_dirs

    def _enclosing_dir_with_owners(self, directory):
        """Returns the innermost enclosing directory that has an OWNERS file.

        If no directory up to the stopping point has owners, the stopping
        point itself is returned.
        """
        dirpath = directory
        while dirpath not in self.owners_for:
            if self._stop_looking(dirpath):
                break
            dirpath = self.os_path.dirname(dirpath)
        return dirpath

    def _load_data_needed_for(self, files):
        """Reads the OWNERS files that apply to files and are not loaded."""
        for f in files:
            dirpath = self.os_path.dirname(f)
            # A directory already in owners_for was loaded by an earlier
            # walk, which also covered its ancestors.
            while dirpath not in self.owners_for:
                self._read_owners_in_dir(dirpath)
                if self._stop_looking(dirpath):
                    break
                dirpath = self.os_path.dirname(dirpath)

    def _read_owners_in_dir(self, dirpath):
        """Parses the OWNERS file in dirpath, if any, into the database.

        Blank lines and lines starting with '#' are skipped. 'set noparent'
        marks dirpath as a stopping point. Every other line must be an email
        address or EVERYONE.

        Raises:
          SyntaxErrorInOwnersFile: on an unknown 'set' option or on a line
              that is none of the above.
        """
        owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
        if not self.os_path.exists(owners_path):
            return

        owners_file = self.fopen(owners_path)
        try:
            for lineno, line in enumerate(owners_file, 1):
                line = line.strip()
                if line.startswith('#') or line == '':
                    continue
                if line == 'set noparent':
                    self.stop_looking.add(dirpath)
                    continue
                if line.startswith('set '):
                    raise SyntaxErrorInOwnersFile(
                        owners_path, lineno,
                        'unknown option: "%s"' % line[4:].strip())
                if self.email_regexp.match(line) or line == EVERYONE:
                    self.owned_by.setdefault(line, set()).add(dirpath)
                    self.owners_for.setdefault(dirpath, set()).add(line)
                    continue
                raise SyntaxErrorInOwnersFile(
                    owners_path, lineno,
                    ('line is not a comment, a "set" directive, '
                     'or an email address: "%s"' % line))
        finally:
            # fopen is a caller-supplied callback; close whatever it returned
            # if it can be closed, so file handles are not leaked.
            close = getattr(owners_file, 'close', None)
            if close is not None:
                close()

    def _covering_set_of_owners_for(self, files):
        """Returns a small set of owners that together cover files.

        Greedy approximation: repeatedly picks the owner who owns the most
        still-uncovered directories, until every directory is covered or the
        remaining ones have no owners. Ties are broken by owner name so the
        result is deterministic.
        """
        # Get the set of directories from the files.
        dirs = set(self.os_path.dirname(f) for f in files)

        owned_dirs = {}   # owner -> directories (from dirs) they may review
        dir_owners = {}   # directory -> every owner allowed to review it

        for current_dir in dirs:
            # Collect owners from current_dir and all applicable ancestors.
            # Directories without an OWNERS file of their own must not end
            # the walk; only a stopping point does.
            current_owners = set()
            dirname = current_dir
            while True:
                current_owners.update(self.owners_for.get(dirname, ()))
                if self._stop_looking(dirname):
                    break
                dirname = self.os_path.dirname(dirname)

            dir_owners[current_dir] = current_owners
            for owner in current_owners:
                owned_dirs.setdefault(owner, set()).add(current_dir)

        final_owners = set()
        while dirs:
            # Count, per owner, the directories that are still uncovered.
            owner_count = {}
            for dirname in dirs:
                for owner in dir_owners[dirname]:
                    owner_count[owner] = owner_count.get(owner, 0) + 1

            # If no more directories have OWNERS, we're done.
            if not owner_count:
                break

            # max() keeps the first maximum, so iterating in sorted order
            # resolves ties alphabetically.
            max_owner = max(sorted(owner_count), key=owner_count.get)
            final_owners.add(max_owner)

            # Remove all directories owned by the chosen owner from the
            # remaining set.
            dirs -= owned_dirs[max_owner]

        return final_owners