blob: a863a7853ca78ffa9740289cd1d90e558071e813 [file] [log] [blame]
pam@chromium.orgf46aed92012-03-08 09:18:17 +00001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
dpranke@chromium.org2a009622011-03-01 02:43:31 +00002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
dpranke@chromium.org17cc2442012-10-17 21:12:09 +00005"""A database of OWNERS files.
6
7OWNERS files indicate who is allowed to approve changes in a specific directory
8(or who is allowed to make changes without needing approval of another OWNER).
9Note that all changes must still be reviewed by someone familiar with the code,
10so you may need approval from both an OWNER and a reviewer in many cases.
11
12The syntax of the OWNERS file is, roughly:
13
14lines := (\s* line? \s* "\n")*
15
16line := directive
dpranke@chromium.orgd16e48b2012-12-03 21:53:49 +000017 | "per-file" \s+ glob \s* "=" \s* directive
dpranke@chromium.org17cc2442012-10-17 21:12:09 +000018 | comment
19
20directive := "set noparent"
21 | email_address
22 | "*"
23
24glob := [a-zA-Z0-9_-*?]+
25
26comment := "#" [^"\n"]*
27
28Email addresses must follow the foo@bar.com short form (exact syntax given
29in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
30shell conventions, and relative and absolute paths are not allowed (i.e.,
31globs only refer to the files in the current directory).
32
33If a user's email is one of the email_addresses in the file, the user is
34considered an "OWNER" for all files in the directory.
35
36If the "per-file" directive is used, the line only applies to files in that
37directory that match the filename glob specified.
38
39If the "set noparent" directive is used, then only entries in this OWNERS file
40apply to files in this directory; if the "set noparent" directive is not
41used, then entries in OWNERS files in enclosing (upper) directories also
42apply (up until a "set noparent" is encountered).
43
44If "per-file glob=set noparent" is used, then global directives are ignored
45for the glob, and only the "per-file" owners are used for files matching that
46glob.
47
48Examples for all of these combinations can be found in tests/owners_unittest.py.
49"""
dpranke@chromium.org2a009622011-03-01 02:43:31 +000050
dpranke@chromium.orgfdecfb72011-03-16 23:27:23 +000051import collections
dpranke@chromium.org055a0de2012-12-19 20:13:33 +000052import random
dpranke@chromium.org6dada4e2011-03-08 22:32:40 +000053import re
54
55
# Wildcard directive: if this is present by itself on a line of an OWNERS
# file, this means that everyone can review.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: the pattern is anchored
# at both ends and only requires one or more word characters, '-', '+', '%'
# or '.' on each side of a single '@'.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
dpranke@chromium.org2a009622011-03-01 02:43:31 +000062
dpranke@chromium.org2a009622011-03-01 02:43:31 +000063
def _assert_is_collection(obj):
  """Asserts that obj is a sized, iterable collection and not a bare string.

  A single string is itself iterable and sized, so it is rejected explicitly;
  passing 'foo' where ['foo'] was meant would otherwise go unnoticed.

  Raises:
    AssertionError: if obj is a string, or is not both iterable and sized.
  """
  try:
    string_types = basestring  # Python 2: covers both str and unicode.
  except NameError:
    string_types = str  # Python 3: basestring no longer exists.
  assert not isinstance(obj, string_types)

  # The abstract base classes live in collections.abc on Python 3 (the
  # aliases on the collections module itself were removed in 3.10); fall back
  # to the collections module for Python 2.
  try:
    from collections import abc as collections_abc
  except ImportError:
    collections_abc = collections
  # pylint: disable=E1101
  if (hasattr(collections_abc, 'Iterable') and
      hasattr(collections_abc, 'Sized')):
    assert (isinstance(obj, collections_abc.Iterable) and
            isinstance(obj, collections_abc.Sized))
dpranke@chromium.org923950f2011-03-17 23:40:00 +000071
72
class SyntaxErrorInOwnersFile(Exception):
  """Error describing a line of an OWNERS file that could not be parsed.

  The path of the file, the line number and a message are kept both as the
  exception's single (path, lineno, msg) argument and as the attributes
  'path', 'lineno' and 'msg'.
  """

  def __init__(self, path, lineno, msg):
    details = (path, lineno, msg)
    super(SyntaxErrorInOwnersFile, self).__init__(details)
    self.path, self.lineno, self.msg = details

  def __str__(self):
    fields = (self.path, self.lineno, self.msg)
    return "%s:%d syntax error: %s" % fields
dpranke@chromium.org898a10e2011-03-04 21:54:43 +000082
83
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers.

  OWNERS files are read on demand, the first time a directory is needed by
  one of the public methods, and the parsed entries are accumulated in
  'owned_by', 'owners_for' and 'stop_looking'.
  """

  def __init__(self, root, fopen, os_path, glob):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading; the returned
        object is iterated line by line
      os_path: module/object callback with fields for 'abspath', 'dirname',
        'exists', 'isabs', 'join', and 'relpath'
      glob: function callback to list the entries in a directory that match
        a glob (i.e., glob.glob)
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path
    self.glob = glob

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Mapping of owners to the paths they own.
    self.owned_by = {EVERYONE: set()}

    # Mapping of paths to authorized owners.
    self.owners_for = {}

    # Set of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    self.stop_looking = set([''])

  def reviewers_for(self, files):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root."""
    self._check_paths(files)
    self._load_data_needed_for(files)
    return self._covering_set_of_owners_for(files)

  # TODO(dpranke): rename to objects_not_covered_by
  def directories_not_covered_by(self, files, reviewers):
    """Returns the set of directories that are not owned by a reviewer.

    Determines which of the given files are not owned by at least one of the
    reviewers, then returns a set containing the applicable enclosing
    directories, i.e. the ones upward from the files that have OWNERS files.
    A file that has its own "per-file" entry is checked (and reported) as
    itself rather than as its directory.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self._load_data_needed_for(files)

    # Files with per-file entries stand for themselves; every other file is
    # represented by the directory that contains it.
    objs = set()
    for f in files:
      if f in self.owners_for:
        objs.add(f)
      else:
        objs.add(self.os_path.dirname(f))

    covered_objs = self._objs_covered_by(reviewers)
    uncovered_objs = [self._enclosing_obj_with_owners(o) for o in objs
                      if not self._is_obj_covered_by(o, covered_objs)]

    return set(uncovered_objs)

  objects_not_covered_by = directories_not_covered_by

  def _check_paths(self, files):
    """Asserts that files is a collection of relative paths under the root."""
    _assert_is_collection(files)

    root = self.os_path.abspath(self.root)
    # Compare against the root *with* a trailing separator: a plain prefix
    # test would accept a sibling such as "/src-other" for the root "/src".
    root_with_sep = self.os_path.join(root, '')

    def _is_under(f):
      path = self.os_path.abspath(self.os_path.join(root, f))
      return path == root or path.startswith(root_with_sep)

    assert all(not self.os_path.isabs(f) and _is_under(f) for f in files)

  def _check_reviewers(self, reviewers):
    """Asserts that reviewers is a collection of valid email addresses."""
    _assert_is_collection(reviewers)
    assert all(self.email_regexp.match(r) for r in reviewers)

  # TODO(dpranke): Rename to _objs_covered_by and update_callers
  def _dirs_covered_by(self, reviewers):
    """Returns the set of paths owned by EVERYONE or by any of reviewers."""
    dirs = self.owned_by[EVERYONE]
    for r in reviewers:
      # '|' builds a new set, so the sets stored in owned_by are not mutated.
      dirs = dirs | self.owned_by.get(r, set())
    return dirs

  _objs_covered_by = _dirs_covered_by

  def _stop_looking(self, dirname):
    """Returns True if owners above dirname must not be consulted."""
    return dirname in self.stop_looking

  # TODO(dpranke): Rename to _is_dir_covered_by and update callers.
  def _is_dir_covered_by(self, dirname, covered_dirs):
    """Returns True if dirname, or an applicable ancestor, is in covered_dirs.

    The walk upward ends at the first path that is covered or that stops the
    lookup (see _stop_looking)."""
    while dirname not in covered_dirs and not self._stop_looking(dirname):
      dirname = self.os_path.dirname(dirname)
    return dirname in covered_dirs

  _is_obj_covered_by = _is_dir_covered_by

  # TODO(dpranke): Rename to _enclosing_obj_with_owners and update callers.
  def _enclosing_dir_with_owners(self, directory):
    """Returns the innermost enclosing directory that has an OWNERS file.

    If the walk reaches a path that stops the lookup before finding one,
    that path is returned instead."""
    dirpath = directory
    while dirpath not in self.owners_for:
      if self._stop_looking(dirpath):
        break
      dirpath = self.os_path.dirname(dirpath)
    return dirpath

  _enclosing_obj_with_owners = _enclosing_dir_with_owners

  def _load_data_needed_for(self, files):
    """Reads the OWNERS files of the directories enclosing each file.

    For each file the walk goes upward from its directory and ends at the
    first directory that already has entries in owners_for, or that stops
    the lookup."""
    for f in files:
      dirpath = self.os_path.dirname(f)
      while dirpath not in self.owners_for:
        self._read_owners_in_dir(dirpath)
        if self._stop_looking(dirpath):
          break
        dirpath = self.os_path.dirname(dirpath)

  def _read_owners_in_dir(self, dirpath):
    """Parses the OWNERS file in dirpath, if there is one.

    Raises:
      SyntaxErrorInOwnersFile: if a line is not a comment, a known "set"
        directive, a valid "per-file" line, "*", or an email address.
    """
    owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
    if not self.os_path.exists(owners_path):
      return

    owners_file = self.fopen(owners_path)
    try:
      for lineno, line in enumerate(owners_file, 1):
        line = line.strip()
        if line.startswith('#') or line == '':
          continue
        if line == 'set noparent':
          self.stop_looking.add(dirpath)
          continue

        m = re.match("per-file (.+)=(.+)", line)
        if m:
          glob_string = m.group(1).strip()
          directive = m.group(2).strip()
          full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
          if '/' in glob_string or '\\' in glob_string:
            raise SyntaxErrorInOwnersFile(owners_path, lineno,
                'per-file globs cannot span directories or use escapes: "%s"' %
                line)
          # Entries are recorded per matching file, keyed by its path
          # relative to the root.
          baselines = self.glob(full_glob_string)
          for baseline in (self.os_path.relpath(b, self.root)
                           for b in baselines):
            self._add_entry(baseline, directive, "per-file line",
                            owners_path, lineno)
          continue

        if line.startswith('set '):
          raise SyntaxErrorInOwnersFile(owners_path, lineno,
              'unknown option: "%s"' % line[4:].strip())

        self._add_entry(dirpath, line, "line", owners_path, lineno)
    finally:
      # Release the handle even when a syntax error aborts parsing; fopen is
      # a caller-supplied callback, so tolerate results without close().
      close = getattr(owners_file, 'close', None)
      if close is not None:
        close()

  def _add_entry(self, path, directive, line_type, owners_path, lineno):
    """Records one directive ("set noparent", "*" or an email) for path.

    Raises:
      SyntaxErrorInOwnersFile: if directive is none of the above.
    """
    if directive == "set noparent":
      self.stop_looking.add(path)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
      self.owned_by.setdefault(directive, set()).add(path)
      self.owners_for.setdefault(path, set()).add(directive)
    else:
      raise SyntaxErrorInOwnersFile(owners_path, lineno,
          ('%s is not a "set" directive, "*", '
           'or an email address: "%s"' % (line_type, directive)))

  def _covering_set_of_owners_for(self, files):
    """Returns a set of owners that together cover all of the files.

    Owners are picked one at a time with lowest_cost_owner until no
    directory is left uncovered."""
    dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
    all_possible_owners = self._all_possible_owners(dirs_remaining)
    suggested_owners = set()
    while dirs_remaining:
      owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
      suggested_owners.add(owner)
      # The owner's list may include directories that an earlier pick has
      # already covered, so discard rather than remove() them.
      dirs_remaining.difference_update(
          dirname for dirname, _ in all_possible_owners[owner])
    return suggested_owners

  def _all_possible_owners(self, dirs):
    """Returns a dict mapping each potential owner to a list of
    (dir, distance-from-dir) tuples, one per entry of dirs the owner can
    cover; a distance of 1 is the lowest/closest possible distance (which
    makes the subsequent math easier)."""
    all_possible_owners = {}
    for current_dir in dirs:
      dirname = current_dir
      distance = 1
      while True:
        for owner in self.owners_for.get(dirname, []):
          all_possible_owners.setdefault(owner, [])
          # It's possible the same owner might match a directory from
          # multiple files, and we only want the closest entry.
          if not any(current_dir == el[0] for el in all_possible_owners[owner]):
            all_possible_owners[owner].append((current_dir, distance))
        if self._stop_looking(dirname):
          break
        dirname = self.os_path.dirname(dirname)
        distance += 1
    return all_possible_owners

  @staticmethod
  def lowest_cost_owner(all_possible_owners, dirs):
    """Returns the owner with the lowest cost for the directories in dirs.

    Args:
      all_possible_owners: dict mapping an owner to a list of
        (dir, distance) tuples, as built by _all_possible_owners.
      dirs: the collection of directories that still need an owner.

    Raises:
      ValueError: if no owner covers any of the directories in dirs.
    """
    # We want to minimize both the number of reviewers and the distance
    # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
    # an arbitrarily-selected scaling factor that seems to work well - it
    # will select one reviewer in the parent directory over three reviewers
    # in subdirs, but not one reviewer over just two.
    total_costs_by_owner = {}
    for owner in all_possible_owners:
      total_distance = 0
      num_directories_owned = 0
      for dirname, distance in all_possible_owners[owner]:
        if dirname in dirs:
          total_distance += distance
          num_directories_owned += 1
      if num_directories_owned:
        total_costs_by_owner[owner] = (total_distance /
                                       pow(num_directories_owned, 1.75))

    # Return the lowest cost owner. In the case of a tie, pick one randomly.
    # (A real list is built because random choice needs a sequence.)
    lowest_cost = min(total_costs_by_owner.values())
    lowest_cost_owners = [owner for owner in total_costs_by_owner
                          if total_costs_by_owner[owner] == lowest_cost]
    return random.Random().choice(lowest_cost_owners)