blob: cc667beea3c575f527470d9bd25269b4e95c6bae [file] [log] [blame]
pam@chromium.orgf46aed92012-03-08 09:18:17 +00001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
dpranke@chromium.org2a009622011-03-01 02:43:31 +00002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
dpranke@chromium.org17cc2442012-10-17 21:12:09 +00005"""A database of OWNERS files.
6
7OWNERS files indicate who is allowed to approve changes in a specific directory
8(or who is allowed to make changes without needing approval of another OWNER).
9Note that all changes must still be reviewed by someone familiar with the code,
10so you may need approval from both an OWNER and a reviewer in many cases.
11
12The syntax of the OWNERS file is, roughly:
13
14lines := (\s* line? \s* "\n")*
15
16line := directive
dpranke@chromium.orgd16e48b2012-12-03 21:53:49 +000017 | "per-file" \s+ glob \s* "=" \s* directive
dpranke@chromium.org17cc2442012-10-17 21:12:09 +000018 | comment
19
20directive := "set noparent"
21 | email_address
22 | "*"
23
24glob := [a-zA-Z0-9_-*?]+
25
26comment := "#" [^"\n"]*
27
28Email addresses must follow the foo@bar.com short form (exact syntax given
29in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
30shell conventions, and relative and absolute paths are not allowed (i.e.,
31globs only refer to the files in the current directory).
32
33If a user's email is one of the email_addresses in the file, the user is
34considered an "OWNER" for all files in the directory.
35
36If the "per-file" directive is used, the line only applies to files in that
37directory that match the filename glob specified.
38
If the "set noparent" directive is used, then only entries in this OWNERS file
apply to files in this directory; if the "set noparent" directive is not
used, then entries in OWNERS files in enclosing (upper) directories also
apply (up until a "set noparent" is encountered).
43
44If "per-file glob=set noparent" is used, then global directives are ignored
45for the glob, and only the "per-file" owners are used for files matching that
46glob.
47
48Examples for all of these combinations can be found in tests/owners_unittest.py.
49"""
dpranke@chromium.org2a009622011-03-01 02:43:31 +000050
dpranke@chromium.orgfdecfb72011-03-16 23:27:23 +000051import collections
dpranke@chromium.orgc591a702012-12-20 20:14:58 +000052import random
dpranke@chromium.org6dada4e2011-03-08 22:32:40 +000053import re
54
55
# Wildcard owner. If this is present by itself on a line of an OWNERS file,
# this means that everyone can review. It is also used as the key in
# Database.owned_by for the paths that anyone may approve.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: one or more word
# characters, '-', '+', '%' or '.' on each side of a single '@', anchored to
# the whole string. Database compiles this as its default email_regexp.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
dpranke@chromium.org2a009622011-03-01 02:43:31 +000062
dpranke@chromium.org2a009622011-03-01 02:43:31 +000063
dpranke@chromium.org923950f2011-03-17 23:40:00 +000064def _assert_is_collection(obj):
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000065 assert not isinstance(obj, basestring)
maruel@chromium.org725f1c32011-04-01 20:24:54 +000066 # Module 'collections' has no 'Iterable' member
67 # pylint: disable=E1101
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000068 if hasattr(collections, 'Iterable') and hasattr(collections, 'Sized'):
69 assert (isinstance(obj, collections.Iterable) and
70 isinstance(obj, collections.Sized))
dpranke@chromium.org923950f2011-03-17 23:40:00 +000071
72
class SyntaxErrorInOwnersFile(Exception):
  """Raised when a line of an OWNERS file cannot be parsed.

  Attributes:
    path: path of the offending OWNERS file.
    lineno: 1-based number of the offending line.
    msg: description of what is wrong with the line.
  """

  def __init__(self, path, lineno, msg):
    details = (path, lineno, msg)
    # The whole triple is passed as the single exception argument.
    Exception.__init__(self, details)
    self.path, self.lineno, self.msg = details

  def __str__(self):
    fields = (self.path, self.lineno, self.msg)
    return "%s:%d syntax error: %s" % fields
dpranke@chromium.org898a10e2011-03-04 21:54:43 +000082
83
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers."""

  def __init__(self, root, fopen, os_path, glob):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', and 'relpath'
      glob: function callback to list entries in a directory that match a
          glob (i.e., glob.glob)
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path
    self.glob = glob

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Mapping of owners to the paths they own.
    self.owned_by = {EVERYONE: set()}

    # Mapping of paths to authorized owners.
    self.owners_for = {}

    # Set of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    self.stop_looking = set([''])

  def reviewers_for(self, files, author):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root.
    If author is nonempty, we ensure it is not included in the set returned
    in order to avoid suggesting the author as a reviewer for their own
    changes.

    If the wildcard is the only suggestion, the set contains the single
    placeholder '<anyone>'; if there are other suggestions the wildcard is
    dropped. The set is empty when nobody other than the author can cover
    the files."""
    self._check_paths(files)
    self._load_data_needed_for(files)
    suggested_owners = self._covering_set_of_owners_for(files, author)
    if EVERYONE in suggested_owners:
      if len(suggested_owners) > 1:
        suggested_owners.remove(EVERYONE)
      else:
        suggested_owners = set(['<anyone>'])
    return suggested_owners

  def files_not_covered_by(self, files, reviewers):
    """Returns the files not owned by one of the reviewers.

    Args:
        files is a sequence of paths relative to (and under) self.root.
        reviewers is a sequence of strings matching self.email_regexp.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self._load_data_needed_for(files)

    covered_objs = self._objs_covered_by(reviewers)
    uncovered_files = [f for f in files
                       if not self._is_obj_covered_by(f, covered_objs)]

    return set(uncovered_files)

  def _check_paths(self, files):
    """Asserts that files is a collection of relative paths under the root."""
    def _is_under(f, pfx):
      full_path = self.os_path.abspath(self.os_path.join(pfx, f))
      # Compare against the prefix plus a separator so that a sibling such as
      # '/foobar' is not mistaken for being inside '/foo'. Joining with ''
      # appends the separator without hard-coding it.
      return (full_path == pfx or
              full_path.startswith(self.os_path.join(pfx, '')))
    _assert_is_collection(files)
    root = self.os_path.abspath(self.root)
    assert all(not self.os_path.isabs(f) and _is_under(f, root)
               for f in files)

  def _check_reviewers(self, reviewers):
    """Asserts that reviewers is a collection of valid email addresses."""
    _assert_is_collection(reviewers)
    assert all(self.email_regexp.match(r) for r in reviewers)

  def _objs_covered_by(self, reviewers):
    """Returns the set of paths owned by EVERYONE or by any of reviewers."""
    objs = self.owned_by[EVERYONE]
    for r in reviewers:
      # Build a new set each time so self.owned_by is never mutated.
      objs = objs | self.owned_by.get(r, set())
    return objs

  def _stop_looking(self, objname):
    """Returns True if owners above objname must not be consulted."""
    return objname in self.stop_looking

  def _is_obj_covered_by(self, objname, covered_objs):
    """Returns True if objname or an allowed ancestor is in covered_objs."""
    while not objname in covered_objs and not self._stop_looking(objname):
      objname = self.os_path.dirname(objname)
    return objname in covered_objs

  def _enclosing_dir_with_owners(self, objname):
    """Returns the innermost enclosing directory that has an OWNERS file."""
    dirpath = objname
    while not dirpath in self.owners_for:
      if self._stop_looking(dirpath):
        break
      dirpath = self.os_path.dirname(dirpath)
    return dirpath

  def _load_data_needed_for(self, files):
    """Reads the OWNERS files in each file's directory and its ancestors.

    Walking up stops at a directory that is already loaded or that is marked
    as a stopping point in self.stop_looking."""
    for f in files:
      dirpath = self.os_path.dirname(f)
      while not dirpath in self.owners_for:
        self._read_owners_in_dir(dirpath)
        if self._stop_looking(dirpath):
          break
        dirpath = self.os_path.dirname(dirpath)

  def _read_owners_in_dir(self, dirpath):
    """Parses the OWNERS file in dirpath, if any, into the database.

    Raises:
      SyntaxErrorInOwnersFile: if a line is not a recognized directive.
    """
    owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
    if not self.os_path.exists(owners_path):
      return

    owners_file = self.fopen(owners_path)
    try:
      for lineno, line in enumerate(owners_file, 1):
        line = line.strip()
        if line.startswith('#') or line == '':
          continue
        if line == 'set noparent':
          self.stop_looking.add(dirpath)
          continue

        m = re.match("per-file (.+)=(.+)", line)
        if m:
          glob_string = m.group(1).strip()
          directive = m.group(2).strip()
          full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
          if '/' in glob_string or '\\' in glob_string:
            raise SyntaxErrorInOwnersFile(owners_path, lineno,
                'per-file globs cannot span directories or use escapes: "%s"' %
                line)
          baselines = self.glob(full_glob_string)
          # Entries are keyed by paths relative to the repository root.
          for baseline in (self.os_path.relpath(b, self.root)
                           for b in baselines):
            self._add_entry(baseline, directive, "per-file line",
                            owners_path, lineno)
          continue

        if line.startswith('set '):
          raise SyntaxErrorInOwnersFile(owners_path, lineno,
              'unknown option: "%s"' % line[4:].strip())

        self._add_entry(dirpath, line, "line", owners_path, lineno)
    finally:
      # fopen is a caller-supplied callback and may return a plain iterable
      # of lines; only close what can be closed.
      close = getattr(owners_file, 'close', None)
      if close is not None:
        close()

  def _add_entry(self, path, directive, line_type, owners_path, lineno):
    """Records a single directive for path (a directory or a file).

    Raises:
      SyntaxErrorInOwnersFile: if directive is not "set noparent", "*", or
          something matching self.email_regexp.
    """
    if directive == "set noparent":
      self.stop_looking.add(path)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
      self.owned_by.setdefault(directive, set()).add(path)
      self.owners_for.setdefault(path, set()).add(directive)
    else:
      raise SyntaxErrorInOwnersFile(owners_path, lineno,
          ('%s is not a "set" directive, "*", '
           'or an email address: "%s"' % (line_type, directive)))

  def _covering_set_of_owners_for(self, files, author):
    """Greedily picks owners until every coverable directory is covered.

    Directories that have no eligible owner at all (for example because the
    author is their only owner) are skipped rather than causing an error, so
    the result may be empty."""
    dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
    all_possible_owners = self._all_possible_owners(dirs_remaining, author)

    # Drop directories nobody can cover; otherwise lowest_cost_owner would
    # eventually be asked to choose from an empty set of candidates.
    coverable_dirs = set(dirname
                         for entries in all_possible_owners.values()
                         for dirname, _ in entries)
    dirs_remaining &= coverable_dirs

    suggested_owners = set()
    while dirs_remaining:
      owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
      suggested_owners.add(owner)
      dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
      dirs_remaining -= dirs_to_remove
    return suggested_owners

  def _all_possible_owners(self, dirs, author):
    """Returns a dict mapping each potential owner to a list of
    (dir, distance-from-dir) tuples; a distance of 1 is the lowest/closest
    possible distance (which makes the subsequent math easier).

    If author is nonempty it is never included as a potential owner."""
    all_possible_owners = {}
    for current_dir in dirs:
      dirname = current_dir
      distance = 1
      while True:
        for owner in self.owners_for.get(dirname, []):
          if author and owner == author:
            continue
          all_possible_owners.setdefault(owner, [])
          # If the same person is in multiple OWNERS files above a given
          # directory, only count the closest one.
          if not any(current_dir == el[0] for el in all_possible_owners[owner]):
            all_possible_owners[owner].append((current_dir, distance))
        if self._stop_looking(dirname):
          break
        dirname = self.os_path.dirname(dirname)
        distance += 1
    return all_possible_owners

  @staticmethod
  def lowest_cost_owner(all_possible_owners, dirs):
    """Returns the owner that covers dirs most cheaply.

    Args:
      all_possible_owners: dict mapping owner to a list of (dir, distance)
          tuples, as returned by _all_possible_owners.
      dirs: collection of directories that still need a reviewer.

    Raises:
      ValueError: if no owner covers any of dirs.
    """
    # We want to minimize both the number of reviewers and the distance
    # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
    # an arbitrarily-selected scaling factor that seems to work well - it
    # will select one reviewer in the parent directory over three reviewers
    # in subdirs, but not one reviewer over just two.
    total_costs_by_owner = {}
    for owner in all_possible_owners:
      total_distance = 0
      num_directories_owned = 0
      for dirname, distance in all_possible_owners[owner]:
        if dirname in dirs:
          total_distance += distance
          num_directories_owned += 1
      if num_directories_owned:
        total_costs_by_owner[owner] = (total_distance /
                                       pow(num_directories_owned, 1.75))

    # Return the lowest cost owner. In the case of a tie, pick one randomly.
    # A real list is built because random.choice needs a sequence, and
    # filter() returns an iterator on Python 3.
    lowest_cost = min(total_costs_by_owner.values())
    lowest_cost_owners = [owner for owner in total_costs_by_owner
                          if total_costs_by_owner[owner] == lowest_cost]
    return random.Random().choice(lowest_cost_owners)