blob: 1b1775f08c48ecbbb720c7d622c30d5932e7f84e [file] [log] [blame]
pam@chromium.orgf46aed92012-03-08 09:18:17 +00001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
dpranke@chromium.org2a009622011-03-01 02:43:31 +00002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
dpranke@chromium.org17cc2442012-10-17 21:12:09 +00005"""A database of OWNERS files.
6
7OWNERS files indicate who is allowed to approve changes in a specific directory
8(or who is allowed to make changes without needing approval of another OWNER).
9Note that all changes must still be reviewed by someone familiar with the code,
10so you may need approval from both an OWNER and a reviewer in many cases.
11
12The syntax of the OWNERS file is, roughly:
13
14lines := (\s* line? \s* "\n")*
15
16line := directive
dpranke@chromium.orgd16e48b2012-12-03 21:53:49 +000017 | "per-file" \s+ glob \s* "=" \s* directive
dpranke@chromium.org17cc2442012-10-17 21:12:09 +000018 | comment
19
20directive := "set noparent"
21 | email_address
22 | "*"
23
24glob := [a-zA-Z0-9_-*?]+
25
26comment := "#" [^"\n"]*
27
28Email addresses must follow the foo@bar.com short form (exact syntax given
29in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
30shell conventions, and relative and absolute paths are not allowed (i.e.,
31globs only refer to the files in the current directory).
32
33If a user's email is one of the email_addresses in the file, the user is
34considered an "OWNER" for all files in the directory.
35
36If the "per-file" directive is used, the line only applies to files in that
37directory that match the filename glob specified.
38
If the "set noparent" directive is used, then only entries in this OWNERS file
apply to files in this directory; if the "set noparent" directive is not
used, then entries in OWNERS files in enclosing (upper) directories also
apply (up until a "set noparent" is encountered).
43
44If "per-file glob=set noparent" is used, then global directives are ignored
45for the glob, and only the "per-file" owners are used for files matching that
46glob.
47
48Examples for all of these combinations can be found in tests/owners_unittest.py.
49"""
dpranke@chromium.org2a009622011-03-01 02:43:31 +000050
dpranke@chromium.orgfdecfb72011-03-16 23:27:23 +000051import collections
dpranke@chromium.org6dada4e2011-03-08 22:32:40 +000052import re
53
54
# If this is present by itself on a line, this means that everyone can review.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: one or more word
# characters, '-', '+', '%' or '.' on each side of a single '@'.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
dpranke@chromium.org2a009622011-03-01 02:43:31 +000061
dpranke@chromium.org2a009622011-03-01 02:43:31 +000062
dpranke@chromium.org923950f2011-03-17 23:40:00 +000063def _assert_is_collection(obj):
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000064 assert not isinstance(obj, basestring)
maruel@chromium.org725f1c32011-04-01 20:24:54 +000065 # Module 'collections' has no 'Iterable' member
66 # pylint: disable=E1101
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000067 if hasattr(collections, 'Iterable') and hasattr(collections, 'Sized'):
68 assert (isinstance(obj, collections.Iterable) and
69 isinstance(obj, collections.Sized))
dpranke@chromium.org923950f2011-03-17 23:40:00 +000070
71
class SyntaxErrorInOwnersFile(Exception):
  """Raised when a line of an OWNERS file cannot be parsed.

  Attributes:
    path: path of the OWNERS file containing the bad line.
    lineno: 1-based line number of the bad line.
    msg: human-readable description of the problem.
  """

  def __init__(self, path, lineno, msg):
    # The base class receives the three values as one tuple, so
    # self.args == ((path, lineno, msg),).
    super(SyntaxErrorInOwnersFile, self).__init__((path, lineno, msg))
    self.path = path
    self.lineno = lineno
    self.msg = msg

  def __str__(self):
    """Formats the error as '<path>:<lineno> syntax error: <msg>'."""
    return "%s:%d syntax error: %s" % (self.path, self.lineno, self.msg)
dpranke@chromium.org898a10e2011-03-04 21:54:43 +000081
82
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers."""

  def __init__(self, root, fopen, os_path, glob):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading; the result
          is iterated line by line and closed if it has a close() method
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', and 'relpath'
      glob: function callback to list entries in a directory match a glob
          (i.e., glob.glob)
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path
    self.glob = glob

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Mapping of owners to the paths they own.
    self.owned_by = {EVERYONE: set()}

    # Mapping of paths to authorized owners.
    self.owners_for = {}

    # Set of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    self.stop_looking = set([''])

  def reviewers_for(self, files):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root."""
    self._check_paths(files)
    self._load_data_needed_for(files)
    return self._covering_set_of_owners_for(files)

  # TODO(dpranke): rename to objects_not_covered_by
  def directories_not_covered_by(self, files, reviewers):
    """Returns the set of directories that are not owned by a reviewer.

    Determines which of the given files are not owned by at least one of the
    reviewers, then returns a set containing the applicable enclosing
    directories, i.e. the ones upward from the files that have OWNERS files.
    A file that has its own entry in self.owners_for (added by a "per-file"
    line) is checked as itself rather than through its directory.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self._load_data_needed_for(files)

    objs = set()
    for f in files:
      if f in self.owners_for:
        objs.add(f)
      else:
        objs.add(self.os_path.dirname(f))

    covered_objs = self._objs_covered_by(reviewers)
    return set(self._enclosing_obj_with_owners(o) for o in objs
               if not self._is_obj_covered_by(o, covered_objs))

  objects_not_covered_by = directories_not_covered_by

  def _check_paths(self, files):
    """Asserts that files is a collection of relative paths under the root.

    NOTE(review): containment is a textual prefix test on the absolute path,
    so a sibling whose name merely starts with the root's name also passes.
    """
    _assert_is_collection(files)
    # The absolute root is the same for every file; compute it once.
    abs_root = self.os_path.abspath(self.root)

    def _is_under(f, pfx):
      return self.os_path.abspath(self.os_path.join(pfx, f)).startswith(pfx)

    assert all(not self.os_path.isabs(f) and _is_under(f, abs_root)
               for f in files)

  def _check_reviewers(self, reviewers):
    """Asserts that reviewers is a collection matching self.email_regexp."""
    _assert_is_collection(reviewers)
    assert all(self.email_regexp.match(r) for r in reviewers)

  # TODO(dpranke): Rename to _objs_covered_by and update_callers
  def _dirs_covered_by(self, reviewers):
    """Returns the paths owned by EVERYONE or by any of the reviewers."""
    dirs = self.owned_by[EVERYONE]
    for r in reviewers:
      # '|' builds a new set, so the sets stored in owned_by are not mutated.
      dirs = dirs | self.owned_by.get(r, set())
    return dirs

  _objs_covered_by = _dirs_covered_by

  def _stop_looking(self, dirname):
    """Returns True if owners lookup must not continue above dirname."""
    return dirname in self.stop_looking

  # TODO(dpranke): Rename to _is_dir_covered_by and update callers.
  def _is_dir_covered_by(self, dirname, covered_dirs):
    """Returns True if dirname or an applicable ancestor is in covered_dirs.

    Walks upward from dirname until a covered path is found or a path in
    self.stop_looking is reached.
    """
    while dirname not in covered_dirs and not self._stop_looking(dirname):
      dirname = self.os_path.dirname(dirname)
    return dirname in covered_dirs

  _is_obj_covered_by = _is_dir_covered_by

  # TODO(dpranke): Rename to _enclosing_obj_with_owners and update callers.
  def _enclosing_dir_with_owners(self, directory):
    """Returns the innermost enclosing directory that has an OWNERS file.

    If the walk reaches a path in self.stop_looking first, that path is
    returned even when it has no entry in self.owners_for.
    """
    dirpath = directory
    while dirpath not in self.owners_for:
      if self._stop_looking(dirpath):
        break
      dirpath = self.os_path.dirname(dirpath)
    return dirpath

  _enclosing_obj_with_owners = _enclosing_dir_with_owners

  def _load_data_needed_for(self, files):
    """Reads the OWNERS files in each file's directory and its ancestors.

    For each file the walk goes upward and stops at a directory that already
    has an entry in self.owners_for or that is in self.stop_looking.
    """
    for f in files:
      dirpath = self.os_path.dirname(f)
      while dirpath not in self.owners_for:
        self._read_owners_in_dir(dirpath)
        if self._stop_looking(dirpath):
          break
        dirpath = self.os_path.dirname(dirpath)

  def _read_owners_in_dir(self, dirpath):
    """Parses <root>/<dirpath>/OWNERS, if it exists, into the database.

    Raises:
      SyntaxErrorInOwnersFile: if a line is not a comment, a blank line, a
          recognized directive, or a valid "per-file" line.
    """
    owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
    if not self.os_path.exists(owners_path):
      return

    # Read everything up front so the handle is released even when a line
    # fails to parse. fopen is caller-supplied, so only close what can be
    # closed.
    owners_file = self.fopen(owners_path)
    try:
      lines = list(owners_file)
    finally:
      close = getattr(owners_file, 'close', None)
      if close is not None:
        close()

    for lineno, line in enumerate(lines, 1):
      line = line.strip()
      if line.startswith('#') or line == '':
        continue
      if line == 'set noparent':
        self.stop_looking.add(dirpath)
        continue

      # The documented grammar allows any whitespace after "per-file".
      m = re.match(r'per-file\s+(.+)=(.+)', line)
      if m:
        glob_string = m.group(1).strip()
        directive = m.group(2).strip()
        if '/' in glob_string or '\\' in glob_string:
          raise SyntaxErrorInOwnersFile(owners_path, lineno,
              'per-file globs cannot span directories or use escapes: "%s"' %
              line)
        full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
        # Entries are keyed by each matching file's path relative to root.
        for match in self.glob(full_glob_string):
          baseline = self.os_path.relpath(match, self.root)
          self._add_entry(baseline, directive, "per-file line",
                          owners_path, lineno)
        continue

      if line.startswith('set '):
        raise SyntaxErrorInOwnersFile(owners_path, lineno,
            'unknown option: "%s"' % line[4:].strip())

      self._add_entry(dirpath, line, "line", owners_path, lineno)

  def _add_entry(self, path, directive, line_type, owners_path, lineno):
    """Records one directive for path.

    "set noparent" adds path to self.stop_looking; an email address or "*"
    is recorded in both self.owned_by and self.owners_for.

    Raises:
      SyntaxErrorInOwnersFile: if directive is none of the above.
    """
    if directive == "set noparent":
      self.stop_looking.add(path)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
      self.owned_by.setdefault(directive, set()).add(path)
      self.owners_for.setdefault(path, set()).add(directive)
    else:
      raise SyntaxErrorInOwnersFile(owners_path, lineno,
          ('%s is not a "set" directive, "*", '
           'or an email address: "%s"' % (line_type, directive)))

  def _covering_set_of_owners_for(self, files):
    """Greedily picks owners until every file's owning path is covered.

    Each file is mapped to its innermost path with owners. The owners of that
    path are collected by walking upward while each path has an entry in
    self.owners_for, stopping after a path in self.stop_looking. Owners are
    then chosen one at a time, always taking the one that covers the most
    remaining paths; ties are broken by owner name so the result does not
    depend on set iteration order.
    """
    # Get the set of directories from the files.
    dirs = set(self._enclosing_dir_with_owners(f) for f in files)

    owned_dirs = {}
    dir_owners = {}

    for current_dir in dirs:
      # Get the list of owners for each directory.
      current_owners = set()
      dirname = current_dir
      while dirname in self.owners_for:
        current_owners |= self.owners_for[dirname]
        if self._stop_looking(dirname):
          break
        prev_parent = dirname
        dirname = self.os_path.dirname(dirname)
        # dirname() of the top-most path returns itself; do not spin on it.
        if prev_parent == dirname:
          break

      # Map each directory to a list of its owners.
      dir_owners[current_dir] = current_owners

      # Add the directory to the list of each owner.
      for owner in current_owners:
        owned_dirs.setdefault(owner, set()).add(current_dir)

    final_owners = set()
    while dirs:
      # Count how many of the remaining directories each owner covers.
      owner_count = {}
      for dirname in dirs:
        for owner in dir_owners[dirname]:
          owner_count[owner] = owner_count.get(owner, 0) + 1

      # If no more directories have OWNERS, we're done.
      if not owner_count:
        break

      # Highest count wins; equal counts fall back to the smallest name.
      max_owner = min(owner_count, key=lambda o: (-owner_count[o], o))
      final_owners.add(max_owner)

      # Remove all directories owned by the current owner from the remaining
      # list.
      dirs -= owned_dirs[max_owner]

    return final_owners