blob: 30646ffa24dd203754a08abf5bba6d60f0f938cf [file] [log] [blame]
pam@chromium.orgf46aed92012-03-08 09:18:17 +00001# Copyright (c) 2012 The Chromium Authors. All rights reserved.
dpranke@chromium.org2a009622011-03-01 02:43:31 +00002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
dpranke@chromium.org17cc2442012-10-17 21:12:09 +00005"""A database of OWNERS files.
6
7OWNERS files indicate who is allowed to approve changes in a specific directory
8(or who is allowed to make changes without needing approval of another OWNER).
9Note that all changes must still be reviewed by someone familiar with the code,
10so you may need approval from both an OWNER and a reviewer in many cases.
11
12The syntax of the OWNERS file is, roughly:
13
14lines := (\s* line? \s* "\n")*
15
16line := directive
dpranke@chromium.orgd16e48b2012-12-03 21:53:49 +000017 | "per-file" \s+ glob \s* "=" \s* directive
dpranke@chromium.org17cc2442012-10-17 21:12:09 +000018 | comment
19
20directive := "set noparent"
21 | email_address
22 | "*"
23
24glob := [a-zA-Z0-9_-*?]+
25
26comment := "#" [^"\n"]*
27
28Email addresses must follow the foo@bar.com short form (exact syntax given
29in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
30shell conventions, and relative and absolute paths are not allowed (i.e.,
31globs only refer to the files in the current directory).
32
33If a user's email is one of the email_addresses in the file, the user is
34considered an "OWNER" for all files in the directory.
35
36If the "per-file" directive is used, the line only applies to files in that
37directory that match the filename glob specified.
38
If the "set noparent" directive is used, then only entries in this OWNERS file
apply to files in this directory; if the "set noparent" directive is not
used, then entries in OWNERS files in enclosing (upper) directories also
apply (up until a "set noparent" is encountered).
43
44If "per-file glob=set noparent" is used, then global directives are ignored
45for the glob, and only the "per-file" owners are used for files matching that
46glob.
47
48Examples for all of these combinations can be found in tests/owners_unittest.py.
49"""
dpranke@chromium.org2a009622011-03-01 02:43:31 +000050
dpranke@chromium.orgfdecfb72011-03-16 23:27:23 +000051import collections
dpranke@chromium.orgc591a702012-12-20 20:14:58 +000052import random
dpranke@chromium.org6dada4e2011-03-08 22:32:40 +000053import re
54
55
# Wildcard directive: if this is present by itself on a line, this means that
# everyone can review.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: one or more word
# characters, '-', '+', '%' or '.' on each side of a single '@'.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
dpranke@chromium.org2a009622011-03-01 02:43:31 +000062
dpranke@chromium.org2a009622011-03-01 02:43:31 +000063
dpranke@chromium.org923950f2011-03-17 23:40:00 +000064def _assert_is_collection(obj):
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000065 assert not isinstance(obj, basestring)
maruel@chromium.org725f1c32011-04-01 20:24:54 +000066 # Module 'collections' has no 'Iterable' member
67 # pylint: disable=E1101
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000068 if hasattr(collections, 'Iterable') and hasattr(collections, 'Sized'):
69 assert (isinstance(obj, collections.Iterable) and
70 isinstance(obj, collections.Sized))
dpranke@chromium.org923950f2011-03-17 23:40:00 +000071
72
class SyntaxErrorInOwnersFile(Exception):
  """Raised when a line of an OWNERS file cannot be parsed.

  Attributes:
    path: path of the offending OWNERS file.
    lineno: 1-based number of the offending line.
    msg: description of what is wrong with the line.
  """

  def __init__(self, path, lineno, msg):
    # The base exception receives the three fields as a single tuple argument.
    details = (path, lineno, msg)
    super(SyntaxErrorInOwnersFile, self).__init__(details)
    self.path, self.lineno, self.msg = details

  def __str__(self):
    fields = (self.path, self.lineno, self.msg)
    return '%s:%d syntax error: %s' % fields
dpranke@chromium.org898a10e2011-03-04 21:54:43 +000082
83
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers."""

  def __init__(self, root, fopen, os_path, glob):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', and 'relpath'
      glob: function callback to list entries in a directory that match a
          glob (i.e., glob.glob)
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path
    self.glob = glob

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Mapping of owners to the paths they own.
    self.owned_by = {EVERYONE: set()}

    # Mapping of paths to authorized owners.
    self.owners_for = {}

    # Mapping reviewers to the preceding comment per file in the OWNERS files.
    self.comments = {}

    # Set of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    self.stop_looking = set([''])

  def reviewers_for(self, files, author):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root.
    If author is nonempty, we ensure it is not included in the set returned
    in order avoid suggesting the author as a reviewer for their own changes.

    If no eligible owner exists for some of the files (for instance because
    the author is the only owner), the returned set simply does not cover
    them."""
    self._check_paths(files)
    self.load_data_needed_for(files)
    suggested_owners = self._covering_set_of_owners_for(files, author)
    if EVERYONE in suggested_owners:
      if len(suggested_owners) > 1:
        # A concrete reviewer is more useful than the wildcard.
        suggested_owners.remove(EVERYONE)
      else:
        suggested_owners = set(['<anyone>'])
    return suggested_owners

  def files_not_covered_by(self, files, reviewers):
    """Returns the files not owned by one of the reviewers.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.

    Returns:
      The set of entries of files that none of the reviewers (nor the
      EVERYONE wildcard) owns.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self.load_data_needed_for(files)

    covered_objs = self._objs_covered_by(reviewers)
    uncovered_files = [f for f in files
                       if not self._is_obj_covered_by(f, covered_objs)]

    return set(uncovered_files)

  def _is_under(self, f, pfx):
    """Returns whether relative path f resolves to pfx or to a path below it.

    pfx must already be absolute. The comparison is made against pfx followed
    by a path separator (obtained by joining pfx with an empty component), so
    that a sibling that merely shares a name prefix -- '/foobar' for a pfx of
    '/foo' -- is not mistaken for a descendant.
    """
    path = self.os_path.abspath(self.os_path.join(pfx, f))
    return path == pfx or path.startswith(self.os_path.join(pfx, ''))

  def _check_paths(self, files):
    """Asserts that files is a collection of relative paths under self.root."""
    _assert_is_collection(files)
    root = self.os_path.abspath(self.root)
    assert all(not self.os_path.isabs(f) and self._is_under(f, root)
               for f in files)

  def _check_reviewers(self, reviewers):
    """Asserts that reviewers is a collection of valid email addresses."""
    _assert_is_collection(reviewers)
    assert all(self.email_regexp.match(r) for r in reviewers)

  def _objs_covered_by(self, reviewers):
    """Returns the set of paths owned by EVERYONE or by any of reviewers."""
    objs = self.owned_by[EVERYONE]
    for r in reviewers:
      # '|' builds a new set, so the sets stored in self.owned_by are never
      # modified.
      objs = objs | self.owned_by.get(r, set())
    return objs

  def _stop_looking(self, objname):
    """Returns whether the owner lookup must not go above objname."""
    return objname in self.stop_looking

  def _is_obj_covered_by(self, objname, covered_objs):
    """Returns whether objname or an applicable ancestor is in covered_objs.

    Ancestors are considered until a path in self.stop_looking is reached.
    """
    while objname not in covered_objs and not self._stop_looking(objname):
      objname = self.os_path.dirname(objname)
    return objname in covered_objs

  def _enclosing_dir_with_owners(self, objname):
    """Returns the innermost enclosing directory that has an OWNERS file."""
    dirpath = objname
    while dirpath not in self.owners_for:
      if self._stop_looking(dirpath):
        break
      dirpath = self.os_path.dirname(dirpath)
    return dirpath

  def load_data_needed_for(self, files):
    """Reads the OWNERS files that apply to files and are not loaded yet.

    For each file, directories are visited from the file's own directory
    upwards until one that already has entries in self.owners_for, or one
    that stops the lookup, is reached.
    """
    for f in files:
      dirpath = self.os_path.dirname(f)
      while dirpath not in self.owners_for:
        self._read_owners_in_dir(dirpath)
        if self._stop_looking(dirpath):
          break
        dirpath = self.os_path.dirname(dirpath)

  def _read_owners_in_dir(self, dirpath):
    """Parses the OWNERS file in dirpath, if there is one, into the database.

    Args:
      dirpath: directory to read, relative to self.root.

    Raises:
      SyntaxErrorInOwnersFile: if a line is not a comment, a known "set"
          directive, a valid "per-file" line, "*", or an email address.
    """
    owners_path = self.os_path.join(self.root, dirpath, 'OWNERS')
    if not self.os_path.exists(owners_path):
      return

    # Read everything up front so the handle is released even when a line
    # turns out to be malformed. fopen is caller-supplied, so only close the
    # result if it actually offers close().
    owners_file = self.fopen(owners_path)
    try:
      lines = list(owners_file)
    finally:
      close = getattr(owners_file, 'close', None)
      if close is not None:
        close()

    # Lines of the most recent comment block. It is only reset when a new
    # comment block starts, so it stays attached to every entry that follows
    # until then.
    comment = []
    in_comment = False
    for lineno, line in enumerate(lines, 1):
      line = line.strip()
      if line.startswith('#'):
        if not in_comment:
          comment = []
        comment.append(line[1:].strip())
        in_comment = True
        continue
      if line == '':
        continue
      in_comment = False

      if line == 'set noparent':
        self.stop_looking.add(dirpath)
        continue

      m = re.match('per-file (.+)=(.+)', line)
      if m:
        glob_string = m.group(1).strip()
        directive = m.group(2).strip()
        if '/' in glob_string or '\\' in glob_string:
          raise SyntaxErrorInOwnersFile(owners_path, lineno,
              'per-file globs cannot span directories or use escapes: "%s"' %
              line)
        full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
        baselines = self.glob(full_glob_string)
        for baseline in (self.os_path.relpath(b, self.root) for b in baselines):
          # Per-file entries keep the comment's line breaks.
          self._add_entry(baseline, directive, 'per-file line',
                          owners_path, lineno, '\n'.join(comment))
        continue

      if line.startswith('set '):
        raise SyntaxErrorInOwnersFile(owners_path, lineno,
            'unknown option: "%s"' % line[4:].strip())

      # Directory-level entries get the comment folded onto a single line.
      self._add_entry(dirpath, line, 'line', owners_path, lineno,
                      ' '.join(comment))

  def _add_entry(self, path, directive,
                 line_type, owners_path, lineno, comment):
    """Records one directive for path.

    Args:
      path: directory or file the directive applies to, relative to the root.
      directive: 'set noparent', '*', or an email address.
      line_type: description of the kind of line, used in error messages.
      owners_path: path of the OWNERS file being parsed, for error messages.
      lineno: line number within owners_path, for error messages.
      comment: text of the comment block preceding the directive.

    Raises:
      SyntaxErrorInOwnersFile: if directive is none of the accepted forms.
    """
    if directive == 'set noparent':
      self.stop_looking.add(path)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
      self.comments.setdefault(directive, {})
      self.comments[directive][path] = comment
      self.owned_by.setdefault(directive, set()).add(path)
      self.owners_for.setdefault(path, set()).add(directive)
    else:
      raise SyntaxErrorInOwnersFile(owners_path, lineno,
          ('%s is not a "set" directive, "*", '
           'or an email address: "%s"' % (line_type, directive)))

  def _covering_set_of_owners_for(self, files, author):
    """Greedily picks owners until every directory that can be covered is.

    Args:
      files: paths relative to the root whose data has already been loaded.
      author: owner to leave out of the suggestions, or a false value.

    Returns:
      A set of owners. Directories for which no eligible owner exists are
      left uncovered.
    """
    dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
    all_possible_owners = self.all_possible_owners(dirs_remaining, author)
    suggested_owners = set()
    while dirs_remaining:
      # Nobody eligible owns what is left (e.g. the author is the only
      # owner): stop here, since picking a lowest cost owner out of an empty
      # candidate list would raise.
      if not self.total_costs_by_owner(all_possible_owners, dirs_remaining):
        break
      owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
      suggested_owners.add(owner)
      dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
      dirs_remaining -= dirs_to_remove
    return suggested_owners

  def all_possible_owners(self, dirs, author):
    """Returns a dict mapping each potential owner to a list of
    (dir, distance-from-dir) tuples, one per entry of dirs they can approve; a
    distance of 1 is the lowest/closest possible distance (which makes the
    subsequent math easier). author, if nonempty, is left out."""
    all_possible_owners = {}
    for current_dir in dirs:
      dirname = current_dir
      distance = 1
      while True:
        for owner in self.owners_for.get(dirname, []):
          if author and owner == author:
            continue
          all_possible_owners.setdefault(owner, [])
          # If the same person is in multiple OWNERS files above a given
          # directory, only count the closest one.
          if not any(current_dir == el[0] for el in all_possible_owners[owner]):
            all_possible_owners[owner].append((current_dir, distance))
        if self._stop_looking(dirname):
          break
        dirname = self.os_path.dirname(dirname)
        distance += 1
    return all_possible_owners

  @staticmethod
  def total_costs_by_owner(all_possible_owners, dirs):
    """Returns a dict mapping each owner that covers at least one of dirs to
    their cost; lower is better.

    Args:
      all_possible_owners: dict as returned by all_possible_owners().
      dirs: the directories that still need a reviewer.
    """
    # We want to minimize both the number of reviewers and the distance
    # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
    # an arbitrarily-selected scaling factor that seems to work well - it
    # will select one reviewer in the parent directory over three reviewers
    # in subdirs, but not one reviewer over just two.
    result = {}
    for owner in all_possible_owners:
      total_distance = 0
      num_directories_owned = 0
      for dirname, distance in all_possible_owners[owner]:
        if dirname in dirs:
          total_distance += distance
          num_directories_owned += 1
      if num_directories_owned:
        result[owner] = (total_distance /
                         pow(num_directories_owned, 1.75))
    return result

  @staticmethod
  def lowest_cost_owner(all_possible_owners, dirs):
    """Returns the owner with the lowest cost for dirs.

    Args:
      all_possible_owners: dict as returned by all_possible_owners().
      dirs: the directories that still need a reviewer.

    Raises:
      ValueError: if no owner covers any of dirs.
    """
    total_costs_by_owner = Database.total_costs_by_owner(all_possible_owners,
                                                         dirs)
    # Return the lowest cost owner. In the case of a tie, pick one randomly.
    lowest_cost = min(total_costs_by_owner.values())
    # A real list is needed here: random's choice() indexes its argument.
    lowest_cost_owners = [owner
                          for owner, cost in total_costs_by_owner.items()
                          if cost == lowest_cost]
    return random.Random().choice(lowest_cost_owners)