blob: 7fa69e365e91b24f7de8bb9683d65fb5059ff322 [file] [log] [blame]
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

r"""A database of OWNERS files.

OWNERS files indicate who is allowed to approve changes in a specific directory
(or who is allowed to make changes without needing approval of another OWNER).
Note that all changes must still be reviewed by someone familiar with the code,
so you may need approval from both an OWNER and a reviewer in many cases.

The syntax of the OWNERS file is, roughly:

lines      := (\s* line? \s* comment? \s* "\n")*

line       := directive
            | "per-file" \s+ glob \s* "=" \s* directive

directive  := "set noparent"
            | "file:" owner_file
            | email_address
            | "*"

glob       := [a-zA-Z0-9_-*?]+

comment    := "#" [^"\n"]*

owner_file := "OWNERS"
            | [^"\n"]* "_OWNERS"

Email addresses must follow the foo@bar.com short form (exact syntax given
in BASIC_EMAIL_REGEXP, below). Filename globs follow the simple unix
shell conventions, and relative and absolute paths are not allowed (i.e.,
globs only refer to the files in the current directory).

If a user's email is one of the email_addresses in the file, the user is
considered an "OWNER" for all files in the directory.

If the "per-file" directive is used, the line only applies to files in that
directory that match the filename glob specified.

If the "set noparent" directive is used, then only entries in this OWNERS file
apply to files in this directory; if the "set noparent" directive is not
used, then entries in OWNERS files in enclosing (upper) directories also
apply (up until a "set noparent" is encountered).

If "per-file glob=set noparent" is used, then global directives are ignored
for the glob, and only the "per-file" owners are used for files matching that
glob.

If the "file:" directive is used, the referred to OWNERS file will be parsed and
considered when determining the valid set of OWNERS. If the filename starts with
"//" it is relative to the root of the repository, otherwise it is relative to
the current file. The referred to file *must* be named OWNERS or end in a suffix
of _OWNERS.

Examples for all of these combinations can be found in tests/owners_unittest.py.
"""
dpranke@chromium.org2a009622011-03-01 02:43:31 +000059
dpranke@chromium.orgfdecfb72011-03-16 23:27:23 +000060import collections
dtu944b6052016-07-14 14:48:21 -070061import fnmatch
dpranke@chromium.orgc591a702012-12-20 20:14:58 +000062import random
dpranke@chromium.org6dada4e2011-03-08 22:32:40 +000063import re
64
65
# Wildcard owner: if this is present by itself on a line of an OWNERS file,
# it means that everyone can review.
EVERYONE = '*'


# Recognizes 'X@Y' email addresses. Very simplistic: one or more word
# characters or any of '-', '+', '%', '.' on each side of a single '@'.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'


# Key for global comments per email address. Should be unlikely to be a
# pathname.
GLOBAL_STATUS = '*'
77
78
dpranke@chromium.org923950f2011-03-17 23:40:00 +000079def _assert_is_collection(obj):
Edward Lemur14705d82019-10-30 22:17:10 +000080 assert not isinstance(obj, str)
maruel@chromium.org725f1c32011-04-01 20:24:54 +000081 # Module 'collections' has no 'Iterable' member
Quinten Yearsleyb2cc4a92016-12-15 13:53:26 -080082 # pylint: disable=no-member
dpranke@chromium.orge6a4ab32011-03-31 01:23:08 +000083 if hasattr(collections, 'Iterable') and hasattr(collections, 'Sized'):
84 assert (isinstance(obj, collections.Iterable) and
85 isinstance(obj, collections.Sized))
dpranke@chromium.org923950f2011-03-17 23:40:00 +000086
87
class SyntaxErrorInOwnersFile(Exception):
  """Raised when a line of an OWNERS (or status) file cannot be parsed.

  Attributes:
    path: the file the offending line was read from.
    lineno: the 1-based number of the offending line.
    msg: a human-readable description of the problem.
  """

  def __init__(self, path, lineno, msg):
    # The base class receives the three fields as one tuple argument.
    details = (path, lineno, msg)
    Exception.__init__(self, details)
    self.path, self.lineno, self.msg = details

  def __str__(self):
    fields = (self.path, self.lineno, self.msg)
    return '%s:%d syntax error: %s' % fields
dpranke@chromium.org898a10e2011-03-04 21:54:43 +000097
98
class Database(object):
  """A database of OWNERS files for a repository.

  This class allows you to find a suggested set of reviewers for a list
  of changed files, and see if a list of changed files is covered by a
  list of reviewers."""

  def __init__(self, root, fopen, os_path):
    """Args:
      root: the path to the root of the Repository
      fopen: function callback to open a text file for reading
      os_path: module/object callback with fields for 'abspath', 'dirname',
          'exists', 'isabs', 'join', 'normpath', 'relpath', and 'split'
    """
    self.root = root
    self.fopen = fopen
    self.os_path = os_path

    # Pick a default email regexp to use; callers can override as desired.
    self.email_regexp = re.compile(BASIC_EMAIL_REGEXP)

    # Replacement contents for the given files. Maps the file name of an
    # OWNERS file (relative to root) to an iterator returning the replacement
    # file contents.
    self.override_files = {}

    # Mapping of owners to the paths or globs they own.
    self._owners_to_paths = {EVERYONE: set()}

    # Mappings of directories -> globs in the directory -> owners
    # Example: "chrome/browser" -> "chrome/browser/*.h" -> ("john", "maria")
    self._paths_to_owners = {}

    # Mapping reviewers to the preceding comment per file in the OWNERS files.
    self.comments = {}

    # Cache of compiled regexes for _fnmatch()
    self._fnmatch_cache = {}

    # Sets of paths that stop us from looking above them for owners.
    # (This is implicitly true for the root directory).
    #
    # The implementation is a mapping:
    # Directory -> globs in the directory,
    #
    # Example:
    # 'ui/events/devices/mojo' -> 'ui/events/devices/mojo/*_struct_traits*.*'
    self._stop_looking = {'': set([''])}

    # Set of files which have already been read.
    self.read_files = set()

    # Files which were included from other files, mapped to the set of owners
    # read from them. Files are processed differently depending on whether
    # they are regular owners files or being included from another file.
    self._included_files = {}

    # File with global status lines for owners.
    self._status_file = None

  def _file_affects_ownership(self, path):
    """Returns true if the path refers to a file that could affect ownership."""
    filename = self.os_path.split(path)[-1]
    return filename == 'OWNERS' or filename.endswith('_OWNERS')

  def reviewers_for(self, files, author):
    """Returns a suggested set of reviewers that will cover the files.

    files is a sequence of paths relative to (and under) self.root.
    If author is nonempty, we ensure it is not included in the set returned
    in order avoid suggesting the author as a reviewer for their own changes."""
    self._check_paths(files)
    self.load_data_needed_for(files)

    suggested_owners = self._covering_set_of_owners_for(files, author)
    if EVERYONE in suggested_owners:
      if len(suggested_owners) > 1:
        # Real people were suggested too; the wildcard adds nothing.
        suggested_owners.remove(EVERYONE)
      else:
        suggested_owners = set(['<anyone>'])
    return suggested_owners

  def files_not_covered_by(self, files, reviewers):
    """Returns the files not owned by one of the reviewers.

    Args:
      files is a sequence of paths relative to (and under) self.root.
      reviewers is a sequence of strings matching self.email_regexp.
    """
    self._check_paths(files)
    self._check_reviewers(reviewers)
    self.load_data_needed_for(files)

    return set(f for f in files if not self._is_obj_covered_by(f, reviewers))

  def _check_paths(self, files):
    """Asserts that |files| is a collection of relative paths under the root."""
    root = self.os_path.abspath(self.root)
    # join(root, '') yields the root with a trailing separator, so the prefix
    # test below only matches on a path-component boundary. A plain
    # startswith(root) would wrongly accept '../root2/x' for root '/root'.
    root_prefix = self.os_path.join(root, '')

    def _is_under(f):
      full = self.os_path.abspath(self.os_path.join(root, f))
      return full == root or full.startswith(root_prefix)

    _assert_is_collection(files)
    assert all(not self.os_path.isabs(f) and _is_under(f) for f in files)

  def _check_reviewers(self, reviewers):
    """Asserts that |reviewers| is a collection of valid email addresses."""
    _assert_is_collection(reviewers)
    assert all(self.email_regexp.match(r) for r in reviewers), reviewers

  def _is_obj_covered_by(self, objname, reviewers):
    """Returns True if any of |reviewers| (or everyone) owns |objname|.

    Walks from |objname| up through its enclosing directories until a match
    is found or _should_stop_looking() says to stop.
    """
    reviewers = list(reviewers) + [EVERYONE]
    while True:
      for reviewer in reviewers:
        for owned_pattern in self._owners_to_paths.get(reviewer, set()):
          if fnmatch.fnmatch(objname, owned_pattern):
            return True
      if self._should_stop_looking(objname):
        break
      objname = self.os_path.dirname(objname)
    return False

  def enclosing_dir_with_owners(self, objname):
    """Returns the innermost enclosing directory that has an OWNERS file."""
    dirpath = objname
    while not self._owners_for(dirpath):
      if self._should_stop_looking(dirpath):
        break
      dirpath = self.os_path.dirname(dirpath)
    return dirpath

  def load_data_needed_for(self, files):
    """Reads the OWNERS files in the directories enclosing each of |files|.

    For every file, directories are visited from the innermost outwards until
    one already has owners loaded, was already visited by this call, or stops
    the upward search.
    """
    self._read_global_comments()
    visited_dirs = set()
    for f in files:
      dirpath = self.os_path.dirname(f)
      while dirpath not in visited_dirs:
        visited_dirs.add(dirpath)

        obj_owners = self._owners_for(dirpath)
        if obj_owners:
          break
        self._read_owners(self.os_path.join(dirpath, 'OWNERS'))
        if self._should_stop_looking(dirpath):
          break

        dirpath = self.os_path.dirname(dirpath)

  def _should_stop_looking(self, objname):
    """Returns True if a stop pattern recorded at or above |objname| matches."""
    dirname = objname
    while True:
      if dirname in self._stop_looking:
        if any(self._fnmatch(objname, stop_looking)
               for stop_looking in self._stop_looking[dirname]):
          return True
      up_dirname = self.os_path.dirname(dirname)
      if up_dirname == dirname:
        break
      dirname = up_dirname
    return False

  def _get_root_affected_dir(self, obj_name):
    """Returns the deepest directory/path that is affected by a file pattern
    |obj_name|."""
    root_affected_dir = obj_name
    # Both '*' and '?' are valid in per-file globs. A pattern containing only
    # '?' used to be stored under its own (never looked-up) key, so such rules
    # were silently ignored by _owners_for() and _should_stop_looking().
    while '*' in root_affected_dir or '?' in root_affected_dir:
      root_affected_dir = self.os_path.dirname(root_affected_dir)
    return root_affected_dir

  def _owners_for(self, objname):
    """Returns the set of owners whose recorded patterns match |objname|."""
    obj_owners = set()

    # Possibly relevant rules can be found stored at every directory
    # level so iterate upwards, looking for them.
    dirname = objname
    while True:
      dir_owner_rules = self._paths_to_owners.get(dirname)
      if dir_owner_rules:
        for owned_path, path_owners in dir_owner_rules.items():
          if self._fnmatch(objname, owned_path):
            obj_owners |= path_owners
      up_dirname = self.os_path.dirname(dirname)
      if up_dirname == dirname:
        break
      dirname = up_dirname

    return obj_owners

  def _read_lines(self, full_path):
    """Returns the lines of |full_path| as a list, closing the file if possible.

    self.fopen() may return a real file object or any other iterable of lines,
    so close() is only called when the returned object provides it.
    """
    handle = self.fopen(full_path)
    try:
      return list(handle)
    finally:
      close = getattr(handle, 'close', None)
      if close is not None:
        close()

  def _read_owners(self, path):
    """Parses the OWNERS file at |path| (relative to root), at most once.

    Does nothing if the file neither exists nor is overridden. Records owners,
    per-file rules, "set noparent" markers and the comments attached to each
    entry. For the top-level OWNERS file it also records the OWNERS_STATUS
    file name.

    Raises:
      SyntaxErrorInOwnersFile: if a line cannot be parsed.
    """
    owners_path = self.os_path.join(self.root, path)
    if not (self.os_path.exists(owners_path) or (path in self.override_files)):
      return

    if owners_path in self.read_files:
      return

    self.read_files.add(owners_path)

    is_toplevel = path == 'OWNERS'

    comment = []
    dirpath = self.os_path.dirname(path)
    in_comment = False
    # We treat the beginning of the file as an blank line.
    previous_line_was_blank = True
    reset_comment_after_use = False

    if path in self.override_files:
      file_iter = self.override_files[path]
    else:
      file_iter = self._read_lines(owners_path)

    for lineno, line in enumerate(file_iter, 1):
      line = line.strip()
      if line.startswith('#'):
        if is_toplevel:
          m = re.match(r'#\s*OWNERS_STATUS\s+=\s+(.+)$', line)
          if m:
            self._status_file = m.group(1).strip()
            continue
        if not in_comment:
          # A new comment block starts here. If it directly follows an entry
          # (no blank line in between) it only applies to the next entry.
          comment = []
          reset_comment_after_use = not previous_line_was_blank
        comment.append(line[1:].strip())
        in_comment = True
        continue
      in_comment = False

      if line == '':
        comment = []
        previous_line_was_blank = True
        continue

      # If the line ends with a comment, strip the comment and store it for this
      # line only.
      line, _, line_comment = line.partition('#')
      line = line.strip()
      line_comment = [line_comment.strip()] if line_comment else []

      previous_line_was_blank = False
      if line == 'set noparent':
        self._stop_looking.setdefault(
            self._get_root_affected_dir(dirpath), set()).add(dirpath)
        continue

      m = re.match('per-file (.+)=(.+)', line)
      if m:
        glob_string = m.group(1).strip()
        directive = m.group(2).strip()
        full_glob_string = self.os_path.join(self.root, dirpath, glob_string)
        if '/' in glob_string or '\\' in glob_string:
          raise SyntaxErrorInOwnersFile(owners_path, lineno,
              'per-file globs cannot span directories or use escapes: "%s"' %
              line)
        relative_glob_string = self.os_path.relpath(full_glob_string, self.root)
        self._add_entry(relative_glob_string, directive, owners_path,
                        lineno, '\n'.join(comment + line_comment))
        if reset_comment_after_use:
          comment = []
        continue

      if line.startswith('set '):
        raise SyntaxErrorInOwnersFile(owners_path, lineno,
            'unknown option: "%s"' % line[4:].strip())

      self._add_entry(dirpath, line, owners_path, lineno,
                      ' '.join(comment + line_comment))
      if reset_comment_after_use:
        comment = []

  def _read_global_comments(self):
    """Loads the per-owner global status comments, if a status file is set.

    The status file name comes from an OWNERS_STATUS line in the top-level
    OWNERS file, which is read first if that has not happened yet.

    Raises:
      IOError: if the named status file does not exist.
      SyntaxErrorInOwnersFile: if a status line cannot be parsed.
    """
    if not self._status_file:
      # _read_owners() itself skips files it has already read, so no separate
      # membership test is needed here (read_files holds root-joined paths,
      # which a bare 'OWNERS' would not match anyway).
      self._read_owners('OWNERS')
      if not self._status_file:
        return

    owners_status_path = self.os_path.join(self.root, self._status_file)
    if not self.os_path.exists(owners_status_path):
      raise IOError('Could not find global status file "%s"' %
                    owners_status_path)

    if owners_status_path in self.read_files:
      return

    self.read_files.add(owners_status_path)

    for lineno, line in enumerate(self._read_lines(owners_status_path), 1):
      line = line.strip()
      if line.startswith('#'):
        continue
      if line == '':
        continue

      m = re.match('(.+?):(.+)', line)
      if m:
        owner = m.group(1).strip()
        comment = m.group(2).strip()
        if not self.email_regexp.match(owner):
          raise SyntaxErrorInOwnersFile(owners_status_path, lineno,
              'invalid email address: "%s"' % owner)

        self.comments.setdefault(owner, {})
        self.comments[owner][GLOBAL_STATUS] = comment
        continue

      raise SyntaxErrorInOwnersFile(owners_status_path, lineno,
          'cannot parse status entry: "%s"' % line.strip())

  def _add_entry(self, owned_paths, directive, owners_path, lineno, comment):
    """Records one parsed directive for the path or glob |owned_paths|.

    Args:
      owned_paths: directory or per-file glob (relative to root) it applies to.
      directive: "set noparent", "file:<path>", an email address, or "*".
      owners_path: the OWNERS file being parsed (used for includes and errors).
      lineno: line number of the directive in |owners_path|.
      comment: comment text to attach to an email/"*" entry; may be empty.

    Raises:
      SyntaxErrorInOwnersFile: if the directive is not recognized or a
          "file:" include does not refer to an existing file.
    """
    if directive == 'set noparent':
      self._stop_looking.setdefault(
          self._get_root_affected_dir(owned_paths), set()).add(owned_paths)
    elif directive.startswith('file:'):
      include_file = self._resolve_include(directive[5:], owners_path, lineno)
      if not include_file:
        raise SyntaxErrorInOwnersFile(owners_path, lineno,
            ('%s does not refer to an existing file.' % directive[5:]))

      included_owners = self._read_just_the_owners(include_file)
      for owner in included_owners:
        self._owners_to_paths.setdefault(owner, set()).add(owned_paths)
        self._paths_to_owners.setdefault(
            self._get_root_affected_dir(owned_paths), {}).setdefault(
                owned_paths, set()).add(owner)
    elif self.email_regexp.match(directive) or directive == EVERYONE:
      if comment:
        self.comments.setdefault(directive, {})
        self.comments[directive][owned_paths] = comment
      self._owners_to_paths.setdefault(directive, set()).add(owned_paths)
      self._paths_to_owners.setdefault(
          self._get_root_affected_dir(owned_paths), {}).setdefault(
              owned_paths, set()).add(directive)
    else:
      raise SyntaxErrorInOwnersFile(owners_path, lineno,
          ('"%s" is not a "set noparent", file include, "*", '
           'or an email address.' % (directive,)))

  def _resolve_include(self, path, start, lineno):
    """Resolves a "file:" include target to a path relative to root.

    Args:
      path: the include target; a leading "//" makes it relative to root,
          otherwise it is relative to the directory of |start|.
      start: the including file; must start with self.root for relative
          targets.
      lineno: line number of the include, used for error reporting.

    Returns:
      The root-relative path of the included file, or None if it does not
      exist (and is not in override_files).

    Raises:
      SyntaxErrorInOwnersFile: if the target is not named OWNERS or *_OWNERS.
    """
    if path.startswith('//'):
      include_path = path[2:]
    else:
      assert start.startswith(self.root)
      # Keep |start| intact so that errors below name the including file
      # rather than its directory.
      start_dir = self.os_path.dirname(self.os_path.relpath(start, self.root))
      include_path = self.os_path.normpath(self.os_path.join(start_dir, path))

    if include_path in self.override_files:
      return include_path

    owners_path = self.os_path.join(self.root, include_path)
    # Paths included via "file:" must end in OWNERS or _OWNERS. Files that can
    # affect ownership have a different set of ownership rules, so that users
    # cannot self-approve changes adding themselves to an OWNERS file.
    if not self._file_affects_ownership(owners_path):
      raise SyntaxErrorInOwnersFile(start, lineno, 'file: include must specify '
                                    'a file named OWNERS or ending in _OWNERS')

    if not self.os_path.exists(owners_path):
      return None

    return include_path

  def _read_just_the_owners(self, include_file):
    """Returns the set of owners listed in |include_file|, following includes.

    |include_file| is relative to root. "set noparent" and "per-file" lines
    are skipped. Results are cached per file; the cache entry is registered
    before parsing so that include cycles terminate.

    Raises:
      SyntaxErrorInOwnersFile: if a line cannot be parsed or a nested
          "file:" include does not refer to an existing file.
    """
    if include_file in self._included_files:
      return self._included_files[include_file]

    owners = set()
    self._included_files[include_file] = owners
    include_path = self.os_path.join(self.root, include_file)
    if include_file in self.override_files:
      file_iter = self.override_files[include_file]
    else:
      file_iter = self._read_lines(include_path)
    for lineno, line in enumerate(file_iter, 1):
      line = line.strip()
      if (line.startswith('#') or line == '' or
          line.startswith('set noparent') or
          line.startswith('per-file')):
        continue

      # If the line ends with a comment, strip the comment.
      line, _delim, _comment = line.partition('#')
      line = line.strip()

      if self.email_regexp.match(line) or line == EVERYONE:
        owners.add(line)
        continue
      if line.startswith('file:'):
        # _resolve_include() expects the including file as a path that starts
        # with the root, so pass the joined path rather than the relative one.
        sub_include_file = self._resolve_include(
            line[5:], include_path, lineno)
        if not sub_include_file:
          # Without this check a missing file would be passed on as None and
          # fail with an unrelated TypeError.
          raise SyntaxErrorInOwnersFile(include_file, lineno,
              ('%s does not refer to an existing file.' % line[5:]))
        sub_owners = self._read_just_the_owners(sub_include_file)
        owners.update(sub_owners)
        continue

      raise SyntaxErrorInOwnersFile(include_file, lineno,
          ('"%s" is not a "set noparent", file include, "*", '
           'or an email address.' % (line,)))
    return owners

  def _covering_set_of_owners_for(self, files, author):
    """Greedily picks owners until every directory needing review is covered."""
    dirs_remaining = set(self.enclosing_dir_with_owners(f) for f in files)
    all_possible_owners = self.all_possible_owners(dirs_remaining, author)
    suggested_owners = set()
    while dirs_remaining and all_possible_owners:
      owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
      suggested_owners.add(owner)
      dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
      dirs_remaining -= dirs_to_remove
      # Now that we've used `owner` and covered all their dirs, remove them
      # from consideration.
      del all_possible_owners[owner]
      for o, dirs in list(all_possible_owners.items()):
        new_dirs = [(d, dist) for (d, dist) in dirs if d not in dirs_to_remove]
        if not new_dirs:
          del all_possible_owners[o]
        else:
          all_possible_owners[o] = new_dirs
    return suggested_owners

  def _all_possible_owners_for_dir_or_file(self, dir_or_file, author,
                                           cache):
    """Returns a dict of {potential owner: (dir_or_file, distance)} mappings.

    |author|, if nonempty, is left out. |cache| maps already-computed paths
    to their result and is filled in as a side effect.
    """
    assert not dir_or_file.startswith("/")
    res = cache.get(dir_or_file)
    if res is None:
      res = {}
      dirname = dir_or_file
      for owner in self._owners_for(dirname):
        if author and owner == author:
          continue
        res[owner] = (dir_or_file, 1)
      if not self._should_stop_looking(dirname):
        dirname = self.os_path.dirname(dirname)

        parent_res = self._all_possible_owners_for_dir_or_file(dirname,
                                                               author, cache)

        # Merge the parent information with our information, adjusting
        # distances as necessary, and replacing the parent directory
        # names with our names.
        for owner, par_dir_and_distances in parent_res.items():
          if owner in res:
            # If the same person is in multiple OWNERS files above a given
            # directory, only count the closest one.
            continue
          parent_distance = par_dir_and_distances[1]
          res[owner] = (dir_or_file, parent_distance + 1)

      cache[dir_or_file] = res

    return res

  def all_possible_owners(self, dirs_and_files, author):
    """Returns a dict of {potential owner: [(dir, distance), ...]} mappings.

    A distance of 1 is the lowest/closest possible distance (which makes the
    subsequent math easier).
    """

    all_possible_owners_for_dir_or_file_cache = {}
    all_possible_owners = {}
    for current_dir in dirs_and_files:
      dir_owners = self._all_possible_owners_for_dir_or_file(
          current_dir, author,
          all_possible_owners_for_dir_or_file_cache)
      for owner, dir_and_distance in dir_owners.items():
        all_possible_owners.setdefault(owner, []).append(dir_and_distance)

    return all_possible_owners

  def _fnmatch(self, filename, pattern):
    """Same as fnmatch.fnmatch(), but interally caches the compiled regexes."""
    matcher = self._fnmatch_cache.get(pattern)
    if matcher is None:
      matcher = re.compile(fnmatch.translate(pattern)).match
      self._fnmatch_cache[pattern] = matcher
    return matcher(filename)

  @staticmethod
  def total_costs_by_owner(all_possible_owners, dirs):
    """Returns {owner: cost} for every owner covering at least one of |dirs|.

    Lower cost is better.
    """
    # We want to minimize both the number of reviewers and the distance
    # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
    # an arbitrarily-selected scaling factor that seems to work well - it
    # will select one reviewer in the parent directory over three reviewers
    # in subdirs, but not one reviewer over just two.
    result = {}
    for owner in all_possible_owners:
      total_distance = 0
      num_directories_owned = 0
      for dirname, distance in all_possible_owners[owner]:
        if dirname in dirs:
          total_distance += distance
          num_directories_owned += 1
      if num_directories_owned:
        result[owner] = (total_distance /
                         pow(num_directories_owned, 1.75))
    return result

  @staticmethod
  def lowest_cost_owner(all_possible_owners, dirs):
    """Returns the owner with the lowest total cost for covering |dirs|."""
    total_costs_by_owner = Database.total_costs_by_owner(all_possible_owners,
                                                         dirs)
    # Return the lowest cost owner. In the case of a tie, pick one randomly.
    lowest_cost = min(total_costs_by_owner.values())
    lowest_cost_owners = [
        owner for owner, cost in total_costs_by_owner.items()
        if cost == lowest_cost]
    return random.Random().choice(lowest_cost_owners)

  def owners_rooted_at_file(self, filename):
    """Returns a set of all owners transitively listed in filename.

    This function returns a set of all the owners either listed in filename, or
    in a file transitively included by filename. Lines that are not plain owners
    (i.e. per-file owners) are ignored.
    """
    return self._read_just_the_owners(filename)