blob: 9d16b1c6e0cd425499b0570e4eda4694274eb764 [file] [log] [blame]
Francois Dorayd42c6812017-05-30 15:10:20 -04001#!/usr/bin/env python
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Splits a branch into smaller branches and uploads CLs."""
7
Raul Tambre80ee78e2019-05-06 22:41:05 +00008from __future__ import print_function
9
Francois Dorayd42c6812017-05-30 15:10:20 -040010import collections
11import os
Yannic Bonenberger68409632020-01-23 18:29:01 +000012import random
Francois Dorayd42c6812017-05-30 15:10:20 -040013import re
14import subprocess2
15import sys
16import tempfile
17
Edward Lemur1773f372020-02-22 00:27:14 +000018import gclient_utils
Francois Dorayd42c6812017-05-30 15:10:20 -040019import git_footers
20import owners
21import owners_finder
22
23import git_common as git
24
Yannic Bonenberger68409632020-01-23 18:29:01 +000025import third_party.pygtrie as trie
26
Francois Dorayd42c6812017-05-30 15:10:20 -040027
# If `git cl split` is asked to do a CQ dry run and will generate more than this
# number of CLs, the command will prompt the user to make sure they know what
# they're doing. Large numbers of CLs generated by `git cl split` have caused
# infrastructure issues in the past.
32CL_SPLIT_FORCE_LIMIT = 10
33
34
def ReadFile(file_path):
  """Reads |file_path| in text mode and returns everything it contains."""
  with open(file_path) as source:
    return source.read()
40
41
def EnsureInGitRepository():
  """Checks that the current directory belongs to a git repository.

  Runs `git rev-parse` through git.run(). The check relies on that call
  failing outside of a repository; SplitCl() treats
  subprocess2.CalledProcessError as the failure signal.
  """
  git.run('rev-parse')
45
46
def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
  """Checks out a new branch named "<prefix>_<cl_index>_<directory>".

  The new branch is created with |upstream| as its upstream.

  Returns:
    True once the branch has been created and checked out. False, without
    running any checkout, if a branch with that name already exists.
  """
  known_branches = frozenset(git.branches(use_limit=False))
  new_branch = '_'.join((prefix, str(cl_index), directory))
  if new_branch not in known_branches:
    git.run('checkout', '-t', upstream, '-b', new_branch)
    return True
  return False
59
60
def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
  """Expands the placeholders found in |txt| and returns the result.

  The substitutions are applied one after the other, in this order:
    $directory -> "/" followed by |directory|
    $cl_index  -> |cl_index|
    $num_cls   -> |num_cls|
  """
  substitutions = (
      ('$directory', '/' + directory),
      ('$cl_index', str(cl_index)),
      ('$num_cls', str(num_cls)),
  )
  for placeholder, value in substitutions:
    txt = txt.replace(placeholder, value)
  return txt
Francois Dorayd42c6812017-05-30 15:10:20 -040066
67
def AddUploadedByGitClSplitToDescription(description):
  """Adds a 'This CL was uploaded by git cl split.' line to |description|.

  The line is added before footers, or at the end of |description| if it has no
  footers. It is separated from the text that precedes it by exactly one blank
  line.

  Args:
    description: The CL description, as a single string.

  Returns:
    The new description, as a single string.
  """
  split_footers = git_footers.split_footers(description)
  # Work on a copy so the list returned by split_footers() is left untouched.
  lines = list(split_footers[0])
  # Only insert a separator when the preceding text does not already end with
  # a blank line. |lines| may be empty, in which case there is nothing to
  # separate the marker from.
  if lines and lines[-1] and not lines[-1].isspace():
    lines.append('')
  lines.append('This CL was uploaded by git cl split.')
  if split_footers[1]:
    lines.append('')
    lines.extend(split_footers[1])
  return '\n'.join(lines)
82
83
def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
             directory, files, description, comment, reviewer, changelist,
             cmd_upload, cq_dry_run, enable_auto_submit):
  """Uploads a CL with all changes to |files| in |refactor_branch|.

  Nothing is uploaded if the branch for this CL already exists.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    refactor_branch: Name of the branch that contains the changes to upload.
    refactor_branch_upstream: Name of the upstream of |refactor_branch|.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    files: List of AffectedFile instances to include in the uploaded CL.
    description: Description of the uploaded CL.
    comment: Comment to post on the uploaded CL.
    reviewer: The reviewer for the CL.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.
  """
  # Create a branch.
  if not CreateBranchForDirectory(refactor_branch, cl_index, directory,
                                  refactor_branch_upstream):
    # |cl_index| is a number; convert it explicitly, since concatenating it to
    # a string directly raises TypeError.
    print('Skipping CL ' + str(cl_index) + ' for directory "' + directory +
          '" for which a branch already exists.')
    return

  # Checkout all changes to files in |files|.
  deleted_files = [f.AbsoluteLocalPath() for f in files if f.Action() == 'D']
  if deleted_files:
    git.run('rm', *deleted_files)
  modified_files = [f.AbsoluteLocalPath() for f in files if f.Action() != 'D']
  if modified_files:
    git.run('checkout', refactor_branch, '--', *modified_files)

  # Commit changes. The description is handed to git through a temporary file;
  # the commit has to run inside the `with` block, while that file is still
  # available.
  with gclient_utils.temporary_file() as tmp_file:
    gclient_utils.FileWrite(
        tmp_file,
        FormatDescriptionOrComment(description, directory, cl_index, num_cls))
    git.run('commit', '-F', tmp_file)

  # Upload a CL.
  upload_args = ['-f', '-r', reviewer]
  if cq_dry_run:
    upload_args.append('--cq-dry-run')
  if not comment:
    # Without a comment to publish afterwards, mail is sent at upload time.
    upload_args.append('--send-mail')
  if enable_auto_submit:
    upload_args.append('--enable-auto-submit')
  print('Uploading CL for ' + directory + '.')
  cmd_upload(upload_args)
  if comment:
    changelist().AddComment(
        FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
        publish=True)
Francois Dorayd42c6812017-05-30 15:10:20 -0400143
144
class ChangeList(object):
  """A candidate CL: a path, the files it touches and who may review it."""

  def __init__(self, path, owners_db, author, files):
    self._path = path
    self._owners_db = owners_db
    self._author = author
    self._files = files
    # Filled in lazily by _EnsureOwners().
    self._owners = None

  def _EnsureOwners(self):
    """Computes the owner set if it has not been computed yet (or is empty).

    An owner qualifies only if they alone cover every file of this CL. A CL
    without files is looked up by its path instead.
    """
    if self._owners:
      return
    paths = [f.LocalPath() for f in self.GetFiles()] or [self.GetPath()]
    possible_owners = self._owners_db.all_possible_owners(
        paths, self._author).keys()
    self._owners = set(
        owner for owner in possible_owners
        if not len(self._owners_db.files_not_covered_by(paths, [owner])))
    assert len(self._owners)

  def Merge(self, other):
    """Absorbs |other|: keeps only the shared owners and adds its files."""
    # The shared owners must be determined before the file set grows.
    shared_owners = self.GetCommonOwners(other)
    self._owners = shared_owners
    self._files |= other.GetFiles()

  def GetPath(self):
    """Returns the path this CL was created for."""
    return self._path

  def GetFiles(self):
    """Returns the set of files in this CL."""
    return self._files

  def GetOwners(self):
    """Returns the set of owners able to review the whole CL."""
    self._EnsureOwners()
    return self._owners

  def GetCommonOwners(self, other):
    """Returns the owners able to review both this CL and |other|."""
    return self.GetOwners() & other.GetOwners()

  def HaveCommonOwners(self, other):
    """Returns whether this CL and |other| share at least one owner."""
    return bool(self.GetCommonOwners(other))

  def GetChangeSizeInBytes(self):
    """Returns the sum of both components of every change of every file."""
    total = 0
    for f in self._files:
      for change in f.ChangeSizeInBytes():
        total += change[0] + change[1]
    return total
191
192
def SplitCLs(owners_database, author, files):
  """Groups |files| into CLs along directory and owner boundaries.

  Every file starts out as its own candidate CL. Candidates are then folded
  into the candidate of their parent directory for as long as both share at
  least one owner. A parent that grows beyond the target size is finalized and
  takes no further part in merging.

  Args:
    owners_database: The owners database handed to every ChangeList.
    author: The author handed to every ChangeList.
    files: Iterable of affected files. Each must provide LocalPath() plus
        whatever ChangeList requires of its files.

  Returns:
    A list of ChangeList instances, each of which contains at least one file.
  """

  # The target CL size in # of changed bytes.
  # TODO(yannic): Use # of changed lines instead and make this configurable.
  max_cl_size = 1000

  candidates = trie.Trie()
  # Enable sorting so dry-run will split the CL the same way the CL is uploaded.
  candidates.enable_sorting()

  # 1. Create one CL candidate for every affected file.
  for f in files:
    path = f.LocalPath()
    candidates[path] = ChangeList(path, owners_database, author, set([f]))

  change_lists = []

  # 2. Try to merge CL in common directories up to a maximum size of
  #    |max_cl_size|.
  #    This is O( len(files) * max([len(f.path) for f in files]) ).
  edited = True
  while edited:
    edited = False

    # 2.1. Iterate over all candidates and merge candidates into the candidate
    #      for their parent directory if the resulting CL doesn't exceed
    #      |max_cl_size|.
    # The loop body inserts and deletes trie entries, so iterate over a
    # snapshot instead of over the trie itself.
    for key, candidate in list(candidates.items()):
      path = ''.join(key)

      # The number of CL candidates in subdirectories is equivalent to the
      # number of nodes with prefix |path| in the Trie.
      # Only try to merge |candidate| with the candidate for the parent
      # directory if there are no more CLs for subdirectories.
      sub_cls = len(list(candidates.keys(path))) - 1
      if sub_cls:
        continue

      parent_path = os.path.dirname(path)
      if len(parent_path) < 1:
        # Don't create CLs for more than one top-level directory.
        continue

      if parent_path not in candidates:
        # Placeholder without files. It stays empty if the merge below is
        # rejected; see step 3.
        candidates[parent_path] = ChangeList(parent_path, owners_database,
                                             author, set())
      parent_cl = candidates[parent_path]

      if not parent_cl.HaveCommonOwners(candidate):
        # Don't merge if the resulting CL would need more than one reviewer.
        continue

      # Merge |candidate| into the CL for its parent directory and remove
      # candidate.
      edited = True
      del candidates[path]
      parent_cl.Merge(candidate)

      # Add |parent_cl| to list of CLs to submit if the CL is larger than
      # |max_cl_size|.
      # TODO(yannic): Doing it this way, we might end up with CLs of size
      # 2 * max_cl_size if we merged two candidates that just don't exceed
      # the maximal size.
      if parent_cl.GetChangeSizeInBytes() > max_cl_size:
        change_lists.append(parent_cl)
        del candidates[parent_path]

  # 3. Add all remaining candidates to the list of CLs. Placeholders that were
  #    created for a parent directory but never received a merge have no files
  #    and would only produce an empty CL, so they are dropped.
  for _, candidate in candidates.items():
    if candidate.GetFiles():
      change_lists.append(candidate)

  return change_lists
Francois Dorayd42c6812017-05-30 15:10:20 -0400270
271
def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                reviewer):
  """Writes a human-readable summary of one CL to stdout.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    file_paths: A list of files in this CL.
    description: The CL description; its placeholders are expanded before it
        is printed.
    reviewer: The reviewer for this CL.
  """
  formatted = FormatDescriptionOrComment(description, directory, cl_index,
                                         num_cls)
  indented_description = '\n'.join(
      ' ' + line for line in formatted.splitlines())

  # One entry per print() call of a line-by-line version; joining them with
  # newlines yields the very same output.
  report = [
      'CL {}/{}'.format(cl_index, num_cls),
      'Path: {}'.format(directory),
      'Reviewers: {}'.format(reviewer),
      '\n' + indented_description + '\n',
      '\n'.join(file_paths),
      '',
  ]
  print('\n'.join(report))
Chris Watkinsba28e462017-12-13 11:22:17 +1100295
296
def _SelectReviewer(possible_owners, used_reviewers):
  """Picks a reviewer from |possible_owners| and records the choice.

  Owners that are not in |used_reviewers| yet are preferred. Once all of
  |possible_owners| have been used, any of them may be picked again. The chosen
  reviewer is added to |used_reviewers|.

  Returns:
    The reviewer.
  """

  # It's debatable whether we want to avoid reusing reviewers. It could be
  # easier to ask the smallest possible amount of reviewers to become familiar
  # with the change being split. However, doing so would mean we send all CLs to
  # top-level owners, which might be too much to ask from them.
  # We may revisit this decision later.
  pool = possible_owners.difference(used_reviewers) or possible_owners
  # Pick a random reviewer from the pool so we don't prefer owners with emails
  # of low lexical order.
  reviewer = random.choice(tuple(pool))
  used_reviewers.add(reviewer)
  return reviewer
317
318
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
            cq_dry_run, enable_auto_submit):
  """Splits a branch into smaller branches and uploads CLs.

  Args:
    description_file: File containing the description of uploaded CLs.
    comment_file: File containing the comment of uploaded CLs.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    dry_run: Whether this is a dry run (no branches or CLs created).
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.

  Returns:
    0 in case of success, or if the user declines to proceed when prompted.
    1 in case of error.
  """
  description = AddUploadedByGitClSplitToDescription(ReadFile(description_file))
  comment = ReadFile(comment_file) if comment_file else None

  try:
    EnsureInGitRepository()

    cl = changelist()
    change = cl.GetChange(cl.GetCommonAncestorWithUpstream())
    files = change.AffectedFiles()

    if not files:
      print('Cannot split an empty CL.')
      return 1

    author = git.run('config', 'user.email').strip() or None
    refactor_branch = git.current_branch()
    assert refactor_branch, "Can't run from detached branch."
    refactor_branch_upstream = git.upstream(refactor_branch)
    assert refactor_branch_upstream, \
        "Branch %s must have an upstream." % refactor_branch

    # `open` exists on both Python 2 and 3, whereas the `file` builtin is
    # Python 2 only.
    owners_database = owners.Database(change.RepositoryRoot(), open, os.path)
    owners_database.load_data_needed_for([f.LocalPath() for f in files])

    change_lists = SplitCLs(owners_database, author, set(files))

    num_cls = len(change_lists)
    if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
      print(
          'This will generate "%r" CLs. This many CLs can potentially generate'
          ' too much load on the build infrastructure. Please email'
          ' infra-dev@chromium.org to ensure that this won\'t break anything.'
          ' The infra team reserves the right to cancel your jobs if they are'
          ' overloading the CQ.' % num_cls)
      # raw_input() was renamed to input() in Python 3.
      try:
        read_answer = raw_input
      except NameError:
        read_answer = input
      answer = read_answer('Proceed? (y/n):')
      if answer.lower() != 'y':
        return 0

    reviewers = set()
    for cl_index, split_cl in enumerate(change_lists, 1):
      # Use '/' as a path separator in the branch name and the CL description
      # and comment.
      directory = split_cl.GetPath().replace(os.path.sep, '/')
      # Only the files of this particular CL, not every affected file.
      cl_files = split_cl.GetFiles()
      file_paths = [f.LocalPath() for f in cl_files]
      reviewer = _SelectReviewer(split_cl.GetOwners(), reviewers)

      if dry_run:
        PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                    reviewer)
      else:
        UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
                 directory, cl_files, description, comment, reviewer,
                 changelist, cmd_upload, cq_dry_run, enable_auto_submit)

    # Go back to the original branch.
    git.run('checkout', refactor_branch)

  except subprocess2.CalledProcessError as cpe:
    sys.stderr.write(cpe.stderr)
    return 1
  return 0