blob: 27312c81dcc9fd06108e52772599da4df3be6945 [file] [log] [blame]
Francois Dorayd42c6812017-05-30 15:10:20 -04001#!/usr/bin/env python
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Splits a branch into smaller branches and uploads CLs."""
7
Raul Tambre80ee78e2019-05-06 22:41:05 +00008from __future__ import print_function
9
Francois Dorayd42c6812017-05-30 15:10:20 -040010import collections
11import os
Yannic Bonenberger68409632020-01-23 18:29:01 +000012import random
Francois Dorayd42c6812017-05-30 15:10:20 -040013import re
14import subprocess2
15import sys
16import tempfile
17
18import git_footers
19import owners
20import owners_finder
21
22import git_common as git
23
Yannic Bonenberger68409632020-01-23 18:29:01 +000024import third_party.pygtrie as trie
25
Francois Dorayd42c6812017-05-30 15:10:20 -040026
# If a call to `git cl split` will generate more than this number of CLs, the
# command will prompt the user to make sure they know what they're doing. Large
# numbers of CLs generated by `git cl split` have caused infrastructure issues
# in the past.
# Note: SplitCl() only enforces this limit when a CQ dry run is requested.
CL_SPLIT_FORCE_LIMIT = 10
32
33
def ReadFile(file_path):
  """Reads |file_path| and returns everything it contains.

  Args:
    file_path: Path of the file to read. It is opened with the default mode.

  Returns:
    The full content of the file.
  """
  with open(file_path) as handle:
    return handle.read()
39
40
def EnsureInGitRepository():
  """Verifies that the current directory is inside a git repository.

  Runs `git rev-parse` and lets whatever exception git.run() raises on
  failure propagate to the caller.
  """
  probe_command = ('rev-parse',)
  git.run(*probe_command)
44
45
def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
  """Creates a branch named |prefix| + "_" + |cl_index| + "_" + |directory|.

  Args:
    prefix: First component of the branch name.
    cl_index: Index of the CL; may be an int or a string.
    directory: Last component of the branch name.
    upstream: Branch used as upstream for the created branch.

  Returns:
    False if a branch with that name already exists (nothing is created),
    True once the branch has been created and checked out.
  """
  existing_branches = set(git.branches(use_limit=False))
  # SplitCl() produces |cl_index| with enumerate(), i.e. as an int, and
  # str.join() only accepts strings, so convert it explicitly.
  branch_name = '_'.join([prefix, str(cl_index), directory])
  if branch_name in existing_branches:
    return False
  git.run('checkout', '-t', upstream, '-b', branch_name)
  return True
58
59
def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
  """Expands the placeholders of a CL description or comment.

  Replaces $directory with "/" + |directory|, $cl_index with |cl_index| and
  $num_cls with |num_cls| in |txt|.

  All placeholders are expanded in a single pass, so text inserted for one
  placeholder (e.g. a directory whose name contains "$num_cls") is never
  expanded a second time.

  Args:
    txt: The template text.
    directory: Directory of the CL, without the leading "/".
    cl_index: Index of the CL; converted with str().
    num_cls: Total number of CLs; converted with str().

  Returns:
    |txt| with every placeholder expanded.
  """
  substitutions = {
      '$directory': '/' + directory,
      '$cl_index': str(cl_index),
      '$num_cls': str(num_cls),
  }
  # A function is used as replacement so that backslashes in the substituted
  # values are inserted literally instead of being treated as escapes.
  return re.sub(r'\$(?:directory|cl_index|num_cls)',
                lambda match: substitutions[match.group(0)], txt)
Francois Dorayd42c6812017-05-30 15:10:20 -040065
66
def AddUploadedByGitClSplitToDescription(description):
  """Adds a 'This CL was uploaded by git cl split.' line to |description|.

  The line is added before footers, or at the end of |description| if it has no
  footers. Exactly one blank line separates it from the preceding text.

  Args:
    description: The CL description to annotate.

  Returns:
    The annotated description as a single string.
  """
  # split_footers() is used here as returning the non-footer lines at index 0
  # and the footer lines at index 1.
  split_footers = git_footers.split_footers(description)
  lines = split_footers[0]
  # Only insert a separating blank line when the text does not already end
  # with one; |lines| may be empty for an empty description.
  if lines and lines[-1] and not lines[-1].isspace():
    lines = lines + ['']
  lines = lines + ['This CL was uploaded by git cl split.']
  if split_footers[1]:
    lines += [''] + split_footers[1]
  return '\n'.join(lines)
81
82
def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
             directory, files, description, comment, reviewer, changelist,
             cmd_upload, cq_dry_run, enable_auto_submit):
  """Uploads a CL with all changes to |files| in |refactor_branch|.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    refactor_branch: Name of the branch that contains the changes to upload.
    refactor_branch_upstream: Name of the upstream of |refactor_branch|.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    files: List of AffectedFile instances to include in the uploaded CL.
    description: Description of the uploaded CL.
    comment: Comment to post on the uploaded CL.
    reviewer: The reviewer for the CL.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.
  """
  # Create a branch. |cl_index| is usually an int, so it is converted before
  # being used in string operations.
  if not CreateBranchForDirectory(refactor_branch, str(cl_index), directory,
                                  refactor_branch_upstream):
    print('Skipping CL ' + str(cl_index) + ' for directory "' + directory +
          '" for which a branch already exists.')
    return

  # Checkout all changes to files in |files|.
  deleted_files = [f.AbsoluteLocalPath() for f in files if f.Action() == 'D']
  if deleted_files:
    git.run(*['rm'] + deleted_files)
  modified_files = [f.AbsoluteLocalPath() for f in files if f.Action() != 'D']
  if modified_files:
    git.run(*['checkout', refactor_branch, '--'] + modified_files)

  # Commit changes. The temporary file is created with delete=False so that it
  # can be deleted manually after git has read it rather than automatically
  # when it is closed. It is opened in text mode because the description is a
  # string.
  with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp_file:
    tmp_file.write(
        FormatDescriptionOrComment(description, directory, cl_index, num_cls))
  # The file is closed at this point, which lets git open it.
  try:
    git.run('commit', '-F', tmp_file.name)
  finally:
    # Remove the file even if the commit failed.
    os.remove(tmp_file.name)

  # Upload a CL.
  upload_args = ['-f', '-r', reviewer]
  if cq_dry_run:
    upload_args.append('--cq-dry-run')
  if not comment:
    upload_args.append('--send-mail')
  if enable_auto_submit:
    upload_args.append('--enable-auto-submit')
  print('Uploading CL for ' + directory + '.')
  cmd_upload(upload_args)
  if comment:
    changelist().AddComment(
        FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
        publish=True)
Francois Dorayd42c6812017-05-30 15:10:20 -0400144
145
class ChangeList(object):
  """A candidate CL: a path together with the files changed for it."""

  def __init__(self, path, owners_db, author, files):
    self._path = path
    self._owners_db = owners_db
    self._author = author
    self._files = files
    # Filled in lazily by _EnsureOwners().
    self._owners = None

  def _EnsureOwners(self):
    """Computes the owners that cover every file of this CL, if not known."""
    if self._owners:
      return

    self._owners = set()
    # A CL without files is evaluated against its own path instead.
    paths = [f.LocalPath() for f in self.GetFiles()] or [self.GetPath()]
    candidates = self._owners_db.all_possible_owners(
        paths, self._author).keys()
    for candidate in candidates:
      uncovered = self._owners_db.files_not_covered_by(paths, [candidate])
      if len(uncovered) == 0:
        self._owners.add(candidate)
    assert len(self._owners)

  def Merge(self, other):
    """Absorbs the files of |other|; owners become the common owners."""
    self._owners = self.GetCommonOwners(other)
    self._files |= other.GetFiles()

  def GetPath(self):
    """Returns the path this CL was created for."""
    return self._path

  def GetFiles(self):
    """Returns the set of files in this CL."""
    return self._files

  def GetOwners(self):
    """Returns the set of owners that each cover all files of this CL."""
    self._EnsureOwners()
    return self._owners

  def GetCommonOwners(self, other):
    """Returns the owners shared by this CL and |other|."""
    mine = self.GetOwners()
    theirs = other.GetOwners()
    return mine & theirs

  def HaveCommonOwners(self, other):
    """Returns True if this CL and |other| share at least one owner."""
    return len(self.GetCommonOwners(other)) > 0

  def GetChangeSizeInBytes(self):
    """Returns the summed size of all changes in all files of this CL."""
    total = 0
    for changed_file in self._files:
      for change in changed_file.ChangeSizeInBytes():
        total += change[0] + change[1]
    return total
192
193
def SplitCLs(owners_database, author, files):
  """Groups |files| into ChangeList objects that can each be uploaded as a CL.

  Every file starts as its own candidate. Candidates are then repeatedly merged
  into the candidate of their parent directory as long as both share at least
  one owner; a parent that grows beyond the target size is emitted right away.

  Args:
    owners_database: Owners database handed to every created ChangeList.
    author: Author handed to every created ChangeList.
    files: Iterable of affected files; each must provide LocalPath().

  Returns:
    A list of ChangeList objects.
  """

  # The target CL size in # of changed bytes.
  # TODO(yannic): Use # of changed lines instead and make this configurable.
  max_cl_size = 1000

  candidates = trie.Trie()
  # Enable sorting so dry-run will split the CL the same way the CL is uploaded.
  candidates.enable_sorting()

  # 1. Create one CL candidate for every affected file.
  for f in files:
    path = f.LocalPath()
    candidates[path] = ChangeList(path, owners_database, author, set([f]))

  change_lists = []

  # 2. Try to merge CL in common directories up to a maximum size of
  #    |max_cl_size|.
  #    This is O( len(files) * max([len(f.path) for f in files]) ).
  edited = True
  while edited:
    edited = False

    # 2.1. Iterate over all candidates and merge candidates into the candidate
    #      for their parent directory if the resulting CL doesn't exceed
    #      |max_cl_size|.
    # NOTE(review): |candidates| is modified inside this loop; this assumes
    # items() returns a snapshot rather than a live view -- confirm.
    for item in candidates.items():
      path = ''.join(item[0])
      candidate = item[1]

      # The number of CL candidates in subdirectories is equivalent to the
      # number of nodes with prefix |path| in the Trie.
      # Only try to merge |candidate| with the candidate for the parent
      # directory if there are no more CLs for subdirectories.
      # NOTE(review): keys are plain path strings, so the prefix match looks
      # character-based rather than per path component -- confirm.
      sub_cls = len([''.join(k) for k in candidates.keys(path)]) - 1
      if not sub_cls:
        parent_path = os.path.dirname(path)
        if len(parent_path) < 1:
          # Don't create CLs for more than one top-level directory.
          continue

        if parent_path not in candidates:
          # NOTE(review): this empty parent candidate stays in |candidates|
          # even if the merge below is skipped -- confirm that is intended.
          candidates[parent_path] = ChangeList(parent_path, owners_database,
                                               author, set())
        parent_cl = candidates[parent_path]

        if not parent_cl.HaveCommonOwners(candidate):
          # Don't merge if the resulting CL would need more than one reviewer.
          continue

        # Merge |candidate| into the CL for its parent directory and remove
        # candidate.
        edited = True
        del candidates[path]
        parent_cl.Merge(candidate)

        # Add |parent_cl| to list of CLs to submit if the CL is larger than
        # |max_cl_size|.
        # TODO(yannic): Doing it this way, we might end up with CLs of size
        # 2 * max_cl_size if we merged two candidates that just don't exceed
        # the maximal size.
        if parent_cl.GetChangeSizeInBytes() > max_cl_size:
          change_lists.append(parent_cl)
          del candidates[parent_path]

  # 3. Add all remaining candidates to the list of CLs.
  for item in candidates.items():
    change_lists.append(item[1])

  return change_lists
Francois Dorayd42c6812017-05-30 15:10:20 -0400271
272
def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                reviewer):
  """Prints a summary of one CL.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    file_paths: A list of files in this CL.
    description: The CL description; its placeholders are expanded before
        printing.
    reviewer: The reviewer for this CL.
  """
  formatted = FormatDescriptionOrComment(description, directory, cl_index,
                                         num_cls)
  # Indent every line of the description.
  indented_description = '\n'.join(
      ' ' + line for line in formatted.splitlines())
  file_listing = '\n'.join(file_paths)

  print('CL {}/{}'.format(cl_index, num_cls))
  print('Path: {}'.format(directory))
  print('Reviewers: {}'.format(reviewer))
  print('\n' + indented_description + '\n')
  print(file_listing)
  print()
Chris Watkinsba28e462017-12-13 11:22:17 +1100296
297
Yannic Bonenberger68409632020-01-23 18:29:01 +0000298def _SelectReviewer(possible_owners, used_reviewers):
299 """Select a reviewer from |owners| and adds them to the set of used reviewers.
300
301 Returns:
302 The reviewer.
303 """
304
305 # It's debatable whether we want to avoid reusing reviewers. It could be
306 # easier to ask the smallest possible amount of reviewers to become familiar
307 # with the change being split. However, doing so would mean we send all CLs to
308 # top-level owners, which might be too much to ask from them.
309 # We may revisit this decicion later.
310 unused_reviewers = possible_owners.difference(used_reviewers)
311 if len(unused_reviewers) < 1:
312 unused_reviewers = possible_owners
313 # Pick a random reviwer from the set of owners so we don't prefer owners
314 # with emails of low lexical order.
315 reviewer = random.choice(tuple(unused_reviewers))
316 used_reviewers.add(reviewer)
317 return reviewer
318
319
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
            cq_dry_run, enable_auto_submit):
  """Splits a branch into smaller branches and uploads CLs.

  Args:
    description_file: File containing the description of uploaded CLs.
    comment_file: File containing the comment of uploaded CLs.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    dry_run: Whether this is a dry run (no branches or CLs created).
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.

  Returns:
    0 in case of success. 1 in case of error.
  """
  description = AddUploadedByGitClSplitToDescription(ReadFile(description_file))
  comment = ReadFile(comment_file) if comment_file else None

  try:
    EnsureInGitRepository()

    cl = changelist()
    change = cl.GetChange(cl.GetCommonAncestorWithUpstream(), None)
    files = change.AffectedFiles()

    if not files:
      print('Cannot split an empty CL.')
      return 1

    author = git.run('config', 'user.email').strip() or None
    refactor_branch = git.current_branch()
    assert refactor_branch, "Can't run from detached branch."
    refactor_branch_upstream = git.upstream(refactor_branch)
    assert refactor_branch_upstream, \
        "Branch %s must have an upstream." % refactor_branch

    # open() is used instead of file() because the latter only exists in
    # Python 2.
    owners_database = owners.Database(change.RepositoryRoot(), open, os.path)
    owners_database.load_data_needed_for([f.LocalPath() for f in files])

    change_lists = SplitCLs(owners_database, author, set(files))

    num_cls = len(change_lists)
    if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
      print(
          'This will generate "%r" CLs. This many CLs can potentially generate'
          ' too much load on the build infrastructure. Please email'
          ' infra-dev@chromium.org to ensure that this won\'t break anything.'
          ' The infra team reserves the right to cancel your jobs if they are'
          ' overloading the CQ.' % num_cls)
      # raw_input() only exists in Python 2; input() is its Python 3
      # equivalent.
      try:
        prompt = raw_input
      except NameError:
        prompt = input
      answer = prompt('Proceed? (y/n):')
      if answer.lower() != 'y':
        return 0

    reviewers = set()
    for cl_index, change_list in enumerate(change_lists, 1):
      # Use '/' as a path separator in the branch name and the CL description
      # and comment.
      directory = change_list.GetPath().replace(os.path.sep, '/')
      cl_files = change_list.GetFiles()
      file_paths = [f.LocalPath() for f in cl_files]
      reviewer = _SelectReviewer(change_list.GetOwners(), reviewers)

      if dry_run:
        PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                    reviewer)
      else:
        # Only the files of this CL are uploaded, not every affected file of
        # the branch.
        UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
                 directory, cl_files, description, comment, reviewer,
                 changelist, cmd_upload, cq_dry_run, enable_auto_submit)

    # Go back to the original branch.
    git.run('checkout', refactor_branch)

  except subprocess2.CalledProcessError as cpe:
    sys.stderr.write(cpe.stderr)
    return 1
  return 0