Revert "Improve git cl split"
This reverts commit 684096347b67b1663690c118785018258acd2e9b.
Reason for revert:
Breaks git-cl split.
https://bugs.chromium.org/p/chromium/issues/detail?id=1054888
Original change's description:
> Improve git cl split
>
> This CL changes the behavior of `git cl split` to split the change
> by the size of the resulting CLs. For now, this is based on the number
> of bytes changed, and not by the number of changed lines. Depending
> on the shape of change, this may still produce more CLs than expected
> (and possibly more than before).
>
> A future change will switch the split to be based on the number
> of affected lines, and also introduce a mode to base the split
> on the number of affected files.
>
> Bug: 998922
> Change-Id: I49f868972a61b89b426ef9e2ceedc733eacb4350
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1778744
> Commit-Queue: Yannic Bonenberger <yannic.bonenberger@gmail.com>
> Reviewed-by: Dirk Pranke <dpranke@chromium.org>
TBR=fdoray@chromium.org,dpranke@chromium.org,yannic.bonenberger@gmail.com,infra-scoped@luci-project-accounts.iam.gserviceaccount.com
Bug: 998922
Change-Id: I466e1245d5f1c934443d850c48c42d3e694e71c6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2080205
Reviewed-by: Anthony Polito <apolito@google.com>
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
diff --git a/split_cl.py b/split_cl.py
index 9d16b1c..c08bf4d 100644
--- a/split_cl.py
+++ b/split_cl.py
@@ -9,7 +9,6 @@
import collections
import os
-import random
import re
import subprocess2
import sys
@@ -22,8 +21,6 @@
import git_common as git
-import third_party.pygtrie as trie
-
# If a call to `git cl split` will generate more than this number of CLs, the
# command will prompt the user to make sure they know what they're doing. Large
@@ -44,25 +41,23 @@
git.run('rev-parse')
-def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
- """Creates a branch named |prefix| + "_" + |cl_index| + "_" + |directory|.
+def CreateBranchForDirectory(prefix, directory, upstream):
+ """Creates a branch named |prefix| + "_" + |directory| + "_split".
Return false if the branch already exists. |upstream| is used as upstream for
the created branch.
"""
existing_branches = set(git.branches(use_limit = False))
- branch_name = '_'.join([prefix, str(cl_index), directory])
+ branch_name = prefix + '_' + directory + '_split'
if branch_name in existing_branches:
return False
git.run('checkout', '-t', upstream, '-b', branch_name)
return True
-def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
- """Replaces $directory with |directory|, $cl_index with |cl_index|, and
- $num_cls with |num_cls| in |txt|."""
- return txt.replace('$directory', '/' + directory).replace(
- '$cl_index', str(cl_index)).replace('$num_cls', str(num_cls))
+def FormatDescriptionOrComment(txt, directory):
+ """Replaces $directory with |directory| in |txt|."""
+ return txt.replace('$directory', '/' + directory)
def AddUploadedByGitClSplitToDescription(description):
@@ -81,14 +76,12 @@
return '\n'.join(lines)
-def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
- directory, files, description, comment, reviewer, changelist,
- cmd_upload, cq_dry_run, enable_auto_submit):
+def UploadCl(refactor_branch, refactor_branch_upstream, directory, files,
+ description, comment, reviewers, changelist, cmd_upload,
+ cq_dry_run, enable_auto_submit):
"""Uploads a CL with all changes to |files| in |refactor_branch|.
Args:
- cl_index: The index of this CL in the list of CLs to upload.
- num_cls: The total number of CLs that will be uploaded.
refactor_branch: Name of the branch that contains the changes to upload.
refactor_branch_upstream: Name of the upstream of |refactor_branch|.
directory: Path to the directory that contains the OWNERS file for which
@@ -96,17 +89,16 @@
files: List of AffectedFile instances to include in the uploaded CL.
description: Description of the uploaded CL.
comment: Comment to post on the uploaded CL.
- reviewer: The reviewer for the CL.
+ reviewers: A set of reviewers for the CL.
changelist: The Changelist class.
cmd_upload: The function associated with the git cl upload command.
cq_dry_run: If CL uploads should also do a cq dry run.
enable_auto_submit: If CL uploads should also enable auto submit.
"""
# Create a branch.
- if not CreateBranchForDirectory(refactor_branch, cl_index, directory,
- refactor_branch_upstream):
- print('Skipping CL ' + cl_index + ' for directory "' + directory +
- '" for which a branch already exists.')
+ if not CreateBranchForDirectory(
+ refactor_branch, directory, refactor_branch_upstream):
+ print('Skipping ' + directory + ' for which a branch already exists.')
return
# Checkout all changes to files in |files|.
@@ -122,12 +114,11 @@
# when it is closed.
with gclient_utils.temporary_file() as tmp_file:
gclient_utils.FileWrite(
- tmp_file,
- FormatDescriptionOrComment(description, directory, cl_index, num_cls))
+ tmp_file, FormatDescriptionOrComment(description, directory))
git.run('commit', '-F', tmp_file)
# Upload a CL.
- upload_args = ['-f', '-r', reviewer]
+ upload_args = ['-f', '-r', ','.join(reviewers)]
if cq_dry_run:
upload_args.append('--cq-dry-run')
if not comment:
@@ -137,140 +128,26 @@
print('Uploading CL for ' + directory + '.')
cmd_upload(upload_args)
if comment:
- changelist().AddComment(
- FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
- publish=True)
+ changelist().AddComment(FormatDescriptionOrComment(comment, directory),
+ publish=True)
-class ChangeList(object):
- """Representation of a CL and the files affected by it."""
-
- def __init__(self, path, owners_db, author, files):
- self._path = path
- self._files = files
- self._owners_db = owners_db
- self._author = author
- self._owners = None
-
- def _EnsureOwners(self):
- if not self._owners:
- self._owners = set()
- files = [f.LocalPath() for f in self.GetFiles()]
- if not files:
- files = [self.GetPath()]
- possible_owners = self._owners_db.all_possible_owners(
- files, self._author).keys()
- for owner in possible_owners:
- if 0 == len(self._owners_db.files_not_covered_by(files, [owner])):
- self._owners |= set([owner])
- assert len(self._owners)
-
- def Merge(self, other):
- self._owners = self.GetCommonOwners(other)
- self._files |= other.GetFiles()
-
- def GetPath(self):
- return self._path
-
- def GetFiles(self):
- return self._files
-
- def GetOwners(self):
- self._EnsureOwners()
- return self._owners
-
- def GetCommonOwners(self, other):
- return self.GetOwners() & other.GetOwners()
-
- def HaveCommonOwners(self, other):
- return len(self.GetCommonOwners(other)) > 0
-
- def GetChangeSizeInBytes(self):
- return sum(
- [c[0] + c[1] for f in self._files for c in f.ChangeSizeInBytes()])
-
-
-def SplitCLs(owners_database, author, files):
+def GetFilesSplitByOwners(owners_database, files):
"""Returns a map of files split by OWNERS file.
Returns:
A map where keys are paths to directories containing an OWNERS file and
values are lists of files sharing an OWNERS file.
"""
-
- # The target CL size in # of changed bytes.
- # TODO(yannic): Use # of changed lines instead and make this configurable.
- max_cl_size = 1000
-
- candidates = trie.Trie()
- # Enable sorting so dry-run will split the CL the same way the CL is uploaded.
- candidates.enable_sorting()
-
- # 1. Create one CL candidate for every affected file.
+ files_split_by_owners = collections.defaultdict(list)
for f in files:
- path = f.LocalPath()
- candidates[path] = ChangeList(path, owners_database, author, set([f]))
-
- change_lists = []
-
- # 2. Try to merge CL in common directories up to a maximum size of
- # |max_cl_size|.
- # This is O( len(files) * max([len(f.path) for f in files]) ).
- edited = True
- while edited:
- edited = False
-
- # 2.1. Iterate over all candidates and merge candidates into the candidate
- # for their parent directory if the resulting CL doesn't exceed
- # |max_cl_size|.
- for item in candidates.items():
- path = ''.join(item[0])
- candidate = item[1]
-
- # The number of CL candidates in subdirectories is equivalent to the
- # number of nodes with prefix |path| in the Trie.
- # Only try to merge |candidate| with the candidate for the parent
- # directory if there are no more CLs for subdirectories.
- sub_cls = len([''.join(k) for k in candidates.keys(path)]) - 1
- if not sub_cls:
- parent_path = os.path.dirname(path)
- if len(parent_path) < 1:
- # Don't create CLs for more than one top-level directory.
- continue
-
- if parent_path not in candidates:
- candidates[parent_path] = ChangeList(parent_path, owners_database,
- author, set())
- parent_cl = candidates[parent_path]
-
- if not parent_cl.HaveCommonOwners(candidate):
- # Don't merge if the resulting CL would need more than one reviewer.
- continue
-
- # Merge |candidate| into the CL for it's parent directory and remove
- # candidate.
- edited = True
- del candidates[path]
- parent_cl.Merge(candidate)
-
- # Add |parent_cl| to list of CLs to submit if the CL is larger than
- # |max_cl_size|.
- # TODO(yannic): Doing it this way, we might end up with CLs of size
- # 2 * max_cl_size if we merged two candidates that just don't exceed
- # the maximal size.
- if parent_cl.GetChangeSizeInBytes() > max_cl_size:
- change_lists.append(parent_cl)
- del candidates[parent_path]
-
- # 3. Add all remaining candidates to the list of CLs.
- for item in candidates.items():
- change_lists.append(item[1])
-
- return change_lists
+ files_split_by_owners[owners_database.enclosing_dir_with_owners(
+ f.LocalPath())].append(f)
+ return files_split_by_owners
def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
- reviewer):
+ reviewers):
"""Prints info about a CL.
Args:
@@ -280,42 +157,20 @@
to upload a CL.
file_paths: A list of files in this CL.
description: The CL description.
- reviewer: The reviewer for this CL.
+ reviewers: A set of reviewers for this CL.
"""
- description_lines = FormatDescriptionOrComment(
- description, directory, cl_index, num_cls).splitlines()
+ description_lines = FormatDescriptionOrComment(description,
+ directory).splitlines()
indented_description = '\n'.join([' ' + l for l in description_lines])
print('CL {}/{}'.format(cl_index, num_cls))
print('Path: {}'.format(directory))
- print('Reviewers: {}'.format(reviewer))
+ print('Reviewers: {}'.format(', '.join(reviewers)))
print('\n' + indented_description + '\n')
print('\n'.join(file_paths))
print()
-def _SelectReviewer(possible_owners, used_reviewers):
- """Select a reviewer from |owners| and adds them to the set of used reviewers.
-
- Returns:
- The reviewer.
- """
-
- # It's debatable whether we want to avoid reusing reviewers. It could be
- # easier to ask the smallest possible amount of reviewers to become familiar
- # with the change being split. However, doing so would mean we send all CLs to
- # top-level owners, which might be too much to ask from them.
- # We may revisit this decicion later.
- unused_reviewers = possible_owners.difference(used_reviewers)
- if len(unused_reviewers) < 1:
- unused_reviewers = possible_owners
- # Pick a random reviwer from the set of owners so we don't prefer owners
- # with emails of low lexical order.
- reviewer = random.choice(tuple(unused_reviewers))
- used_reviewers.add(reviewer)
- return reviewer
-
-
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
cq_dry_run, enable_auto_submit):
""""Splits a branch into smaller branches and uploads CLs.
@@ -356,9 +211,11 @@
owners_database = owners.Database(change.RepositoryRoot(), file, os.path)
owners_database.load_data_needed_for([f.LocalPath() for f in files])
- change_lists = SplitCLs(owners_database, author, set(files))
+ files_split_by_owners = GetFilesSplitByOwners(owners_database, files)
- num_cls = len(change_lists)
+ num_cls = len(files_split_by_owners)
+ print('Will split current branch (' + refactor_branch + ') into ' +
+ str(num_cls) + ' CLs.\n')
if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
print(
'This will generate "%r" CLs. This many CLs can potentially generate'
@@ -370,21 +227,21 @@
if answer.lower() != 'y':
return 0
- reviewers = set()
- for cl_index, cl in enumerate(change_lists, 1):
+ for cl_index, (directory, files) in \
+ enumerate(files_split_by_owners.items(), 1):
# Use '/' as a path separator in the branch name and the CL description
# and comment.
- directory = cl.GetPath().replace(os.path.sep, '/')
- file_paths = [f.LocalPath() for f in cl.GetFiles()]
- reviewer = _SelectReviewer(cl.GetOwners(), reviewers)
+ directory = directory.replace(os.path.sep, '/')
+ file_paths = [f.LocalPath() for f in files]
+ reviewers = owners_database.reviewers_for(file_paths, author)
if dry_run:
PrintClInfo(cl_index, num_cls, directory, file_paths, description,
- reviewer)
+ reviewers)
else:
- UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
- directory, files, description, comment, reviewer, changelist,
- cmd_upload, cq_dry_run, enable_auto_submit)
+ UploadCl(refactor_branch, refactor_branch_upstream, directory, files,
+ description, comment, reviewers, changelist, cmd_upload,
+ cq_dry_run, enable_auto_submit)
# Go back to the original branch.
git.run('checkout', refactor_branch)