Revert "Improve git cl split"

This reverts commit 684096347b67b1663690c118785018258acd2e9b.

Reason for revert:
Breaks git-cl split.
https://bugs.chromium.org/p/chromium/issues/detail?id=1054888

Original change's description:
> Improve git cl split
>
> This CL changes the behavior of `git cl split` to split the change
> by the size of the resulting CLs. For now, this is based on the number
> of bytes changed, and not by the number of changed lines. Depending
> on the shape of change, this may still produce more CLs than expected
> (and possibly more than before).
>
> A future change will switch the split to be based on the number
> of affected lines, and also introduce a mode to base the split
> on the number of affected files.
>
> Bug: 998922
> Change-Id: I49f868972a61b89b426ef9e2ceedc733eacb4350
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1778744
> Commit-Queue: Yannic Bonenberger <yannic.bonenberger@gmail.com>
> Reviewed-by: Dirk Pranke <dpranke@chromium.org>

TBR=fdoray@chromium.org,dpranke@chromium.org,yannic.bonenberger@gmail.com,infra-scoped@luci-project-accounts.iam.gserviceaccount.com

Bug: 998922
Change-Id: I466e1245d5f1c934443d850c48c42d3e694e71c6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2080205
Reviewed-by: Anthony Polito <apolito@google.com>
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
diff --git a/split_cl.py b/split_cl.py
index 9d16b1c..c08bf4d 100644
--- a/split_cl.py
+++ b/split_cl.py
@@ -9,7 +9,6 @@
 
 import collections
 import os
-import random
 import re
 import subprocess2
 import sys
@@ -22,8 +21,6 @@
 
 import git_common as git
 
-import third_party.pygtrie as trie
-
 
 # If a call to `git cl split` will generate more than this number of CLs, the
 # command will prompt the user to make sure they know what they're doing. Large
@@ -44,25 +41,23 @@
   git.run('rev-parse')
 
 
-def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
-  """Creates a branch named |prefix| + "_" + |cl_index| + "_" + |directory|.
+def CreateBranchForDirectory(prefix, directory, upstream):
+  """Creates a branch named |prefix| + "_" + |directory| + "_split".
 
   Return false if the branch already exists. |upstream| is used as upstream for
   the created branch.
   """
   existing_branches = set(git.branches(use_limit = False))
-  branch_name = '_'.join([prefix, str(cl_index), directory])
+  branch_name = prefix + '_' + directory + '_split'
   if branch_name in existing_branches:
     return False
   git.run('checkout', '-t', upstream, '-b', branch_name)
   return True
 
 
-def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
-  """Replaces $directory with |directory|, $cl_index with |cl_index|, and
-  $num_cls with |num_cls| in |txt|."""
-  return txt.replace('$directory', '/' + directory).replace(
-      '$cl_index', str(cl_index)).replace('$num_cls', str(num_cls))
+def FormatDescriptionOrComment(txt, directory):
+  """Replaces $directory with |directory| in |txt|."""
+  return txt.replace('$directory', '/' + directory)
 
 
 def AddUploadedByGitClSplitToDescription(description):
@@ -81,14 +76,12 @@
   return '\n'.join(lines)
 
 
-def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
-             directory, files, description, comment, reviewer, changelist,
-             cmd_upload, cq_dry_run, enable_auto_submit):
+def UploadCl(refactor_branch, refactor_branch_upstream, directory, files,
+             description, comment, reviewers, changelist, cmd_upload,
+             cq_dry_run, enable_auto_submit):
   """Uploads a CL with all changes to |files| in |refactor_branch|.
 
   Args:
-    cl_index: The index of this CL in the list of CLs to upload.
-    num_cls: The total number of CLs that will be uploaded.
     refactor_branch: Name of the branch that contains the changes to upload.
     refactor_branch_upstream: Name of the upstream of |refactor_branch|.
     directory: Path to the directory that contains the OWNERS file for which
@@ -96,17 +89,16 @@
     files: List of AffectedFile instances to include in the uploaded CL.
     description: Description of the uploaded CL.
     comment: Comment to post on the uploaded CL.
-    reviewer: The reviewer for the CL.
+    reviewers: A set of reviewers for the CL.
     changelist: The Changelist class.
     cmd_upload: The function associated with the git cl upload command.
     cq_dry_run: If CL uploads should also do a cq dry run.
     enable_auto_submit: If CL uploads should also enable auto submit.
   """
   # Create a branch.
-  if not CreateBranchForDirectory(refactor_branch, cl_index, directory,
-                                  refactor_branch_upstream):
-    print('Skipping CL ' + cl_index + ' for directory "' + directory +
-          '" for which a branch already exists.')
+  if not CreateBranchForDirectory(
+      refactor_branch, directory, refactor_branch_upstream):
+    print('Skipping ' + directory + ' for which a branch already exists.')
     return
 
   # Checkout all changes to files in |files|.
@@ -122,12 +114,11 @@
   # when it is closed.
   with gclient_utils.temporary_file() as tmp_file:
     gclient_utils.FileWrite(
-        tmp_file,
-        FormatDescriptionOrComment(description, directory, cl_index, num_cls))
+        tmp_file, FormatDescriptionOrComment(description, directory))
     git.run('commit', '-F', tmp_file)
 
   # Upload a CL.
-  upload_args = ['-f', '-r', reviewer]
+  upload_args = ['-f', '-r', ','.join(reviewers)]
   if cq_dry_run:
     upload_args.append('--cq-dry-run')
   if not comment:
@@ -137,140 +128,26 @@
   print('Uploading CL for ' + directory + '.')
   cmd_upload(upload_args)
   if comment:
-    changelist().AddComment(
-        FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
-        publish=True)
+    changelist().AddComment(FormatDescriptionOrComment(comment, directory),
+                            publish=True)
 
 
-class ChangeList(object):
-  """Representation of a CL and the files affected by it."""
-
-  def __init__(self, path, owners_db, author, files):
-    self._path = path
-    self._files = files
-    self._owners_db = owners_db
-    self._author = author
-    self._owners = None
-
-  def _EnsureOwners(self):
-    if not self._owners:
-      self._owners = set()
-      files = [f.LocalPath() for f in self.GetFiles()]
-      if not files:
-        files = [self.GetPath()]
-      possible_owners = self._owners_db.all_possible_owners(
-          files, self._author).keys()
-      for owner in possible_owners:
-        if 0 == len(self._owners_db.files_not_covered_by(files, [owner])):
-          self._owners |= set([owner])
-      assert len(self._owners)
-
-  def Merge(self, other):
-    self._owners = self.GetCommonOwners(other)
-    self._files |= other.GetFiles()
-
-  def GetPath(self):
-    return self._path
-
-  def GetFiles(self):
-    return self._files
-
-  def GetOwners(self):
-    self._EnsureOwners()
-    return self._owners
-
-  def GetCommonOwners(self, other):
-    return self.GetOwners() & other.GetOwners()
-
-  def HaveCommonOwners(self, other):
-    return len(self.GetCommonOwners(other)) > 0
-
-  def GetChangeSizeInBytes(self):
-    return sum(
-        [c[0] + c[1] for f in self._files for c in f.ChangeSizeInBytes()])
-
-
-def SplitCLs(owners_database, author, files):
+def GetFilesSplitByOwners(owners_database, files):
   """Returns a map of files split by OWNERS file.
 
   Returns:
     A map where keys are paths to directories containing an OWNERS file and
     values are lists of files sharing an OWNERS file.
   """
-
-  # The target CL size in # of changed bytes.
-  # TODO(yannic): Use # of changed lines instead and make this configurable.
-  max_cl_size = 1000
-
-  candidates = trie.Trie()
-  # Enable sorting so dry-run will split the CL the same way the CL is uploaded.
-  candidates.enable_sorting()
-
-  # 1. Create one CL candidate for every affected file.
+  files_split_by_owners = collections.defaultdict(list)
   for f in files:
-    path = f.LocalPath()
-    candidates[path] = ChangeList(path, owners_database, author, set([f]))
-
-  change_lists = []
-
-  # 2. Try to merge CL in common directories up to a maximum size of
-  # |max_cl_size|.
-  # This is O( len(files) * max([len(f.path) for f in files]) ).
-  edited = True
-  while edited:
-    edited = False
-
-    # 2.1. Iterate over all candidates and merge candidates into the candidate
-    # for their parent directory if the resulting CL doesn't exceed
-    # |max_cl_size|.
-    for item in candidates.items():
-      path = ''.join(item[0])
-      candidate = item[1]
-
-      # The number of CL candidates in subdirectories is equivalent to the
-      # number of nodes with prefix |path| in the Trie.
-      # Only try to merge |candidate| with the candidate for the parent
-      # directory if there are no more CLs for subdirectories.
-      sub_cls = len([''.join(k) for k in candidates.keys(path)]) - 1
-      if not sub_cls:
-        parent_path = os.path.dirname(path)
-        if len(parent_path) < 1:
-          # Don't create CLs for more than one top-level directory.
-          continue
-
-        if parent_path not in candidates:
-          candidates[parent_path] = ChangeList(parent_path, owners_database,
-                                               author, set())
-        parent_cl = candidates[parent_path]
-
-        if not parent_cl.HaveCommonOwners(candidate):
-          # Don't merge if the resulting CL would need more than one reviewer.
-          continue
-
-        # Merge |candidate| into the CL for it's parent directory and remove
-        # candidate.
-        edited = True
-        del candidates[path]
-        parent_cl.Merge(candidate)
-
-        # Add |parent_cl| to list of CLs to submit if the CL is larger than
-        # |max_cl_size|.
-        # TODO(yannic): Doing it this way, we might end up with CLs of size
-        # 2 * max_cl_size if we merged two candidates that just don't exceed
-        # the maximal size.
-        if parent_cl.GetChangeSizeInBytes() > max_cl_size:
-          change_lists.append(parent_cl)
-          del candidates[parent_path]
-
-  # 3. Add all remaining candidates to the list of CLs.
-  for item in candidates.items():
-    change_lists.append(item[1])
-
-  return change_lists
+    files_split_by_owners[owners_database.enclosing_dir_with_owners(
+        f.LocalPath())].append(f)
+  return files_split_by_owners
 
 
 def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
-                reviewer):
+                reviewers):
   """Prints info about a CL.
 
   Args:
@@ -280,42 +157,20 @@
         to upload a CL.
     file_paths: A list of files in this CL.
     description: The CL description.
-    reviewer: The reviewer for this CL.
+    reviewers: A set of reviewers for this CL.
   """
-  description_lines = FormatDescriptionOrComment(
-      description, directory, cl_index, num_cls).splitlines()
+  description_lines = FormatDescriptionOrComment(description,
+                                                 directory).splitlines()
   indented_description = '\n'.join(['    ' + l for l in description_lines])
 
   print('CL {}/{}'.format(cl_index, num_cls))
   print('Path: {}'.format(directory))
-  print('Reviewers: {}'.format(reviewer))
+  print('Reviewers: {}'.format(', '.join(reviewers)))
   print('\n' + indented_description + '\n')
   print('\n'.join(file_paths))
   print()
 
 
-def _SelectReviewer(possible_owners, used_reviewers):
-  """Select a reviewer from |owners| and adds them to the set of used reviewers.
-
-  Returns:
-    The reviewer.
-  """
-
-  # It's debatable whether we want to avoid reusing reviewers. It could be
-  # easier to ask the smallest possible amount of reviewers to become familiar
-  # with the change being split. However, doing so would mean we send all CLs to
-  # top-level owners, which might be too much to ask from them.
-  # We may revisit this decicion later.
-  unused_reviewers = possible_owners.difference(used_reviewers)
-  if len(unused_reviewers) < 1:
-    unused_reviewers = possible_owners
-  # Pick a random reviwer from the set of owners so we don't prefer owners
-  # with emails of low lexical order.
-  reviewer = random.choice(tuple(unused_reviewers))
-  used_reviewers.add(reviewer)
-  return reviewer
-
-
 def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
             cq_dry_run, enable_auto_submit):
   """"Splits a branch into smaller branches and uploads CLs.
@@ -356,9 +211,11 @@
     owners_database = owners.Database(change.RepositoryRoot(), file, os.path)
     owners_database.load_data_needed_for([f.LocalPath() for f in files])
 
-    change_lists = SplitCLs(owners_database, author, set(files))
+    files_split_by_owners = GetFilesSplitByOwners(owners_database, files)
 
-    num_cls = len(change_lists)
+    num_cls = len(files_split_by_owners)
+    print('Will split current branch (' + refactor_branch + ') into ' +
+          str(num_cls) + ' CLs.\n')
     if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
       print(
         'This will generate "%r" CLs. This many CLs can potentially generate'
@@ -370,21 +227,21 @@
       if answer.lower() != 'y':
         return 0
 
-    reviewers = set()
-    for cl_index, cl in enumerate(change_lists, 1):
+    for cl_index, (directory, files) in \
+        enumerate(files_split_by_owners.items(), 1):
       # Use '/' as a path separator in the branch name and the CL description
       # and comment.
-      directory = cl.GetPath().replace(os.path.sep, '/')
-      file_paths = [f.LocalPath() for f in cl.GetFiles()]
-      reviewer = _SelectReviewer(cl.GetOwners(), reviewers)
+      directory = directory.replace(os.path.sep, '/')
+      file_paths = [f.LocalPath() for f in files]
+      reviewers = owners_database.reviewers_for(file_paths, author)
 
       if dry_run:
         PrintClInfo(cl_index, num_cls, directory, file_paths, description,
-                    reviewer)
+                    reviewers)
       else:
-        UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
-                 directory, files, description, comment, reviewer, changelist,
-                 cmd_upload, cq_dry_run, enable_auto_submit)
+        UploadCl(refactor_branch, refactor_branch_upstream, directory, files,
+                 description, comment, reviewers, changelist, cmd_upload,
+                 cq_dry_run, enable_auto_submit)
 
     # Go back to the original branch.
     git.run('checkout', refactor_branch)