bisect-kit: add chrome local build bisector for branched version
bisector: bisect_cr_localbuild_internal.py
switcher: switch_cros_cr_localbuild_internal.py
This is a partial patch of a bigger work. Some tasks will be addressed by
later CLs:
- Existing bisectors will be migrated to use the codechange module
- This bisector can only deal with branches before 3406 because it
does not support gclient recursedeps yet
- Add more unit tests
BUG=chromium:776314,chromium:850443
TEST=unittest and the following commands
(A contrived example: find the commit that changed the branch number to 3396)
$ ./bisect_cr_localbuild_internal.py init --old 67.0.3392.0 --new 67.0.3396.17 \
--chrome_root $CHROME_ROOT \
--gclient_cache_dir $GCLIENT_CACHE_DIR \
--dut $DUT
$ ./bisect_cr_localbuild_internal.py config switch ./switch_cros_cr_localbuild_internal.py
$ ./bisect_cr_localbuild_internal.py config eval bash -c \
"ssh $DUT /opt/google/chrome/chrome --version | grep -v 3396"
$ ./bisect_cr_localbuild_internal.py run
Change-Id: I9b974a19d0c6f6fd045988fde603699282aef32f
Reviewed-on: https://chromium-review.googlesource.com/1088541
Commit-Ready: Kuang-che Wu <kcwu@chromium.org>
Tested-by: Kuang-che Wu <kcwu@chromium.org>
Reviewed-by: Chung-yih Wang <cywang@chromium.org>
diff --git a/bisect_kit/codechange.py b/bisect_kit/codechange.py
new file mode 100644
index 0000000..e6e5c37
--- /dev/null
+++ b/bisect_kit/codechange.py
@@ -0,0 +1,894 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Model of source code organization and changes.
+
+This module models complex source code organization, i.e. nested git repos,
+and their version relationships, i.e. pinned or floating git repos. In other
+words, it's an abstraction of chrome's gclient DEPS and of chromeos and
+Android's repo manifest.
+"""
+
+from __future__ import print_function
+import copy
+import json
+import logging
+import os
+import re
+import shutil
+
+from bisect_kit import cli
+from bisect_kit import git_util
+
+logger = logging.getLogger(__name__)
+
+_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
+
+SPEC_FIXED = 'fixed'
+SPEC_FLOAT = 'float'
+_DIFF_CACHE_DIR = 'bisectkit-cache'
+
+
+def make_intra_rev(a, b, index):
+ """Makes intra-rev version string.
+
+  Between two major "named" versions a and b, there are many small changes
+  (commits) in between. bisect-kit will identify all those instances and
+  bisect them. We give each instance a name, called an "intra-rev", which
+  stands for a minor version number between two major versions.
+
+  Note that a+index (without b) is not enough to identify a unique change due
+  to branches. Take chromeos as an example: both 9900.1.0 and 9901.0.0 are
+  derived from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer
+  to states in either 9900.1.0 or 9901.0.0.
+
+ Args:
+ a: the start version
+ b: the end version
+ index: the index number of changes between a and b
+
+ Returns:
+ the intra-rev version string
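+
+  For example, make_intra_rev('9900.0.0', '9901.0.0', 100) returns
+  '9900.0.0~9901.0.0/100'.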
+ """
+ return '%s~%s/%d' % (a, b, index)
+
+
+def parse_intra_rev(rev):
+ """Decomposes intra-rev string.
+
+  See make_intra_rev's docstring for what an intra-rev is.
+
+ Args:
+ rev: intra-rev string or normal version number
+
+ Returns:
+    (start, end, index). If rev is not an intra-rev, it must be a normal
+    version number, and (rev, rev, 0) is returned.
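+
+    For example, parse_intra_rev('9900.0.0~9901.0.0/100') returns
+    ('9900.0.0', '9901.0.0', 100), while parse_intra_rev('9900.0.0') returns
+    ('9900.0.0', '9900.0.0', 0).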
+ """
+ m = re.match(_re_intra_rev, rev)
+ if m:
+ return m.group(1), m.group(2), int(m.group(3))
+ else:
+ return rev, rev, 0
+
+
+def argtype_intra_rev(argtype):
+ """Validates argument is intra-rev.
+
+ Args:
+ argtype: argtype function which validates major version number
+
+ Returns:
+ A new argtype function which matches intra-rev
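+
+  Example (hypothetical usage with argparse, assuming argtype_version is the
+  argtype function validating the major version numbers):
+
+    parser.add_argument('rev', type=argtype_intra_rev(argtype_version))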
+ """
+
+ def argtype_function(s):
+ m = re.match(_re_intra_rev, s)
+ if m:
+ try:
+ argtype(m.group(1))
+ argtype(m.group(2))
+ return s
+ except cli.ArgTypeError as e:
+ examples = []
+ for example in e.example:
+ examples.append(make_intra_rev(example, example, 10))
+ raise cli.ArgTypeError('Invalid intra rev', examples)
+ raise cli.ArgTypeError('Invalid intra rev',
+ [make_intra_rev('<rev1>', '<rev2>', 10)])
+
+ return argtype_function
+
+
+def _normalize_repo_url(repo_url):
+ repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
+ r'https://chrome-internal.googlesource.com/', repo_url)
+ repo_url = re.sub(r'\.git$', '', repo_url)
+ return repo_url
+
+
+class PathSpec(object):
+ """Specified code version of one path.
+
+ Attributes:
+ path: local path, relative to project base dir
+ repo_url: code repository location
+ at: code version, could be git hash or branch name
+ """
+
+ def __init__(self, path, repo_url, at):
+ self.path = path
+ self.repo_url = repo_url
+ self.at = at
+
+ def is_static(self):
+ return git_util.is_git_rev(self.at)
+
+ def __eq__(self, rhs):
+ if self.path != rhs.path:
+ return False
+ if self.at != rhs.at:
+ return False
+ if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
+ return False
+ return True
+
+ def __ne__(self, rhs):
+ return not self == rhs
+
+
+class Spec(object):
+ """Collection of PathSpec.
+
+  Spec is analogous to gclient's DEPS and repo's manifest.
+
+ Attributes:
+ spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
+ version is pinned and fixed. On the other hand, SPEC_FLOAT is not
+ pinned and the actual version (git commit) may change over time.
+    name: name of this spec, for debugging purposes; usually a version
+        number or git hash
+ timestamp: timestamp of this spec
+ path: path of spec
+    entries: dict mapping paths to PathSpec objects
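+
+  For example, a fixed Spec of a hypothetical two-repo project could have:
+    entries = {
+        'src': PathSpec('src', 'https://host/src.git', '<git hash>'),
+        'src/foo': PathSpec('src/foo', 'https://host/foo.git', '<git hash>'),
+    }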
+ """
+
+ def __init__(self, spec_type, name, timestamp, path, entries=None):
+ self.spec_type = spec_type
+ self.name = name
+ self.timestamp = timestamp
+ self.path = path
+ self.entries = entries
+
+ def copy(self):
+ return copy.deepcopy(self)
+
+ def similar_score(self, rhs):
+ """Calculates similar score to another Spec.
+
+ Returns:
+ score of similarity. Smaller value is more similar.
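+      Identical specs score 0; each common path pinned at a different
+      revision adds 1; a difference only in repo_url adds 0.1; each path
+      present in only one of the two specs adds 1.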
+ """
+ score = 0
+ for path in set(self.entries) & set(rhs.entries):
+ if rhs[path] == self[path]:
+ continue
+ if rhs[path].at == self[path].at:
+        # it's common that a remote repo moved around but should be treated
+        # as the same one
+ score += 0.1
+ else:
+ score += 1
+ score += len(set(self.entries) ^ set(rhs.entries))
+ return score
+
+ def is_static(self):
+ return all(path_spec.is_static() for path_spec in self.entries.values())
+
+ def is_subset(self, rhs):
+ return set(self.entries.keys()) <= set(rhs.entries.keys())
+
+ def __getitem__(self, path):
+ return self.entries[path]
+
+ def __contains__(self, path):
+ return path in self.entries
+
+ def apply(self, action_group):
+ self.timestamp = action_group.timestamp
+ self.name = '(%s)' % self.timestamp
+ for action in action_group.actions:
+ if isinstance(action, GitAddRepo):
+ self.entries[action.path] = PathSpec(action.path, action.repo_url,
+ action.rev)
+ elif isinstance(action, GitCheckoutCommit):
+ self.entries[action.path].at = action.rev
+ elif isinstance(action, GitRemoveRepo):
+ del self.entries[action.path]
+ else:
+ assert 0, 'unknown action: %s' % action.__class__.__name__
+
+ def dump(self):
+ # for debugging
+ print(self.name, self.path, self.timestamp)
+ print('size', len(self.entries))
+ for path, path_spec in sorted(self.entries.items()):
+ print(path, path_spec.at)
+
+ def diff(self, rhs):
+ logger.info('diff between %s and %s', self.name, rhs.name)
+ expect = set(self.entries)
+ actual = set(rhs.entries)
+ common = 0
+ for path in sorted(expect - actual):
+ logger.info('-%s', path)
+ for path in sorted(actual - expect):
+ logger.info('+%s', path)
+ for path in sorted(expect & actual):
+ if self[path] == rhs[path]:
+ common += 1
+ continue
+ if self[path].at != rhs[path].at:
+ logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
+ if self[path].repo_url != rhs[path].repo_url:
+ logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
+ rhs[path].repo_url)
+ logger.info('and common=%s', common)
+
+
+class Action(object):
+ """Actions describe changes from one Spec to another.
+
+ Attributes:
+ timestamp: action time
+ path: action path, which is relative to project root
+ """
+
+ def __init__(self, timestamp, path):
+ self.timestamp = timestamp
+ self.path = path
+
+ def apply(self, _root_dir):
+ raise NotImplementedError
+
+ def summary(self, _code_storage):
+ raise NotImplementedError
+
+ def __eq__(self, rhs):
+ return self.__dict__ == rhs.__dict__
+
+ def serialize(self):
+ return self.__class__.__name__, self.__dict__
+
+
+def unserialize_action(data):
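+  """Reconstructs an Action object from Action.serialize() output.
+
+  serialize() returns a (class name, attributes dict) pair, so
+  unserialize_action(action.serialize()) reconstructs an equal action.
+  """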
+ classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
+ class_name, values = data
+ assert class_name in [cls.__name__ for cls in classes
+ ], 'unknown action class: %s' % class_name
+ for cls in classes:
+ if class_name == cls.__name__:
+ return cls(**values)
+
+
+class ActionGroup(object):
+ """Atomic group of Action objects
+
+ This models atomic commits (for example, gerrit topic, or circular
+ CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
+ object.
+ """
+
+ def __init__(self, timestamp, comment=None):
+ self.timestamp = timestamp
+ self.name = None
+ self.actions = []
+ self.comment = comment
+
+ def add(self, action):
+ self.actions.append(action)
+
+ def serialize(self):
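+    # Plain data suitable for json.dump:
+    #   (timestamp, name, [(action class name, action attributes), ...])
+    # ActionGroup.unserialize() below is the inverse operation.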
+ return (self.timestamp, self.name, [a.serialize() for a in self.actions])
+
+ def summary(self, code_storage):
+ if self.comment:
+ return self.comment
+ # TODO(kcwu): support multiple Actions
+ assert len(self.actions) == 1
+ return self.actions[0].summary(code_storage)
+
+ @staticmethod
+ def unserialize(data):
+ ag = ActionGroup(data[0])
+ ag.name = data[1]
+ for x in data[2]:
+ ag.add(unserialize_action(x))
+ return ag
+
+ def apply(self, root_dir):
+ for action in self.actions:
+ action.apply(root_dir)
+
+
+class GitCheckoutCommit(Action):
+ """Describes a git commit action.
+
+ Attributes:
+ repo_url: the corresponding url of git repo
+ rev: git commit to checkout
+ """
+
+ def __init__(self, timestamp, path, repo_url, rev):
+ super(GitCheckoutCommit, self).__init__(timestamp, path)
+ self.repo_url = repo_url
+ self.rev = rev
+
+ def apply(self, root_dir):
+ git_repo = os.path.join(root_dir, self.path)
+ assert git_util.is_git_root(git_repo)
+ git_util.checkout_version(git_repo, self.rev)
+
+ def summary(self, code_storage):
+ git_root = code_storage.cached_git_root(self.repo_url)
+ summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
+ return 'commit %s %s %r' % (self.rev[:10], self.path, summary)
+
+
+class GitAddRepo(Action):
+ """Describes a git repo add action.
+
+ Attributes:
+ repo_url: the corresponding url of git repo to add
+ rev: git commit to checkout
+ """
+
+ def __init__(self, timestamp, path, repo_url, rev):
+ super(GitAddRepo, self).__init__(timestamp, path)
+ self.repo_url = repo_url
+ self.rev = rev
+
+ def apply(self, root_dir):
+ git_repo = os.path.join(root_dir, self.path)
+ assert os.path.exists(git_repo)
+ assert git_util.is_git_root(git_repo)
+
+ def summary(self, _code_storage):
+ return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
+
+
+class GitRemoveRepo(Action):
+ """Describes a git repo remove action."""
+
+ def __init__(self, timestamp, path):
+ super(GitRemoveRepo, self).__init__(timestamp, path)
+
+ def apply(self, root_dir):
+ assert self.path
+ git_repo = os.path.join(root_dir, self.path)
+ assert git_util.is_git_root(git_repo)
+    # Repo removal is not supported yet; fail before deleting anything.
+    assert 0
+ shutil.rmtree(git_repo)
+
+ def summary(self, _code_storage):
+ return 'remove repo %s' % self.path
+
+
+def apply_actions(code_storage, action_groups, root_dir):
+  # Speed optimization: only apply the last one of consecutive commits per
+  # repo. It is possible to optimize further, but we would need to take care
+  # of git repo add/remove within another repo.
+ commits = {}
+
+ def batch_apply(commits):
+ for i, commit_action in sorted(commits.values()):
+ logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
+ commit_action.apply(root_dir)
+
+ for i, action_group in enumerate(action_groups, 1):
+    for action in action_group.actions:
+ if not isinstance(action, GitCheckoutCommit):
+ break
+ else:
+ # If all actions are commits, defer them for batch processing.
+      for action in action_group.actions:
+ commits[action.path] = (i, action)
+ continue
+
+ batch_apply(commits)
+ commits = {}
+    action_group.apply(root_dir)
+
+ batch_apply(commits)
+
+
+class SpecManager(object):
+ """Spec related abstract operations.
+
+  This class enumerates Spec instances and switches the disk state to a
+  given Spec.
+
+ In other words, this class abstracts:
+ - discovery of gclient's DEPS and repo's manifest
+ - gclient sync and repo sync
+ """
+
+ def collect_float_spec(self, old, new):
+ """Collects float Spec between two versions.
+
+    This method may fetch specs from the network. However, it should not
+    switch the tree's version state.
+ """
+ raise NotImplementedError
+
+ def collect_fixed_spec(self, old, new):
+ """Collects fixed Spec between two versions.
+
+    This method may fetch specs from the network. However, it should not
+    switch the tree's version state.
+ """
+ raise NotImplementedError
+
+ def parse_spec(self, spec):
+ """Parses information for Spec object.
+
+ Args:
+ spec: Spec object. It specifies what to parse and the parsed information
+ is stored inside.
+ """
+ raise NotImplementedError
+
+ def sync_disk_state(self, rev):
+ """Switch source tree state to given version."""
+ raise NotImplementedError
+
+
+class CodeStorage(object):
+ """Query code history and commit relationship without checkout.
+
+ Because paths inside source tree may be deleted or map to different remote
+ repo in different versions, we cannot query git information of one version
+ but the tree state is at another version. In order to query information
+ without changing tree state and fast, we need out of tree source code
+ storage.
+
+ This class assumes all git repos are mirrored somewhere on local disk.
+ Subclasses just need to implement cached_git_root() which returns the
+ location.
+
+  In other words, this class abstracts operations upon gclient's cache-dir
+  and repo's mirror.
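+
+  A minimal hypothetical subclass, assuming every repo is mirrored under one
+  local cache directory, keyed by a mangled repo URL:
+
+    class DirCodeStorage(CodeStorage):
+      def __init__(self, cache_dir):
+        self.cache_dir = cache_dir
+
+      def cached_git_root(self, repo_url):
+        # e.g. 'https://host/foo.git' -> '<cache_dir>/https_host_foo.git'
+        return os.path.join(self.cache_dir, re.sub(r'[:/]+', '_', repo_url))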
+ """
+
+ def cached_git_root(self, repo_url):
+ """The cached path of given remote git repo.
+
+ Args:
+ repo_url: URL of git remote repo
+
+ Returns:
+ path of cache folder
+ """
+ raise NotImplementedError
+
+ def is_ancestor_commit(self, spec, path, old, new):
+ """Determine one commit is ancestor of another.
+
+ Args:
+ spec: Spec object
+ path: local path relative to project root
+ old: commit id
+ new: commit id
+
+ Returns:
+      True if `old` is an ancestor of `new`
+ """
+ git_root = self.cached_git_root(spec[path].repo_url)
+ return git_util.is_ancestor_commit(git_root, old, new)
+
+ def get_rev_by_time(self, spec, path, timestamp):
+ """Get commit hash of given spec by time.
+
+ Args:
+ spec: Spec object
+ path: local path relative to project root
+      timestamp: unix timestamp
+
+ Returns:
+      The commit hash at the given time. If multiple commits share the given
+      timestamp, the last one is returned.
+ """
+ git_root = self.cached_git_root(spec[path].repo_url)
+    # spec[path].at is a remote reference name. Since git_root is a mirror,
+    # there is no need to convert the name.
+ return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
+
+ def get_actions_between_two_commit(self, spec, path, old, new):
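+    """Lists GitCheckoutCommit actions for commits between `old` and `new`."""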
+ git_root = self.cached_git_root(spec[path].repo_url)
+ result = []
+ for timestamp, git_rev in git_util.list_commits_between_commits(
+ git_root, old, new):
+ result.append(
+ GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
+ return result
+
+ def is_containing_commit(self, spec, path, rev):
+ git_root = self.cached_git_root(spec[path].repo_url)
+ return git_util.is_containing_commit(git_root, rev)
+
+ def are_spec_commits_available(self, spec):
+ for path, path_spec in spec.entries.items():
+ if not path_spec.is_static():
+ continue
+ if not self.is_containing_commit(spec, path, path_spec.at):
+ return False
+ return True
+
+
+class CodeManager(object):
+ """Class to reconstruct historical source tree state.
+
+  This class can reconstruct the source tree state at any moment, and the
+  diffs between those moments.
+
+ Attributes:
+ root_dir: root path of project source tree
+ spec_manager: SpecManager object
+ code_storage: CodeStorage object
+ """
+
+ def __init__(self, root_dir, spec_manager, code_storage):
+ self.root_dir = root_dir
+ self.spec_manager = spec_manager
+ self.code_storage = code_storage
+
+ def generate_actions_between_specs(self, prev_float, next_float):
+ """Generates actions between two float specs.
+
+ Args:
+      prev_float: start float Spec (exclusive)
+      next_float: end float Spec (inclusive)
+
+ Returns:
+      list of Action objects (unordered)
+ """
+ actions = []
+ for path in set(prev_float.entries) | set(next_float.entries):
+
+ # Add repo
+ if path not in prev_float:
+ if next_float[path].is_static():
+ next_at = next_float[path].at
+ else:
+ next_at = self.code_storage.get_rev_by_time(next_float, path,
+ next_float.timestamp)
+ actions.append(
+ GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
+ next_at))
+ continue
+
+      # Existing path is floating; enumerate commits until the next spec.
+ #
+ # prev_at till_at
+ # prev branch ---> o --------> o --------> o --------> o --------> ...
+ # ^ ^
+ # prev_float.timestamp next_float.timestamp
+ if not prev_float[path].is_static():
+ prev_at = self.code_storage.get_rev_by_time(prev_float, path,
+ prev_float.timestamp)
+ till_at = self.code_storage.get_rev_by_time(prev_float, path,
+ next_float.timestamp)
+
+ actions.extend(
+ self.code_storage.get_actions_between_two_commit(
+ prev_float, path, prev_at, till_at))
+ else:
+ prev_at = till_at = prev_float[path].at
+
+ # At next_float.timestamp.
+ if path not in next_float:
+ # remove repo
+ actions.append(GitRemoveRepo(next_float.timestamp, path))
+ next_at = None
+
+ elif next_float[path].is_static():
+ # pinned to certain commit on different branch
+ next_at = next_float[path].at
+
+ elif next_float[path].at == prev_float[path].at:
+ # keep floating on the same branch
+ next_at = till_at
+
+ else:
+ # switch to another branch
+ # prev_at till_at
+ # prev branch ---> o --------> o --------> o --------> o --------> ...
+ #
+ # next_at
+ # next branch ...... o ------> o --------> o -----> ...
+ # ^ ^
+ # prev_float.timestamp next_float.timestamp
+ next_at = self.code_storage.get_rev_by_time(next_float, path,
+ next_float.timestamp)
+
+ if next_at and next_at != till_at:
+ actions.append(
+ GitCheckoutCommit(next_float.timestamp, path,
+ next_float[path].repo_url, next_at))
+
+ return actions
+
+ def synthesize_fixed_spec(self, float_spec, timestamp):
+ """Synthesizes fixed spec from float spec of given time.
+
+ Args:
+ float_spec: the float spec
+ timestamp: snapshot time
+
+ Returns:
+ Spec object
+ """
+ result = {}
+ for path, path_spec in float_spec.entries.items():
+ if not path_spec.is_static():
+ at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
+ path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
+
+ result[path] = copy.deepcopy(path_spec)
+
+ name = '%s@%s' % (float_spec.path, timestamp)
+ return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
+
+ def reorder_actions(self, actions):
+ """Reorder and cluster actions.
+
+ Args:
+ actions: list of Action objects
+
+ Returns:
+ list of ActionGroup objects
+ """
+ # TODO(kcwu): support atomic commits across repos
+ actions.sort(key=lambda x: x.timestamp)
+ result = []
+ for action in actions:
+ group = ActionGroup(action.timestamp)
+ group.add(action)
+ result.append(group)
+ return result
+
+ def match_spec(self, target, specs, start_index=0):
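+    """Matches `target` against the most similar spec in `specs`.
+
+    Candidates are the last spec at or before target's timestamp, plus all
+    specs whose timestamps are within one hour of target's. Among candidates
+    whose entries are a subset of target's, the one with the lowest
+    similar_score() wins.
+
+    Returns:
+      index into `specs`, or None if no compatible spec is found
+    """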
+ threshold = 3600
+    # ideal_index is the index of the last spec at or before target
+ # begin and end are the range of indexes within threshold (inclusive)
+ ideal_index = None
+ begin, end = None, None
+ for i, spec in enumerate(specs[start_index:], start_index):
+ if spec.timestamp <= target.timestamp:
+ ideal_index = i
+ if abs(spec.timestamp - target.timestamp) < threshold:
+ if begin is None:
+ begin = i
+ end = i
+
+ candidates = []
+ if ideal_index is not None:
+ candidates.append(ideal_index)
+ if begin is not None:
+ candidates.extend(range(begin, end + 1))
+ if not candidates:
+ logger.error('unable to match %s: all specs are after it', target.name)
+ return None
+
+ compatible_candidates = [
+ i for i in candidates if specs[i].is_subset(target)
+ ]
+ if not compatible_candidates:
+ logger.error('unable to match %s: no compatible specs', target.name)
+ spec = specs[candidates[0]]
+ target.diff(spec)
+ return None
+
+ scores = []
+ for i in compatible_candidates:
+ scores.append((specs[i].similar_score(target), i))
+ scores.sort()
+
+ score, index = scores[0]
+ if score != 0:
+      logger.warning('not an exact match (score=%s): %s', score, target.name)
+ target.diff(specs[index])
+
+ if index < ideal_index:
+ logger.warning(
+ '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
+ target.name, target.timestamp, specs[index].timestamp,
+ specs[ideal_index].timestamp,
+ specs[ideal_index].timestamp - target.timestamp)
+ if index > ideal_index:
+ logger.warning(
+ 'spec committed at %d matched later commit at %d. bad server clock?',
+ target.timestamp, specs[index].timestamp)
+
+ return index
+
+ def associate_fixed_and_synthesized_specs(self, fixed_specs,
+ synthesized_specs):
+    # All fixed specs are snapshots of float specs. Theoretically, each of
+    # them should be identical to one of the synthesized specs. However, this
+    # is not always true, perhaps due to race conditions or bugs in
+    # bisect-kit. To overcome such glitches, we match specs by similarity
+    # instead of exact equality.
+ result = []
+ last_index = 0
+ for i, fixed_spec in enumerate(fixed_specs):
+ matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
+ if matched_index is None:
+ if i in (0, len(fixed_specs) - 1):
+ logger.error('essential spec mismatch, unable to continue')
+ assert 0
+ else:
+          logger.warning('%s does not match, skip', fixed_spec.name)
+ continue
+ result.append((i, matched_index))
+ last_index = matched_index
+
+ return result
+
+ def _create_make_up_actions(self, fixed_spec, synthesized):
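+    """Creates actions bridging a fixed spec to its matched synthesized spec.
+
+    For every common path whose revision differs, emit a GitCheckoutCommit to
+    the synthesized revision, so that subsequent action groups (generated
+    against synthesized specs) start from the state they expect.
+
+    Returns:
+      ActionGroup, or None if the two specs already agree
+    """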
+ timestamp = synthesized.timestamp
+ make_up = ActionGroup(
+ timestamp, comment='make up glitch for %s' % fixed_spec.name)
+ for path in set(fixed_spec.entries) & set(synthesized.entries):
+ if fixed_spec[path].at == synthesized[path].at:
+ continue
+ action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
+ synthesized[path].at)
+ make_up.add(action)
+
+ if not make_up.actions:
+ return None
+ return make_up
+
+ def build_revlist(self, old, new):
+ """Build revlist.
+
+ Returns:
+ list of rev string
+ """
+ logger.info('build_revlist')
+ revlist = []
+
+ # step 1, find all float and fixed specs in the given range.
+ fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
+ float_specs = self.spec_manager.collect_float_spec(old, new)
+ while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
+ float_specs.pop()
+ assert float_specs
+ for spec in float_specs + fixed_specs:
+ self.spec_manager.parse_spec(spec)
+
+ # step 2, synthesize all fixed specs in the range from float specs.
+ specs = float_specs + [fixed_specs[-1]]
+ actions = []
+ logger.debug('len(specs)=%d', len(specs))
+ for i in range(len(specs) - 1):
+ prev_float = specs[i]
+ next_float = specs[i + 1]
+ logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
+ prev_float.timestamp, next_float.name, next_float.timestamp)
+ actions += self.generate_actions_between_specs(prev_float, next_float)
+ action_groups = self.reorder_actions(actions)
+
+ spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
+ synthesized = [spec.copy()]
+ for action_group in action_groups:
+ spec.apply(action_group)
+ synthesized.append(spec.copy())
+
+ # step 3, associate fixed specs with synthesized specs.
+ associated_pairs = self.associate_fixed_and_synthesized_specs(
+ fixed_specs, synthesized)
+
+ # step 4, group actions and cache them
+ for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
+ next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
+ revlist.append(fixed_specs[fixed_index].name)
+ this_action_groups = []
+
+ # handle glitch
+ if fixed_specs[fixed_index].similar_score(
+ synthesized[synthesized_index]) != 0:
+ assert synthesized[synthesized_index].is_subset(
+ fixed_specs[fixed_index])
+ skipped = set(fixed_specs[fixed_index].entries) - set(
+ synthesized[synthesized_index].entries)
+ if skipped:
+ logger.warning(
+ 'between %s and %s, '
+ 'bisect-kit cannot analyze commit history of following paths:',
+ fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
+ for path in sorted(skipped):
+ logger.warning(' %s', path)
+
+ make_up = self._create_make_up_actions(fixed_specs[fixed_index],
+ synthesized[synthesized_index])
+ if make_up:
+ this_action_groups.append(make_up)
+
+ this_action_groups.extend(
+ action_groups[synthesized_index:next_synthesized_index])
+ for idx, ag in enumerate(this_action_groups, 1):
+ rev = make_intra_rev(fixed_specs[fixed_index].name,
+ fixed_specs[next_fixed_index].name, idx)
+ ag.name = rev
+ revlist.append(rev)
+
+ self.save_action_groups_between_releases(
+ fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
+ this_action_groups)
+ revlist.append(fixed_specs[associated_pairs[-1][0]].name)
+
+ return revlist
+
+ def save_action_groups_between_releases(self, old, new, action_groups):
+ data = [ag.serialize() for ag in action_groups]
+
+ cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
+ if not os.path.exists(cache_dir):
+ os.makedirs(cache_dir)
+ cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
+    with open(cache_filename, 'w') as fp:
+ json.dump(data, fp, indent=4, sort_keys=True)
+
+ def load_action_groups_between_releases(self, old, new):
+ cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
+ cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
+ if not os.path.exists(cache_filename):
+ raise Exception('cached revlist not found: %s' % cache_filename)
+
+    result = []
+    with open(cache_filename) as fp:
+      for data in json.load(fp):
+        result.append(ActionGroup.unserialize(data))
+
+ return result
+
+ def view_rev_diff(self, old, new):
+ old_base, _, _ = parse_intra_rev(old)
+ _, new_next, _ = parse_intra_rev(new)
+ assert old_base != new_next
+
+ revlist = []
+ rev_summary = {}
+ fixed_specs = self.spec_manager.collect_fixed_spec(old_base, new_next)
+ for i, spec in enumerate(fixed_specs[:-1]):
+ action_groups = self.load_action_groups_between_releases(
+ fixed_specs[i].name, fixed_specs[i + 1].name)
+ revlist.append(spec.name)
+ rev_summary[spec.name] = ''
+ for action_group in action_groups:
+ revlist.append(action_group.name)
+ rev_summary[action_group.name] = action_group.summary(self.code_storage)
+
+ revlist.append(fixed_specs[-1].name)
+ rev_summary[fixed_specs[-1].name] = ''
+
+ old_index = revlist.index(old)
+ new_index = revlist.index(new)
+ for rev in revlist[old_index:new_index + 1]:
+ logger.info('%s %s', rev, rev_summary[rev])
+
+ def switch(self, rev):
+ # easy case
+ if not re.match(_re_intra_rev, rev):
+ self.spec_manager.sync_disk_state(rev)
+ return
+
+ rev_old, rev_new, idx = parse_intra_rev(rev)
+ action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
+ assert 0 <= idx <= len(action_groups)
+ action_groups = action_groups[:idx]
+
+ self.spec_manager.sync_disk_state(rev_old)
+
+ apply_actions(self.code_storage, action_groups, self.root_dir)