# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""

from __future__ import print_function
import copy
import json
import logging
import os
import re
import shutil

from bisect_kit import cli
from bisect_kit import git_util

logger = logging.getLogger(__name__)

_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'
_DIFF_CACHE_DIR = 'bisectkit-cache'


def make_intra_rev(a, b, index):
  """Makes an intra-rev version string.

  Between two major "named" versions a and b, there are many small changes
  (commits) in between. bisect-kit will identify all those instances and bisect
  them. We give names to those instances and call them "intra-revs", which
  stand for minor version numbers between two major versions.

  Note, a+index (without b) is not enough to identify a unique change due to
  branches. Take chromeos as an example: both 9900.1.0 and 9901.0.0 are derived
  from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
  in 9900.1.0 and 9901.0.0.

  Args:
    a: the start version
    b: the end version
    index: the index number of changes between a and b

  Returns:
    the intra-rev version string
  """
  return '%s~%s/%d' % (a, b, index)


def parse_intra_rev(rev):
  """Decomposes an intra-rev string.

  See make_intra_rev() for what an intra-rev is.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). If rev is not an intra-rev, it must be a normal
    version number and (rev, rev, 0) is returned.
  """
  m = re.match(_re_intra_rev, rev)
  if m:
    return m.group(1), m.group(2), int(m.group(3))
  else:
    return rev, rev, 0
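
# Illustrative examples (version numbers are hypothetical): the format
# produced by make_intra_rev() and accepted by parse_intra_rev() is
#   make_intra_rev('9900.0.0', '9901.0.0', 3)  -> '9900.0.0~9901.0.0/3'
#   parse_intra_rev('9900.0.0~9901.0.0/3')     -> ('9900.0.0', '9901.0.0', 3)
#   parse_intra_rev('9900.0.0')                -> ('9900.0.0', '9900.0.0', 0)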


def argtype_intra_rev(argtype):
  """Validates that an argument is an intra-rev string.

  Args:
    argtype: argtype function which validates the major version numbers

  Returns:
    A new argtype function which matches intra-revs
  """

  def argtype_function(s):
    m = re.match(_re_intra_rev, s)
    if m:
      try:
        argtype(m.group(1))
        argtype(m.group(2))
        return s
      except cli.ArgTypeError as e:
        examples = []
        for example in e.example:
          examples.append(make_intra_rev(example, example, 10))
        raise cli.ArgTypeError('Invalid intra rev', examples)
    raise cli.ArgTypeError('Invalid intra rev',
                           [make_intra_rev('<rev1>', '<rev2>', 10)])

  return argtype_function
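
# A minimal usage sketch (the argtype function and parser below are
# hypothetical, not part of this module). An argtype function validates one
# major version string and raises cli.ArgTypeError with example values
# otherwise, so argtype_intra_rev() can wrap it for argparse:
#   def argtype_dotted_version(s):
#     if re.match(r'^\d+(\.\d+)+$', s):
#       return s
#     raise cli.ArgTypeError('invalid version', ['1.2.3'])
#
#   parser.add_argument('rev', type=argtype_intra_rev(argtype_dotted_version))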


def _normalize_repo_url(repo_url):
  repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
                    r'https://chrome-internal.googlesource.com/', repo_url)
  repo_url = re.sub(r'\.git$', '', repo_url)
  return repo_url
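
# For example (illustrative repo path), these URLs normalize to the same value
# and therefore compare equal in PathSpec.__eq__ below:
#   https://chrome-internal.googlesource.com/a/foo/bar.git
#   https://chrome-internal.googlesource.com/foo/bar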


class PathSpec(object):
  """Specifies the code version of one path.

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    if self.path != rhs.path:
      return False
    if self.at != rhs.at:
      return False
    if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
      return False
    return True

  def __ne__(self, rhs):
    return not self == rhs


class Spec(object):
  """Collection of PathSpec objects.

  A Spec is analogous to gclient's DEPS and repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means the
        code version is pinned and fixed. On the other hand, SPEC_FLOAT is not
        pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purposes; usually a version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: dict which maps path to PathSpec
  """

  def __init__(self, spec_type, name, timestamp, path, entries=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries

  def copy(self):
    return copy.deepcopy(self)

  def similar_score(self, rhs):
    """Calculates a similarity score against another Spec.

    Returns:
      score of similarity. Smaller value means more similar.
    """
    score = 0
    for path in set(self.entries) & set(rhs.entries):
      if rhs[path] == self[path]:
        continue
      if rhs[path].at == self[path].at:
        # It's common that the remote repo moved around but should still be
        # treated as the same one.
        score += 0.1
      else:
        score += 1
    score += len(set(self.entries) ^ set(rhs.entries))
    return score

  def is_static(self):
    return all(path_spec.is_static() for path_spec in self.entries.values())

  def is_subset(self, rhs):
    return set(self.entries.keys()) <= set(rhs.entries.keys())

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp
    for action in action_group.actions:
      if isinstance(action, GitAddRepo):
        self.entries[action.path] = PathSpec(action.path, action.repo_url,
                                             action.rev)
      elif isinstance(action, GitCheckoutCommit):
        self.entries[action.path].at = action.rev
      elif isinstance(action, GitRemoveRepo):
        del self.entries[action.path]
      else:
        assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    # for debugging
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path, path_spec in sorted(self.entries.items()):
      print(path, path_spec.at)

  def diff(self, rhs):
    logger.info('diff between %s and %s', self.name, rhs.name)
    expect = set(self.entries)
    actual = set(rhs.entries)
    common = 0
    for path in sorted(expect - actual):
      logger.info('-%s', path)
    for path in sorted(actual - expect):
      logger.info('+%s', path)
    for path in sorted(expect & actual):
      if self[path] == rhs[path]:
        common += 1
        continue
      if self[path].at != rhs[path].at:
        logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
      if self[path].repo_url != rhs[path].repo_url:
        logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
                    rhs[path].repo_url)
    logger.info('and common=%s', common)
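
# Illustrative construction of a float Spec (the path, URL, and timestamp are
# hypothetical; is_static() presumably reports False for a branch name and
# True for a git hash, via git_util.is_git_rev):
#   entries = {
#       'src/third_party/foo': PathSpec('src/third_party/foo',
#                                       'https://host.example/foo',
#                                       'refs/heads/master'),
#   }
#   spec = Spec(SPEC_FLOAT, 'DEPS@1523000000', 1523000000, 'DEPS', entries)
#   spec.is_static()  # False, because the entry floats on a branch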


class Action(object):
  """Actions describe changes from one Spec to another.

  Attributes:
    timestamp: action time
    path: action path, which is relative to project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _root_dir):
    raise NotImplementedError

  def summary(self, _code_storage):
    raise NotImplementedError

  def __eq__(self, rhs):
    return self.__dict__ == rhs.__dict__

  def serialize(self):
    return self.__class__.__name__, self.__dict__


def unserialize_action(data):
  classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
  class_name, values = data
  assert class_name in [cls.__name__ for cls in classes
                       ], 'unknown action class: %s' % class_name
  for cls in classes:
    if class_name == cls.__name__:
      return cls(**values)
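
# Illustrative round trip (field values are hypothetical). serialize() emits
# (class name, attribute dict) and unserialize_action() rebuilds the action
# via keyword arguments; Action.__eq__ compares attribute dicts:
#   action = GitCheckoutCommit(1523000000, 'src/foo',
#                              'https://host.example/foo', 'abcdef0123')
#   unserialize_action(action.serialize()) == action   # True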


class ActionGroup(object):
  """An atomic group of Action objects.

  This models atomic commits (for example, a gerrit topic, or circular
  CQ-DEPEND). Otherwise, one ActionGroup usually consists of only one Action
  object.
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    self.actions.append(action)

  def serialize(self):
    return (self.timestamp, self.name, [a.serialize() for a in self.actions])

  def summary(self, code_storage):
    if self.comment:
      return self.comment
    # TODO(kcwu): support multiple Actions
    assert len(self.actions) == 1
    return self.actions[0].summary(code_storage)

  @staticmethod
  def unserialize(data):
    ag = ActionGroup(data[0])
    ag.name = data[1]
    for x in data[2]:
      ag.add(unserialize_action(x))
    return ag

  def apply(self, root_dir):
    for action in self.actions:
      action.apply(root_dir)


class GitCheckoutCommit(Action):
  """Describes a git commit action.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    git_repo = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(git_repo)
    git_util.checkout_version(git_repo, self.rev)

  def summary(self, code_storage):
    git_root = code_storage.cached_git_root(self.repo_url)
    summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
    return 'commit %s %s %r' % (self.rev[:10], self.path, summary)


class GitAddRepo(Action):
  """Describes a git repo add action.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    git_repo = os.path.join(root_dir, self.path)
    assert os.path.exists(git_repo)
    assert git_util.is_git_root(git_repo)

  def summary(self, _code_storage):
    return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])


class GitRemoveRepo(Action):
  """Describes a git repo remove action."""

  def __init__(self, timestamp, path):
    super(GitRemoveRepo, self).__init__(timestamp, path)

  def apply(self, root_dir):
    assert self.path
    git_repo = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(git_repo)
    assert 0
    shutil.rmtree(git_repo)

  def summary(self, _code_storage):
    return 'remove repo %s' % self.path


def apply_actions(code_storage, action_groups, root_dir):
  # Speed optimization: only apply the last one of consecutive commits per
  # repo. It is possible to optimize further, but we would need to take care
  # of git repo add/remove within another repo.
  commits = {}

  def batch_apply(commits):
    for i, commit_action in sorted(commits.values()):
      logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
      commit_action.apply(root_dir)

  for i, action_group in enumerate(action_groups, 1):
    for action in action_group.actions:
      if not isinstance(action, GitCheckoutCommit):
        break
    else:
      # If all actions are commits, defer them for batch processing.
      for action in action_group.actions:
        commits[action.path] = (i, action)
      continue

    batch_apply(commits)
    commits = {}
    action_group.apply(root_dir)

  batch_apply(commits)
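
# Illustrative effect of the batching above (paths and ordering hypothetical):
# if three consecutive action groups each hold a single GitCheckoutCommit for
# the same path 'src/foo', only the last commit is actually checked out. The
# deferred batch is flushed before any group containing a non-commit action
# (e.g. GitAddRepo/GitRemoveRepo), so ordering across repo layout changes is
# preserved.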


class SpecManager(object):
  """Spec-related abstract operations.

  This class enumerates Spec instances and switches the disk state to a given
  Spec.

  In other words, this class abstracts:
  - discovery of gclient's DEPS and repo's manifest
  - gclient sync and repo sync
  """

  def collect_float_spec(self, old, new):
    """Collects float Specs between two versions.

    This method may fetch specs from the network. However, it should not
    switch the tree version state.
    """
    raise NotImplementedError

  def collect_fixed_spec(self, old, new):
    """Collects fixed Specs between two versions.

    This method may fetch specs from the network. However, it should not
    switch the tree version state.
    """
    raise NotImplementedError

  def parse_spec(self, spec):
    """Parses information for a Spec object.

    Args:
      spec: Spec object. It specifies what to parse and the parsed information
          is stored inside.
    """
    raise NotImplementedError

  def sync_disk_state(self, rev):
    """Switches the source tree state to the given version."""
    raise NotImplementedError


class CodeStorage(object):
  """Queries code history and commit relationships without checkout.

  Because paths inside the source tree may be deleted or mapped to different
  remote repos in different versions, we cannot query git information of one
  version while the tree state is at another version. In order to query such
  information quickly and without changing the tree state, we need out-of-tree
  source code storage.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses just need to implement cached_git_root(), which returns the
  location.

  In other words, this class abstracts operations upon gclient's cache-dir and
  repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """The cached path of the given remote git repo.

    Args:
      repo_url: URL of the git remote repo

    Returns:
      path of the cache folder
    """
    raise NotImplementedError

  def is_ancestor_commit(self, spec, path, old, new):
    """Determines whether one commit is an ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is an ancestor of `new`
    """
    git_root = self.cached_git_root(spec[path].repo_url)
    return git_util.is_ancestor_commit(git_root, old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Gets the commit hash of the given spec path at the given time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp of the target time

    Returns:
      The commit hash of the given time. If there are multiple commits with
      the given timestamp, returns the last one.
    """
    git_root = self.cached_git_root(spec[path].repo_url)
    # spec[path].at is a remote reference name. Since git_root is a mirror,
    # there is no need to convert the name.
    return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)

  def get_actions_between_two_commit(self, spec, path, old, new):
    git_root = self.cached_git_root(spec[path].repo_url)
    result = []
    for timestamp, git_rev in git_util.list_commits_between_commits(
        git_root, old, new):
      result.append(
          GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
    return result

  def is_containing_commit(self, spec, path, rev):
    git_root = self.cached_git_root(spec[path].repo_url)
    return git_util.is_containing_commit(git_root, rev)

  def are_spec_commits_available(self, spec):
    for path, path_spec in spec.entries.items():
      if not path_spec.is_static():
        continue
      if not self.is_containing_commit(spec, path, path_spec.at):
        return False
    return True
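
# A minimal sketch of a concrete CodeStorage (the on-disk cache layout here is
# hypothetical; real subclasses would map repo_url to gclient's cache-dir or
# repo's mirror location):
#   class SimpleCodeStorage(CodeStorage):
#     def __init__(self, cache_dir):
#       self.cache_dir = cache_dir
#
#     def cached_git_root(self, repo_url):
#       name = re.sub(r'[^A-Za-z0-9._-]+', '_', _normalize_repo_url(repo_url))
#       return os.path.join(self.cache_dir, name)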


class CodeManager(object):
  """Class to reconstruct historical source tree state.

  This class can reconstruct all moments of source tree state and diffs between
  them.

  Attributes:
    root_dir: root path of project source tree
    spec_manager: SpecManager object
    code_storage: CodeStorage object
  """

  def __init__(self, root_dir, spec_manager, code_storage):
    self.root_dir = root_dir
    self.spec_manager = spec_manager
    self.code_storage = code_storage

  def generate_actions_between_specs(self, prev_float, next_float):
    """Generates actions between two float specs.

    Args:
      prev_float: the start Spec object (exclusive)
      next_float: the end Spec object (inclusive)

    Returns:
      list of Action objects (unordered)
    """
    actions = []
    for path in set(prev_float.entries) | set(next_float.entries):

      # Add repo
      if path not in prev_float:
        if next_float[path].is_static():
          next_at = next_float[path].at
        else:
          next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                      next_float.timestamp)
        actions.append(
            GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
                       next_at))
        continue

      # The existing path is floating; enumerate commits until the next spec.
      #
      #                  prev_at                             till_at
      # prev branch ---> o --------> o --------> o --------> o --------> ...
      #                  ^                                   ^
      #        prev_float.timestamp              next_float.timestamp
      if not prev_float[path].is_static():
        prev_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    prev_float.timestamp)
        till_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    next_float.timestamp)

        actions.extend(
            self.code_storage.get_actions_between_two_commit(
                prev_float, path, prev_at, till_at))
      else:
        prev_at = till_at = prev_float[path].at

      # At next_float.timestamp.
      if path not in next_float:
        # remove repo
        actions.append(GitRemoveRepo(next_float.timestamp, path))
        next_at = None

      elif next_float[path].is_static():
        # pinned to certain commit on different branch
        next_at = next_float[path].at

      elif next_float[path].at == prev_float[path].at:
        # keep floating on the same branch
        next_at = till_at

      else:
        # switch to another branch
        #                  prev_at                             till_at
        # prev branch ---> o --------> o --------> o --------> o --------> ...
        #
        #                                            next_at
        # next branch ...... o ------> o --------> o -----> ...
        #                  ^                                   ^
        #        prev_float.timestamp              next_float.timestamp
        next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                    next_float.timestamp)

      if next_at and next_at != till_at:
        actions.append(
            GitCheckoutCommit(next_float.timestamp, path,
                              next_float[path].repo_url, next_at))

    return actions

  def synthesize_fixed_spec(self, float_spec, timestamp):
    """Synthesizes a fixed spec from a float spec at the given time.

    Args:
      float_spec: the float spec
      timestamp: snapshot time

    Returns:
      Spec object
    """
    result = {}
    for path, path_spec in float_spec.entries.items():
      if not path_spec.is_static():
        at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
        path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)

      result[path] = copy.deepcopy(path_spec)

    name = '%s@%s' % (float_spec.path, timestamp)
    return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)

  def reorder_actions(self, actions):
    """Reorder and cluster actions.

    Args:
      actions: list of Action objects

    Returns:
      list of ActionGroup objects
    """
    # TODO(kcwu): support atomic commits across repos
    actions.sort(key=lambda x: x.timestamp)
    result = []
    for action in actions:
      group = ActionGroup(action.timestamp)
      group.add(action)
      result.append(group)
    return result

  def match_spec(self, target, specs, start_index=0):
    threshold = 3600
    # ideal_index is the index of last spec before target
    # begin and end are the range of indexes within threshold (inclusive)
    ideal_index = None
    begin, end = None, None
    for i, spec in enumerate(specs[start_index:], start_index):
      if spec.timestamp <= target.timestamp:
        ideal_index = i
      if abs(spec.timestamp - target.timestamp) < threshold:
        if begin is None:
          begin = i
        end = i

    candidates = []
    if ideal_index is not None:
      candidates.append(ideal_index)
    if begin is not None:
      candidates.extend(range(begin, end + 1))
    if not candidates:
      logger.error('unable to match %s: all specs are after it', target.name)
      return None

    compatible_candidates = [
        i for i in candidates if specs[i].is_subset(target)
    ]
    if not compatible_candidates:
      logger.error('unable to match %s: no compatible specs', target.name)
      spec = specs[candidates[0]]
      target.diff(spec)
      return None

    scores = []
    for i in compatible_candidates:
      scores.append((specs[i].similar_score(target), i))
    scores.sort()

    score, index = scores[0]
    if score != 0:
      logger.warning('not exactly match (score=%s): %s', score, target.name)
      target.diff(specs[index])

    if index < ideal_index:
      logger.warning(
          '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
          target.name, target.timestamp, specs[index].timestamp,
          specs[ideal_index].timestamp,
          specs[ideal_index].timestamp - target.timestamp)
    if index > ideal_index:
      logger.warning(
          'spec committed at %d matched later commit at %d. bad server clock?',
          target.timestamp, specs[index].timestamp)

    return index

  def associate_fixed_and_synthesized_specs(self, fixed_specs,
                                            synthesized_specs):
    # All fixed specs are snapshots of float specs. Theoretically, they
    # should be identical to one of the synthesized specs.
    # However, this is not always true for various reasons --- maybe due to
    # race conditions, maybe due to bugs in bisect-kit itself.
    # To overcome such glitches, we try to match them by similarity instead of
    # exact equality.
    result = []
    last_index = 0
    for i, fixed_spec in enumerate(fixed_specs):
      matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
      if matched_index is None:
        if i in (0, len(fixed_specs) - 1):
          logger.error('essential spec mismatch, unable to continue')
          assert 0
        else:
          logger.warning('%s does not match, skip', fixed_spec.name)
        continue
      result.append((i, matched_index))
      last_index = matched_index

    return result

  def _create_make_up_actions(self, fixed_spec, synthesized):
    timestamp = synthesized.timestamp
    make_up = ActionGroup(
        timestamp, comment='make up glitch for %s' % fixed_spec.name)
    for path in set(fixed_spec.entries) & set(synthesized.entries):
      if fixed_spec[path].at == synthesized[path].at:
        continue
      action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
                                 synthesized[path].at)
      make_up.add(action)

    if not make_up.actions:
      return None
    return make_up

  def build_revlist(self, old, new):
    """Builds the revlist.

    Returns:
      list of rev strings
    """
    logger.info('build_revlist')
    revlist = []

    # step 1, find all float and fixed specs in the given range.
    fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
    float_specs = self.spec_manager.collect_float_spec(old, new)
    while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
      float_specs.pop()
    assert float_specs
    for spec in float_specs + fixed_specs:
      self.spec_manager.parse_spec(spec)

    # step 2, synthesize all fixed specs in the range from float specs.
    specs = float_specs + [fixed_specs[-1]]
    actions = []
    logger.debug('len(specs)=%d', len(specs))
    for i in range(len(specs) - 1):
      prev_float = specs[i]
      next_float = specs[i + 1]
      logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                   prev_float.timestamp, next_float.name, next_float.timestamp)
      actions += self.generate_actions_between_specs(prev_float, next_float)
    action_groups = self.reorder_actions(actions)

    spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
    synthesized = [spec.copy()]
    for action_group in action_groups:
      spec.apply(action_group)
      synthesized.append(spec.copy())

    # step 3, associate fixed specs with synthesized specs.
    associated_pairs = self.associate_fixed_and_synthesized_specs(
        fixed_specs, synthesized)

    # step 4, group actions and cache them
    for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
      next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
      revlist.append(fixed_specs[fixed_index].name)
      this_action_groups = []

      # handle glitch
      if fixed_specs[fixed_index].similar_score(
          synthesized[synthesized_index]) != 0:
        assert synthesized[synthesized_index].is_subset(
            fixed_specs[fixed_index])
        skipped = set(fixed_specs[fixed_index].entries) - set(
            synthesized[synthesized_index].entries)
        if skipped:
          logger.warning(
              'between %s and %s, '
              'bisect-kit cannot analyze commit history of the following paths:',
              fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
          for path in sorted(skipped):
            logger.warning(' %s', path)

        make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                               synthesized[synthesized_index])
        if make_up:
          this_action_groups.append(make_up)

      this_action_groups.extend(
          action_groups[synthesized_index:next_synthesized_index])
      for idx, ag in enumerate(this_action_groups, 1):
        rev = make_intra_rev(fixed_specs[fixed_index].name,
                             fixed_specs[next_fixed_index].name, idx)
        ag.name = rev
        revlist.append(rev)

      self.save_action_groups_between_releases(
          fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
          this_action_groups)
    revlist.append(fixed_specs[associated_pairs[-1][0]].name)

    return revlist
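
  # Illustrative revlist shape (version numbers hypothetical): named fixed
  # specs interleaved with intra-revs that name each small step in between,
  # e.g.
  #   ['9900.0.0',
  #    '9900.0.0~9901.0.0/1', '9900.0.0~9901.0.0/2', ...,
  #    '9901.0.0']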

  def save_action_groups_between_releases(self, old, new, action_groups):
    data = [ag.serialize() for ag in action_groups]

    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    if not os.path.exists(cache_dir):
      os.makedirs(cache_dir)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    with open(cache_filename, 'w') as fp:
      json.dump(data, fp, indent=4, sort_keys=True)
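
  # Illustrative cache file content for one action group (all values are
  # hypothetical); see ActionGroup.serialize() and Action.serialize() for the
  # exact layout:
  #   [[1523000000, "9900.0.0~9901.0.0/1",
  #     [["GitCheckoutCommit",
  #       {"path": "src/foo", "repo_url": "https://host.example/foo",
  #        "rev": "abcdef0123", "timestamp": 1523000000}]]]]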

  def load_action_groups_between_releases(self, old, new):
    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    if not os.path.exists(cache_filename):
      raise Exception('cached revlist not found: %s' % cache_filename)

    result = []
    with open(cache_filename) as fp:
      for data in json.load(fp):
        result.append(ActionGroup.unserialize(data))

    return result

  def view_rev_diff(self, old, new):
    old_base, _, _ = parse_intra_rev(old)
    _, new_next, _ = parse_intra_rev(new)
    assert old_base != new_next

    revlist = []
    rev_summary = {}
    fixed_specs = self.spec_manager.collect_fixed_spec(old_base, new_next)
    for i, spec in enumerate(fixed_specs[:-1]):
      action_groups = self.load_action_groups_between_releases(
          fixed_specs[i].name, fixed_specs[i + 1].name)
      revlist.append(spec.name)
      rev_summary[spec.name] = ''
      for action_group in action_groups:
        revlist.append(action_group.name)
        rev_summary[action_group.name] = action_group.summary(self.code_storage)

    revlist.append(fixed_specs[-1].name)
    rev_summary[fixed_specs[-1].name] = ''

    old_index = revlist.index(old)
    new_index = revlist.index(new)
    for rev in revlist[old_index:new_index + 1]:
      logger.info('%s %s', rev, rev_summary[rev])

  def switch(self, rev):
    # easy case
    if not re.match(_re_intra_rev, rev):
      self.spec_manager.sync_disk_state(rev)
      return

    rev_old, rev_new, idx = parse_intra_rev(rev)
    action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    assert 0 <= idx <= len(action_groups)
    action_groups = action_groups[:idx]

    self.spec_manager.sync_disk_state(rev_old)

    apply_actions(self.code_storage, action_groups, self.root_dir)
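

# Illustrative end-to-end flow (MySpecManager and MyCodeStorage are
# hypothetical concrete subclasses; shown only to demonstrate the intended
# call sequence):
#   spec_manager = MySpecManager(...)    # knows how to fetch DEPS/manifests
#   code_storage = MyCodeStorage(...)    # knows where local git mirrors live
#   manager = CodeManager('/path/to/checkout', spec_manager, code_storage)
#   revlist = manager.build_revlist('9900.0.0', '9901.0.0')
#   manager.switch(revlist[len(revlist) // 2])   # jump to the middle revision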