# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Model of source code organization and changes.

This module models complex source code organization, i.e. nested git repos,
and their version relationships, i.e. pinned or floating git repos. In other
words, it is an abstraction of chrome's gclient DEPS, and of chromeos' and
Android's repo manifest.
"""

from __future__ import print_function
import copy
import json
import logging
import os
import re
import shutil

from bisect_kit import cli
from bisect_kit import git_util

logger = logging.getLogger(__name__)

_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'
_DIFF_CACHE_DIR = 'bisectkit-cache'


def make_intra_rev(a, b, index):
  """Makes an intra-rev version string.

  Between two major "named" versions a and b, there are many small changes
  (commits) in between. bisect-kit will identify all those instances and
  bisect them. We give names to those instances and call these names
  "intra-rev", which stands for minor version numbers between two major
  versions.

  Note, a+index (without b) is not enough to identify a unique change due to
  branches. Take chromeos as an example: both 9900.1.0 and 9901.0.0 are
  derived from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer
  to states in 9900.1.0 and 9901.0.0.

  Args:
    a: the start version
    b: the end version
    index: the index number of changes between a and b

  Returns:
    the intra-rev version string
  """
  return '%s~%s/%d' % (a, b, index)

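# Example (illustrative version strings):
#   make_intra_rev('9900.0.0', '9901.0.0', 3) == '9900.0.0~9901.0.0/3'
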

def parse_intra_rev(rev):
  """Decomposes an intra-rev string.

  See make_intra_rev() for what intra-rev is.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). If rev is not an intra-rev, it must be a normal
    version number and (rev, rev, 0) is returned.
  """
  m = re.match(_re_intra_rev, rev)
  if not m:
    return rev, rev, 0

  return m.group(1), m.group(2), int(m.group(3))

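# Example (illustrative): parse_intra_rev('9900.0.0~9901.0.0/3') returns
# ('9900.0.0', '9901.0.0', 3), and parse_intra_rev('9900.0.0') returns
# ('9900.0.0', '9900.0.0', 0).
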

def argtype_intra_rev(argtype):
  """Validates that an argument is an intra-rev string.

  Args:
    argtype: argtype function which validates the major version number

  Returns:
    A new argtype function which matches intra-rev
  """

  def argtype_function(s):
    m = re.match(_re_intra_rev, s)
    if m:
      try:
        argtype(m.group(1))
        argtype(m.group(2))
        return s
      except cli.ArgTypeError as e:
        examples = []
        for example in e.example:
          examples.append(make_intra_rev(example, example, 10))
        raise cli.ArgTypeError('Invalid intra rev', examples)
    raise cli.ArgTypeError('Invalid intra rev',
                           [make_intra_rev('<rev1>', '<rev2>', 10)])

  return argtype_function

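# Usage sketch (hypothetical wiring; assumes `argtype_major_version` is an
# argtype validator for major version numbers and `parser` is an
# argparse.ArgumentParser):
#   parser.add_argument(
#       'rev', type=argtype_intra_rev(argtype_major_version),
#       help='e.g. 9900.0.0 or 9900.0.0~9901.0.0/10')
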

def _normalize_repo_url(repo_url):
  repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
                    r'https://chrome-internal.googlesource.com/', repo_url)
  repo_url = re.sub(r'\.git$', '', repo_url)
  return repo_url

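# Example (illustrative repo name): both of the following URLs normalize to
# 'https://chrome-internal.googlesource.com/foo':
#   'https://chrome-internal.googlesource.com/a/foo.git'
#   'https://chrome-internal.googlesource.com/foo.git'
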

class PathSpec(object):
  """Specifies the code version of one path.

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    if self.path != rhs.path:
      return False
    if self.at != rhs.at:
      return False
    if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
      return False
    return True

  def __ne__(self, rhs):
    return not self == rhs


class Spec(object):
  """Collection of PathSpec objects.

  A Spec is analogous to gclient's DEPS and repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means the
        code version is pinned and fixed. On the other hand, SPEC_FLOAT is
        not pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purposes; usually a version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: dict mapping paths to PathSpec objects
  """

  def __init__(self, spec_type, name, timestamp, path, entries=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries

  def copy(self):
    return copy.deepcopy(self)

  def similar_score(self, rhs):
    """Calculates a similarity score against another Spec.

    Returns:
      score of similarity. Smaller value is more similar.
    """
    score = 0
    for path in set(self.entries) & set(rhs.entries):
      if rhs[path] == self[path]:
        continue
      if rhs[path].at == self[path].at:
        # It is common that the remote repo moved around but should be
        # treated as the same one.
        score += 0.1
      else:
        score += 1
    score += len(set(self.entries) ^ set(rhs.entries))
    return score

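  # Scoring sketch (illustrative paths): if two specs share 'src/a' and
  # 'src/b', where 'src/a' is identical and 'src/b' is at the same commit but
  # a relocated repo_url, and one spec additionally contains 'src/c', then
  # similar_score() returns 0.1 + 1 = 1.1.
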
  def is_static(self):
    return all(path_spec.is_static() for path_spec in self.entries.values())

  def is_subset(self, rhs):
    return set(self.entries.keys()) <= set(rhs.entries.keys())

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp
    for action in action_group.actions:
      if isinstance(action, GitAddRepo):
        self.entries[action.path] = PathSpec(action.path, action.repo_url,
                                             action.rev)
      elif isinstance(action, GitCheckoutCommit):
        self.entries[action.path].at = action.rev
      elif isinstance(action, GitRemoveRepo):
        del self.entries[action.path]
      else:
        assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    # For debugging.
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path, path_spec in sorted(self.entries.items()):
      print(path, path_spec.at)

  def diff(self, rhs):
    logger.info('diff between %s and %s', self.name, rhs.name)
    expect = set(self.entries)
    actual = set(rhs.entries)
    common = 0
    for path in sorted(expect - actual):
      logger.info('-%s', path)
    for path in sorted(actual - expect):
      logger.info('+%s', path)
    for path in sorted(expect & actual):
      if self[path] == rhs[path]:
        common += 1
        continue
      if self[path].at != rhs[path].at:
        logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
      if self[path].repo_url != rhs[path].repo_url:
        logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
                    rhs[path].repo_url)
    logger.info('and common=%s', common)


class Action(object):
  """Actions describe changes from one Spec to another.

  Attributes:
    timestamp: action time
    path: action path, which is relative to the project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _root_dir):
    raise NotImplementedError

  def summary(self, _code_storage):
    raise NotImplementedError

  def __eq__(self, rhs):
    return self.__dict__ == rhs.__dict__

  def serialize(self):
    return self.__class__.__name__, self.__dict__


def unserialize_action(data):
  classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
  class_name, values = data
  assert class_name in [cls.__name__ for cls in classes
                       ], 'unknown action class: %s' % class_name
  for cls in classes:
    if class_name == cls.__name__:
      action = cls(**values)
      break
  return action


class ActionGroup(object):
  """Atomic group of Action objects.

  This models atomic commits (for example, a gerrit topic or circular
  CQ-DEPEND). Otherwise, one ActionGroup usually consists of only one Action
  object.
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    self.actions.append(action)

  def serialize(self):
    return dict(
        timestamp=self.timestamp,
        name=self.name,
        comment=self.comment,
        actions=[a.serialize() for a in self.actions])

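  # Serialized form (sketch; the values below are hypothetical): serialize()
  # returns a dict such as
  #   {'timestamp': 1529999999, 'name': None, 'comment': None,
  #    'actions': [('GitCheckoutCommit',
  #                 {'timestamp': 1529999999, 'path': 'src/foo',
  #                  'repo_url': 'https://chromium.googlesource.com/foo',
  #                  'rev': '123abc...'})]}
  # Each action serializes to a (class name, attributes dict) pair, which
  # json.dump() stores as a two-element list.
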
  def summary(self, code_storage):
    if self.comment:
      return self.comment
    assert self.actions
    if len(self.actions) > 1:
      # TODO(kcwu): show details for multiple Actions
      return '(%d actions)' % len(self.actions)
    else:
      return self.actions[0].summary(code_storage)

  @staticmethod
  def unserialize(data):
    ag = ActionGroup(data['timestamp'])
    ag.name = data['name']
    ag.comment = data['comment']
    for x in data['actions']:
      ag.add(unserialize_action(x))
    return ag

  def apply(self, root_dir):
    for action in self.actions:
      action.apply(root_dir)


class GitCheckoutCommit(Action):
  """Describes a git commit action.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    git_repo = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(git_repo)
    git_util.checkout_version(git_repo, self.rev)

  def summary(self, code_storage):
    git_root = code_storage.cached_git_root(self.repo_url)
    summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
    return 'commit %s %s %r' % (self.rev[:10], self.path, summary)


class GitAddRepo(Action):
  """Describes a git repo add action.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    git_repo = os.path.join(root_dir, self.path)
    assert os.path.exists(git_repo)
    assert git_util.is_git_root(git_repo)

  def summary(self, _code_storage):
    return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])


class GitRemoveRepo(Action):
  """Describes a git repo remove action."""

  def apply(self, root_dir):
    assert self.path
    git_repo = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(git_repo)
    assert 0
    shutil.rmtree(git_repo)

  def summary(self, _code_storage):
    return 'remove repo %s' % self.path


def apply_actions(code_storage, action_groups, root_dir):
  # Speed optimization: only apply the last one of consecutive commits per
  # repo. It is possible to optimize further, but we would need to take care
  # of git repo add/remove within another repo.
  commits = {}

  def batch_apply(commits):
    for i, commit_action in sorted(commits.values()):
      logger.debug('[%d] applying "%r"', i,
                   commit_action.summary(code_storage))
      commit_action.apply(root_dir)

  for i, action_group in enumerate(action_groups, 1):
    for action in action_group.actions:
      if not isinstance(action, GitCheckoutCommit):
        break
    else:
      # If all actions are commits, defer them for batch processing.
      for action in action_group.actions:
        commits[action.path] = (i, action)
      continue

    batch_apply(commits)
    commits = {}
    action.apply(root_dir)

  batch_apply(commits)


class SpecManager(object):
  """Spec related abstract operations.

  This class enumerates Spec instances and switches disk state to a Spec.

  In other words, this class abstracts:
    - discovery of gclient's DEPS and repo's manifest
    - gclient sync and repo sync
  """

  def collect_float_spec(self, old, new):
    """Collects float Specs between two versions.

    This method may fetch specs from the network. However, it should not
    switch the tree version state.
    """
    raise NotImplementedError

  def collect_fixed_spec(self, old, new):
    """Collects fixed Specs between two versions.

    This method may fetch specs from the network. However, it should not
    switch the tree version state.
    """
    raise NotImplementedError

  def parse_spec(self, spec):
    """Parses information for a Spec object.

    Args:
      spec: Spec object. It specifies what to parse and the parsed information
        is stored inside.
    """
    raise NotImplementedError

  def sync_disk_state(self, rev):
    """Switches source tree state to the given version."""
    raise NotImplementedError


class CodeStorage(object):
  """Queries code history and commit relationships without checkout.

  Because paths inside the source tree may be deleted or map to different
  remote repos in different versions, we cannot query git information of one
  version while the tree state is at another version. In order to query
  information quickly and without changing the tree state, we need
  out-of-tree source code storage.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses just need to implement cached_git_root() which returns the
  location.

  In other words, this class abstracts operations upon gclient's cache-dir
  and repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """The cached path of the given remote git repo.

    Args:
      repo_url: URL of the remote git repo

    Returns:
      path of the cache folder
    """
    raise NotImplementedError

  def is_ancestor_commit(self, spec, path, old, new):
    """Determines whether one commit is an ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is an ancestor of `new`
    """
    git_root = self.cached_git_root(spec[path].repo_url)
    return git_util.is_ancestor_commit(git_root, old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Gets the commit hash of the given spec path at the given time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp

    Returns:
      The commit hash of the given time. If there are commits with the given
      timestamp, returns the last commit.
    """
    git_root = self.cached_git_root(spec[path].repo_url)
    # spec[path].at is a remote reference name. Since git_root is a mirror
    # (not a local checkout), there is no need to convert the name.
    return git_util.get_rev_by_time(git_root, timestamp, branch=spec[path].at)

  def get_actions_between_two_commit(self, spec, path, old, new):
    git_root = self.cached_git_root(spec[path].repo_url)
    result = []
    for timestamp, git_rev in git_util.list_commits_between_commits(
        git_root, old, new):
      result.append(
          GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
    return result

  def is_containing_commit(self, spec, path, rev):
    git_root = self.cached_git_root(spec[path].repo_url)
    return git_util.is_containing_commit(git_root, rev)

  def are_spec_commits_available(self, spec):
    for path, path_spec in spec.entries.items():
      if not path_spec.is_static():
        continue
      if not self.is_containing_commit(spec, path, path_spec.at):
        return False
    return True


class CodeManager(object):
  """Class to reconstruct historical source tree state.

  This class can reconstruct all moments of source tree state and the diffs
  between them.

  Attributes:
    root_dir: root path of project source tree
    spec_manager: SpecManager object
    code_storage: CodeStorage object
  """

  def __init__(self, root_dir, spec_manager, code_storage):
    self.root_dir = root_dir
    self.spec_manager = spec_manager
    self.code_storage = code_storage

  def generate_actions_between_specs(self, prev_float, next_float):
    """Generates actions between two float specs.

    Args:
      prev_float: start spec object (exclusive)
      next_float: end spec object (inclusive)

    Returns:
      list of Action objects (unordered)
    """
    actions = []
    for path in set(prev_float.entries) | set(next_float.entries):

      # Add repo.
      if path not in prev_float:
        if next_float[path].is_static():
          next_at = next_float[path].at
        else:
          next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                      next_float.timestamp)
        actions.append(
            GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
                       next_at))
        continue

      # Existing path is floating, enumerate commits until the next spec.
      #
      #                  prev_at                             till_at
      # prev branch ---> o --------> o --------> o --------> o --------> ...
      #                  ^                                   ^
      #       prev_float.timestamp               next_float.timestamp
      if not prev_float[path].is_static():
        prev_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    prev_float.timestamp)
        till_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    next_float.timestamp)

        actions.extend(
            self.code_storage.get_actions_between_two_commit(
                prev_float, path, prev_at, till_at))
      else:
        prev_at = till_at = prev_float[path].at

      # At next_float.timestamp.
      if path not in next_float:
        # Remove repo.
        actions.append(GitRemoveRepo(next_float.timestamp, path))
        next_at = None

      elif next_float[path].is_static():
        # Pinned to a certain commit, possibly on a different branch.
        next_at = next_float[path].at

      elif next_float[path].at == prev_float[path].at:
        # Keep floating on the same branch.
        next_at = till_at

      else:
        # Switch to another branch.
        #
        #                  prev_at                             till_at
        # prev branch ---> o --------> o --------> o --------> o --------> ...
        #
        #                                          next_at
        # next branch ...... o ------> o --------> o -----> ...
        #                    ^                     ^
        #        prev_float.timestamp     next_float.timestamp
        next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                    next_float.timestamp)

      if next_at and next_at != till_at:
        actions.append(
            GitCheckoutCommit(next_float.timestamp, path,
                              next_float[path].repo_url, next_at))

    return actions

  def synthesize_fixed_spec(self, float_spec, timestamp):
    """Synthesizes a fixed spec from a float spec at the given time.

    Args:
      float_spec: the float spec
      timestamp: snapshot time

    Returns:
      Spec object
    """
    result = {}
    for path, path_spec in float_spec.entries.items():
      if not path_spec.is_static():
        at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
        path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)

      result[path] = copy.deepcopy(path_spec)

    name = '%s@%s' % (float_spec.path, timestamp)
    return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)

  def reorder_actions(self, actions):
    """Reorders and clusters actions.

    Args:
      actions: list of Action objects

    Returns:
      list of ActionGroup objects
    """
    # TODO(kcwu): support atomic commits across repos
    actions.sort(key=lambda x: x.timestamp)
    result = []
    for action in actions:
      group = ActionGroup(action.timestamp)
      group.add(action)
      result.append(group)
    return result

  def match_spec(self, target, specs, start_index=0):
    threshold = 3600
    # ideal_index is the index of the last spec before target.
    # begin and end are the range of indexes within threshold (inclusive).
    ideal_index = None
    begin, end = None, None
    for i, spec in enumerate(specs[start_index:], start_index):
      if spec.timestamp <= target.timestamp:
        ideal_index = i
      if abs(spec.timestamp - target.timestamp) < threshold:
        if begin is None:
          begin = i
        end = i

    candidates = []
    if ideal_index is not None:
      candidates.append(ideal_index)
    if begin is not None:
      candidates.extend(range(begin, end + 1))
    if not candidates:
      logger.error('unable to match %s: all specs are after it', target.name)
      return None

    compatible_candidates = [
        i for i in candidates if specs[i].is_subset(target)
    ]
    if not compatible_candidates:
      logger.error('unable to match %s: no compatible specs', target.name)
      spec = specs[candidates[0]]
      target.diff(spec)
      return None

    scores = []
    for i in compatible_candidates:
      scores.append((specs[i].similar_score(target), i))
    scores.sort()

    score, index = scores[0]
    if score != 0:
      logger.warning('not an exact match (score=%s): %s', score, target.name)
      target.diff(specs[index])

    if index < ideal_index:
      logger.warning(
          '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
          target.name, target.timestamp, specs[index].timestamp,
          specs[ideal_index].timestamp,
          specs[index].timestamp - target.timestamp)
    if index > ideal_index:
      logger.warning(
          'spec committed at %d matched later commit at %d. bad server clock?',
          target.timestamp, specs[index].timestamp)

    return index

  def associate_fixed_and_synthesized_specs(self, fixed_specs,
                                            synthesized_specs):
    # All fixed specs are snapshots of float specs. Theoretically, they
    # should be identical to one of the synthesized specs.
    # However, this is not always true for various reasons: maybe due to race
    # conditions, maybe due to bugs in bisect-kit.
    # To overcome this glitch, we try to match them by similarity instead of
    # exact match.
    result = []
    last_index = 0
    for i, fixed_spec in enumerate(fixed_specs):
      matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
      if matched_index is None:
        if i in (0, len(fixed_specs) - 1):
          logger.error('essential spec mismatch, unable to continue')
          assert 0
        else:
          logger.warning('%s does not match, skip', fixed_spec.name)
        continue
      result.append((i, matched_index))
      last_index = matched_index

    return result

  def _create_make_up_actions(self, fixed_spec, synthesized):
    timestamp = synthesized.timestamp
    make_up = ActionGroup(
        timestamp, comment='make up glitch for %s' % fixed_spec.name)
    for path in set(fixed_spec.entries) & set(synthesized.entries):
      if fixed_spec[path].at == synthesized[path].at:
        continue
      action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
                                 synthesized[path].at)
      make_up.add(action)

    if not make_up.actions:
      return None
    return make_up

  def build_revlist(self, old, new):
    """Build revlist.

    Returns:
      list of rev string
    """
    logger.info('build_revlist')
    revlist = []

    # step 1, find all float and fixed specs in the given range.
    fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
    assert fixed_specs
    float_specs = self.spec_manager.collect_float_spec(old, new)
    assert float_specs
    while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
      float_specs.pop()
    assert float_specs
    for spec in float_specs + fixed_specs:
      self.spec_manager.parse_spec(spec)

    # step 2, synthesize all fixed specs in the range from float specs.
    specs = float_specs + [fixed_specs[-1]]
    actions = []
    logger.debug('len(specs)=%d', len(specs))
    for i in range(len(specs) - 1):
      prev_float = specs[i]
      next_float = specs[i + 1]
      logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                   prev_float.timestamp, next_float.name, next_float.timestamp)
      for action in self.generate_actions_between_specs(prev_float, next_float):
        if action.timestamp < fixed_specs[0].timestamp:
          continue
        actions.append(action)
    action_groups = self.reorder_actions(actions)

    spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
    synthesized = [spec.copy()]
    for action_group in action_groups:
      spec.apply(action_group)
      synthesized.append(spec.copy())

    # step 3, associate fixed specs with synthesized specs.
    associated_pairs = self.associate_fixed_and_synthesized_specs(
        fixed_specs, synthesized)

    # step 4, group actions and cache them
    for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
      next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
      revlist.append(fixed_specs[fixed_index].name)
      this_action_groups = []

      # handle glitch
      if fixed_specs[fixed_index].similar_score(
          synthesized[synthesized_index]) != 0:
        assert synthesized[synthesized_index].is_subset(
            fixed_specs[fixed_index])
        skipped = set(fixed_specs[fixed_index].entries) - set(
            synthesized[synthesized_index].entries)
        if skipped:
          logger.warning(
              'between %s and %s, '
              'bisect-kit cannot analyze commit history of following paths:',
              fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
          for path in sorted(skipped):
            logger.warning(' %s', path)

        make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                               synthesized[synthesized_index])
        if make_up:
          this_action_groups.append(make_up)

      this_action_groups.extend(
          action_groups[synthesized_index:next_synthesized_index])
      for idx, ag in enumerate(this_action_groups, 1):
        rev = make_intra_rev(fixed_specs[fixed_index].name,
                             fixed_specs[next_fixed_index].name, idx)
        ag.name = rev
        revlist.append(rev)

      self.save_action_groups_between_releases(
          fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
          this_action_groups)
    revlist.append(fixed_specs[associated_pairs[-1][0]].name)

    return revlist

  def save_action_groups_between_releases(self, old, new, action_groups):
    data = [ag.serialize() for ag in action_groups]

    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    if not os.path.exists(cache_dir):
      os.makedirs(cache_dir)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    with open(cache_filename, 'w') as fp:
      json.dump(data, fp, indent=4, sort_keys=True)

  def load_action_groups_between_releases(self, old, new):
    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    if not os.path.exists(cache_filename):
      raise Exception('cached revlist not found: %s' % cache_filename)

    result = []
    with open(cache_filename) as fp:
      for data in json.load(fp):
        result.append(ActionGroup.unserialize(data))

    return result

  def view_rev_diff(self, old, new):
    old_base, _, _ = parse_intra_rev(old)
    _, new_next, _ = parse_intra_rev(new)
    assert old_base != new_next

    revlist = []
    rev_summary = {}
    fixed_specs = self.spec_manager.collect_fixed_spec(old_base, new_next)
    for i, spec in enumerate(fixed_specs[:-1]):
      action_groups = self.load_action_groups_between_releases(
          fixed_specs[i].name, fixed_specs[i + 1].name)
      revlist.append(spec.name)
      rev_summary[spec.name] = ''
      for action_group in action_groups:
        revlist.append(action_group.name)
        rev_summary[action_group.name] = action_group.summary(self.code_storage)

    revlist.append(fixed_specs[-1].name)
    rev_summary[fixed_specs[-1].name] = ''

    old_index = revlist.index(old)
    new_index = revlist.index(new)
    for rev in revlist[old_index:new_index + 1]:
      logger.info('%s %s', rev, rev_summary[rev])

  def switch(self, rev):
    # Easy case: a plain version, not an intra-rev.
    if not re.match(_re_intra_rev, rev):
      self.spec_manager.sync_disk_state(rev)
      return

    rev_old, rev_new, idx = parse_intra_rev(rev)
    action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    assert 0 <= idx <= len(action_groups)
    action_groups = action_groups[:idx]

    self.spec_manager.sync_disk_state(rev_old)

    apply_actions(self.code_storage, action_groups, self.root_dir)
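
# Usage sketch (illustrative; assumes `manager` is a CodeManager wired with
# concrete SpecManager and CodeStorage implementations):
#   manager.switch('9900.0.0~9901.0.0/3')
# syncs the tree to 9900.0.0 and then replays the first 3 recorded action
# groups toward 9901.0.0.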