blob: a25bf8cb3e155e845383eeb29dc3b3e6800f64d2 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
This module models complex source code organization, i.e. nested git repos,
and their version relationship, i.e. pinned or floating git repos. In other
words, it is an abstraction of chrome's gclient DEPS, and of chromeos and
Android's repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
20
21from bisect_kit import cli
22from bisect_kit import git_util
23
# Logger shared by everything in this module.
logger = logging.getLogger(__name__)

# Regular expression of an intra-rev string: "<start>~<end>/<index>".
_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

# Possible values of Spec.spec_type.
SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'

# Directory name (joined onto the project root dir) where action groups
# between releases are cached as json files.
_DIFF_CACHE_DIR = 'bisectkit-cache'
31
32
def make_intra_rev(a, b, index):
  """Composes the "intra-rev" name of a state between two versions.

  Many small changes (commits) sit between two major "named" versions a and
  b. bisect-kit enumerates and bisects those intermediate states; the name
  given to each of them is called an "intra-rev", i.e. a minor version number
  within two major versions.

  Both endpoints are part of the name because a+index alone is ambiguous in
  the presence of branches. Take chromeos as example: 9900.1.0 and 9901.0.0
  are both derived from 9900.0.0, so "9900.0.0 plus 100 changes" could mean a
  state on the way to either of them.

  Args:
    a: the start version
    b: the end version
    index: the index number of changes between a and b

  Returns:
    the intra-rev version string, formatted as "<a>~<b>/<index>"
  """
  template = '%s~%s/%d'
  return template % (a, b, index)
55
56
def parse_intra_rev(rev):
  """Splits an intra-rev string into its components.

  See make_intra_rev for the definition of intra-rev.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). A string which does not look like an intra-rev is
    treated as a normal version number and yields (rev, rev, 0).
  """
  matched = re.match(_re_intra_rev, rev)
  if matched:
    start, end, index = matched.groups()
    return start, end, int(index)

  return rev, rev, 0
74
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080075
def argtype_intra_rev(argtype):
  """Builds an argtype function which accepts intra-rev strings.

  Args:
    argtype: argtype function which validates major version number

  Returns:
    A new argtype function which matches intra-rev. It returns its argument
    unchanged when valid and raises cli.ArgTypeError otherwise.
  """

  def argtype_function(s):
    matched = re.match(_re_intra_rev, s)
    if not matched:
      # Not even shaped like an intra-rev; show the generic form as example.
      raise cli.ArgTypeError('Invalid intra rev',
                             [make_intra_rev('<rev1>', '<rev2>', 10)])

    try:
      # Both endpoints have to be valid major versions.
      for endpoint in matched.group(1, 2):
        argtype(endpoint)
    except cli.ArgTypeError as e:
      # Turn the examples of major versions into examples of intra-rev.
      examples = [make_intra_rev(example, example, 10) for example in e.example]
      raise cli.ArgTypeError('Invalid intra rev', examples)
    return s

  return argtype_function
102
103
def _normalize_repo_url(repo_url):
  """Canonicalizes a git repo url for comparison purpose.

  Two rewrites are applied:
   - the "/a/" path prefix of chrome-internal.googlesource.com is removed
   - a trailing ".git" suffix is removed

  Args:
    repo_url: URL of git remote repo

  Returns:
    the normalized url string
  """
  # The dots are escaped so only the literal host name is rewritten.
  repo_url = re.sub(r'https://chrome-internal\.googlesource\.com/a/',
                    'https://chrome-internal.googlesource.com/', repo_url)
  return re.sub(r'\.git$', '', repo_url)
109
110
class PathSpec(object):
  """Specified code version of one path.

  Two PathSpec objects are equal if their path and version are identical and
  their repo urls are identical after normalization (_normalize_repo_url).

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    """Returns whether `at` is a git revision, as judged by git_util."""
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    # Comparing against an unrelated object (e.g. None) should simply be
    # "not equal" instead of blowing up on a missing attribute.
    if not isinstance(rhs, PathSpec):
      return NotImplemented
    if self.path != rhs.path or self.at != rhs.at:
      return False
    return (_normalize_repo_url(self.repo_url) == _normalize_repo_url(
        rhs.repo_url))

  def __ne__(self, rhs):
    return not self == rhs
139
140
class Spec(object):
  """Collection of PathSpec.

  Spec is analogous to gclient's DEPS and repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
        version is pinned and fixed. On the other hand, SPEC_FLOAT is not
        pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purpose. usually version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: paths to PathSpec dict
  """

  def __init__(self, spec_type, name, timestamp, path, entries=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries

  def copy(self):
    """Returns a deep copy of this spec."""
    return copy.deepcopy(self)

  def similar_score(self, rhs):
    """Calculates similar score to another Spec.

    For each path present in both specs: identical entries cost nothing,
    entries differing only outside of `at` cost 0.1 and entries with different
    `at` cost 1. Each path present in only one of the specs costs 1.

    Args:
      rhs: the other Spec

    Returns:
      score of similarity. Smaller value is more similar.
    """
    mine = set(self.entries)
    theirs = set(rhs.entries)

    score = 0
    for path in mine & theirs:
      if rhs[path] == self[path]:
        continue
      # it's often that remote repo moved around but should be treated as the
      # same one, hence the small penalty if only `at` agrees.
      score += 0.1 if rhs[path].at == self[path].at else 1
    return score + len(mine ^ theirs)

  def is_static(self):
    """Returns True if every entry of this spec is static."""
    for path_spec in self.entries.values():
      if not path_spec.is_static():
        return False
    return True

  def is_subset(self, rhs):
    """Returns True if all paths of this spec also exist in `rhs`."""
    return set(self.entries).issubset(rhs.entries)

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    """Updates this spec in place according to actions of an ActionGroup.

    The spec takes over the timestamp of the action group and is renamed to
    "(<timestamp>)".

    Args:
      action_group: ActionGroup object
    """
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp
    for action in action_group.actions:
      if isinstance(action, GitAddRepo):
        self.entries[action.path] = PathSpec(action.path, action.repo_url,
                                             action.rev)
        continue
      if isinstance(action, GitCheckoutCommit):
        self.entries[action.path].at = action.rev
        continue
      if isinstance(action, GitRemoveRepo):
        del self.entries[action.path]
        continue
      assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    """Prints the content of this spec to stdout, for debugging."""
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path in sorted(self.entries):
      print(path, self.entries[path].at)

  def diff(self, rhs):
    """Logs the differences between this spec and `rhs` (at info level)."""
    logger.info('diff between %s and %s', self.name, rhs.name)
    expect = set(self.entries)
    actual = set(rhs.entries)

    for removed in sorted(expect - actual):
      logger.info('-%s', removed)
    for added in sorted(actual - expect):
      logger.info('+%s', added)

    common = 0
    for path in sorted(expect & actual):
      ours, theirs = self[path], rhs[path]
      if ours == theirs:
        common += 1
        continue
      if ours.at != theirs.at:
        logger.info(' %s: at %s vs %s', path, ours.at, theirs.at)
      if ours.repo_url != theirs.repo_url:
        logger.info(' %s: repo_url %s vs %s', path, ours.repo_url,
                    theirs.repo_url)
    logger.info('and common=%s', common)
238
239
class Action(object):
  """Actions describe changes from one Spec to another.

  This is the base class; subclasses implement apply() and summary().

  Two actions are equal if they are of the same class and all their
  attributes are equal.

  Attributes:
    timestamp: action time
    path: action path, which is relative to project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _root_dir):
    """Applies this action to the source tree. Subclasses must override."""
    raise NotImplementedError

  def summary(self, _code_storage):
    """Returns one line description. Subclasses must override."""
    raise NotImplementedError

  def __eq__(self, rhs):
    # The class is part of the identity of an action (see serialize()), so
    # e.g. an "add repo" never equals a "checkout commit" with same fields.
    if self.__class__ is not rhs.__class__:
      return NotImplemented
    return self.__dict__ == rhs.__dict__

  def __ne__(self, rhs):
    # Python 2 does not derive != from __eq__; define it explicitly.
    return not self == rhs

  def serialize(self):
    """Returns (class name, attribute dict), the input of unserialize_action.

    Note the returned dict is the instance's own __dict__, not a copy.
    """
    return self.__class__.__name__, self.__dict__
263
264
def unserialize_action(data):
  """Reconstructs an Action object from the output of Action.serialize().

  Args:
    data: (class name, attribute dict). The attribute dict is passed as
        keyword arguments to the constructor of the named class.

  Returns:
    a GitCheckoutCommit, GitAddRepo or GitRemoveRepo object
  """
  class_name, values = data
  known_classes = (GitCheckoutCommit, GitAddRepo, GitRemoveRepo)
  known_names = [cls.__name__ for cls in known_classes]
  assert class_name in known_names, 'unknown action class: %s' % class_name
  action_class = known_classes[known_names.index(class_name)]
  return action_class(**values)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800275
276
class ActionGroup(object):
  """Atomic group of Action objects

  This models atomic commits (for example, gerrit topic, or circular
  CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
  object.

  Attributes:
    timestamp: time of this group
    name: name of this group; None until assigned by the owner
    actions: list of Action objects
    comment: optional description; if set, it is used as summary
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    """Appends an Action to this group."""
    self.actions.append(action)

  def serialize(self):
    """Converts to plain data which unserialize() can restore.

    Returns:
      (timestamp, name, list of serialized actions, comment)
    """
    # comment is stored as well; otherwise a restored group with a comment
    # and multiple actions could not produce its summary().
    return (self.timestamp, self.name, [a.serialize() for a in self.actions],
            self.comment)

  def summary(self, code_storage):
    """Returns one line description of this group.

    Args:
      code_storage: CodeStorage object, passed to the action's summary()

    Returns:
      the comment if there is one; otherwise the summary of the only action.
    """
    if self.comment:
      return self.comment
    # TODO(kcwu): support multiple Actions
    assert len(self.actions) == 1
    return self.actions[0].summary(code_storage)

  @staticmethod
  def unserialize(data):
    """Restores an ActionGroup from the output of serialize().

    Args:
      data: sequence of (timestamp, name, serialized actions[, comment]).
          The comment is optional in order to read data serialized without it.

    Returns:
      ActionGroup object
    """
    comment = data[3] if len(data) > 3 else None
    ag = ActionGroup(data[0], comment=comment)
    ag.name = data[1]
    for x in data[2]:
      ag.add(unserialize_action(x))
    return ag

  def apply(self, root_dir):
    """Applies all actions of this group, in order, to the source tree."""
    for action in self.actions:
      action.apply(root_dir)
315
316
class GitCheckoutCommit(Action):
  """Action of checking out a git commit in an existing repo.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    """Checks out `rev` in the git repo located at root_dir/path."""
    checkout_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(checkout_dir)
    git_util.checkout_version(checkout_dir, self.rev)

  def summary(self, code_storage):
    """Returns abbreviated rev, path and the first line of the commit log."""
    # The log is read from the cached repo given by code_storage, not from
    # the checkout under the source tree.
    mirror = code_storage.cached_git_root(self.repo_url)
    commit_log = git_util.get_commit_log(mirror, self.rev)
    subject = commit_log.splitlines()[0]
    return 'commit %s %s %r' % (self.rev[:10], self.path, subject)
339
340
class GitAddRepo(Action):
  """Action of adding a git repo to the source tree.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, root_dir):
    """Verifies the repo exists on disk.

    Nothing is fetched or checked out here; this only asserts that
    root_dir/path exists and is a git root.
    """
    repo_dir = os.path.join(root_dir, self.path)
    assert os.path.exists(repo_dir)
    assert git_util.is_git_root(repo_dir)

  def summary(self, _code_storage):
    """Returns description with path, repo url and abbreviated rev."""
    short_rev = self.rev[:10]
    return 'add repo %s from %s@%s' % (self.path, self.repo_url, short_rev)
361
362
class GitRemoveRepo(Action):
  """Action of removing a git repo from the source tree."""

  def apply(self, root_dir):
    """Removes the directory of git repo at root_dir/path.

    See the note in the body: as written, this always fails by assertion.
    """
    assert self.path
    repo_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(repo_dir)
    # NOTE(review): this unconditional assert stops execution before the
    # rmtree below (unless assertions are disabled, e.g. python -O).
    # Presumably removal is intentionally not enabled yet -- confirm.
    assert 0
    shutil.rmtree(repo_dir)

  def summary(self, _code_storage):
    """Returns description with the removed path."""
    return 'remove repo %s' % self.path
375
376
def apply_actions(code_storage, action_groups, root_dir):
  """Applies action groups, in order, to the source tree.

  Speed optimization: only apply the last one of consecutive commits per
  repo. It is possible to optimize further, but need to take care git repo
  add/remove within another repo.

  Args:
    code_storage: CodeStorage object, used for the summary in debug log
    action_groups: list of ActionGroup objects
    root_dir: root path of project source tree
  """
  # path -> (index of action group, the latest deferred GitCheckoutCommit)
  pending = {}

  def batch_apply(commits):
    # Sort by group index only. Actions of the same group share the index and
    # Action objects themselves are not orderable.
    for i, commit_action in sorted(commits.values(), key=lambda x: x[0]):
      logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
      commit_action.apply(root_dir)

  for i, action_group in enumerate(action_groups, 1):
    if all(isinstance(a, GitCheckoutCommit) for a in action_group.actions):
      # If all actions are commits, defer them for batch processing. A later
      # commit of the same path supersedes the earlier one.
      for action in action_group.actions:
        pending[action.path] = (i, action)
      continue

    # Repo add/remove is involved. Flush deferred commits first to keep the
    # order, then apply every action of this group (not just the one which
    # is not a commit).
    batch_apply(pending)
    pending = {}
    action_group.apply(root_dir)

  batch_apply(pending)
403
404
class SpecManager(object):
  """Interface of Spec related operations.

  Implementations enumerate Spec instances and switch disk state to a Spec.

  In other words, this class abstracts:
   - discovery of gclient's DEPS and repo's manifest
   - gclient sync and repo sync

  All methods here raise NotImplementedError and are to be overridden.
  """

  def collect_float_spec(self, old, new):
    """Collects float Spec between two versions.

    Implementations may fetch spec from network, but should not switch tree
    version state.

    Args:
      old: the start version
      new: the end version
    """
    raise NotImplementedError()

  def collect_fixed_spec(self, old, new):
    """Collects fixed Spec between two versions.

    Implementations may fetch spec from network, but should not switch tree
    version state.

    Args:
      old: the start version
      new: the end version
    """
    raise NotImplementedError()

  def parse_spec(self, spec):
    """Parses information for Spec object.

    Args:
      spec: Spec object. It specifies what to parse and the parsed information
          is stored inside.
    """
    raise NotImplementedError()

  def sync_disk_state(self, rev):
    """Switch source tree state to given version.

    Args:
      rev: the version to switch to
    """
    raise NotImplementedError()
443
444
class CodeStorage(object):
  """Query code history and commit relationship without checkout.

  Because paths inside source tree may be deleted or map to different remote
  repo in different versions, we cannot query git information of one version
  but the tree state is at another version. In order to query information
  without changing tree state and fast, we need out of tree source code
  storage.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses just need to implement cached_git_root() which returns the
  location.

  In other words, this class abstracts operations upon gclient's cache-dir
  and repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """The cached path of given remote git repo.

    Args:
      repo_url: URL of git remote repo

    Returns:
      path of cache folder
    """
    raise NotImplementedError()

  def is_ancestor_commit(self, spec, path, old, new):
    """Determine one commit is ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is ancestor of `new`
    """
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_ancestor_commit(mirror, old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Get commit hash of given spec by time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp

    Returns:
      The commit hash of given time. If there are commits with the given
      timestamp, returns the last commit.
    """
    path_spec = spec[path]
    mirror = self.cached_git_root(path_spec.repo_url)
    # path_spec.at is remote reference name. Since the cached repo is a mirror
    # (not a local checkout), there is no need to convert the name.
    return git_util.get_rev_by_time(mirror, timestamp, branch=path_spec.at)

  def get_actions_between_two_commit(self, spec, path, old, new):
    """Lists commits between two commits as checkout actions.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      list of GitCheckoutCommit, one for each (timestamp, rev) reported by
      git_util.list_commits_between_commits, in the same order.
    """
    mirror = self.cached_git_root(spec[path].repo_url)
    return [
        GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev)
        for timestamp, git_rev in git_util.list_commits_between_commits(
            mirror, old, new)
    ]

  def is_containing_commit(self, spec, path, rev):
    """Returns whether the cached repo of `path` contains commit `rev`."""
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_containing_commit(mirror, rev)

  def are_spec_commits_available(self, spec):
    """Returns whether commits of all static entries are in cached repos.

    Entries which are not static are not checked.
    """
    for path, path_spec in spec.entries.items():
      if path_spec.is_static() and not self.is_containing_commit(
          spec, path, path_spec.at):
        return False
    return True
525
526
class CodeManager(object):
  """Class to reconstruct historical source tree state.

  This class can reconstruct all moments of source tree state and diffs
  between them.

  Attributes:
    root_dir: root path of project source tree
    spec_manager: SpecManager object
    code_storage: CodeStorage object
  """

  def __init__(self, root_dir, spec_manager, code_storage):
    self.root_dir = root_dir
    self.spec_manager = spec_manager
    self.code_storage = code_storage

  def generate_actions_between_specs(self, prev_float, next_float):
    """Generates actions between two float specs.

    Args:
      prev_float: start of spec object (exclusive)
      next_float: end of spec object (inclusive)

    Returns:
      list of Action object (unordered)
    """
    actions = []
    for path in set(prev_float.entries) | set(next_float.entries):

      # Add repo
      if path not in prev_float:
        if next_float[path].is_static():
          next_at = next_float[path].at
        else:
          next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                      next_float.timestamp)
        actions.append(
            GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
                       next_at))
        continue

      # Existing path is floating, enumerates commits until next spec.
      #
      #                  prev_at                 till_at
      # prev branch ---> o --------> o --------> o --------> o --------> ...
      #                  ^                       ^
      #       prev_float.timestamp     next_float.timestamp
      if not prev_float[path].is_static():
        prev_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    prev_float.timestamp)
        till_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    next_float.timestamp)

        actions.extend(
            self.code_storage.get_actions_between_two_commit(
                prev_float, path, prev_at, till_at))
      else:
        prev_at = till_at = prev_float[path].at

      # At next_float.timestamp.
      if path not in next_float:
        # remove repo
        actions.append(GitRemoveRepo(next_float.timestamp, path))
        next_at = None

      elif next_float[path].is_static():
        # pinned to certain commit on different branch
        next_at = next_float[path].at

      elif next_float[path].at == prev_float[path].at:
        # keep floating on the same branch
        next_at = till_at

      else:
        # switch to another branch
        #                  prev_at                 till_at
        # prev branch ---> o --------> o --------> o --------> o --------> ...
        #
        #                                      next_at
        # next branch ...... o ------> o --------> o -----> ...
        #                  ^                       ^
        #       prev_float.timestamp     next_float.timestamp
        next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                    next_float.timestamp)

      # An explicit checkout is only necessary if the state at
      # next_float.timestamp is not what the enumerated commits end with.
      if next_at and next_at != till_at:
        actions.append(
            GitCheckoutCommit(next_float.timestamp, path,
                              next_float[path].repo_url, next_at))

    return actions

  def synthesize_fixed_spec(self, float_spec, timestamp):
    """Synthesizes fixed spec from float spec of given time.

    Entries which are not static are resolved to the commit of given time;
    static entries are copied.

    Args:
      float_spec: the float spec
      timestamp: snapshot time

    Returns:
      Spec object
    """
    result = {}
    for path, path_spec in float_spec.entries.items():
      if not path_spec.is_static():
        at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
        path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)

      result[path] = copy.deepcopy(path_spec)

    name = '%s@%s' % (float_spec.path, timestamp)
    return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)

  def reorder_actions(self, actions):
    """Reorder and cluster actions.

    Note `actions` is sorted in place by timestamp. Currently every action
    ends up in an ActionGroup of its own.

    Args:
      actions: list of Action objects

    Returns:
      list of ActionGroup objects
    """
    # TODO(kcwu): support atomic commits across repos
    actions.sort(key=lambda x: x.timestamp)
    result = []
    for action in actions:
      group = ActionGroup(action.timestamp)
      group.add(action)
      result.append(group)
    return result

  def match_spec(self, target, specs, start_index=0):
    """Finds the spec which is most similar to `target`.

    Candidates are the last spec not later than target and all specs whose
    timestamp is within one hour of target's. Among candidates which are
    subset of target, the one with the smallest similar score wins.

    Args:
      target: the Spec to match
      specs: list of Spec to search
      start_index: index of `specs` to start the search from

    Returns:
      index (into `specs`) of the matched spec. None if nothing matches.
    """
    threshold = 3600
    # ideal_index is the index of last spec before target
    # begin and end are the range of indexes within threshold (inclusive)
    ideal_index = None
    begin, end = None, None
    for i, spec in enumerate(specs[start_index:], start_index):
      if spec.timestamp <= target.timestamp:
        ideal_index = i
      if abs(spec.timestamp - target.timestamp) < threshold:
        if begin is None:
          begin = i
        end = i

    candidates = []
    if ideal_index is not None:
      candidates.append(ideal_index)
    if begin is not None:
      candidates.extend(range(begin, end + 1))
    if not candidates:
      logger.error('unable to match %s: all specs are after it', target.name)
      return None

    compatible_candidates = [
        i for i in candidates if specs[i].is_subset(target)
    ]
    if not compatible_candidates:
      logger.error('unable to match %s: no compatible specs', target.name)
      spec = specs[candidates[0]]
      target.diff(spec)
      return None

    scores = []
    for i in compatible_candidates:
      scores.append((specs[i].similar_score(target), i))
    scores.sort()

    score, index = scores[0]
    if score != 0:
      logger.warning('not exactly match (score=%s): %s', score, target.name)
      target.diff(specs[index])

    # ideal_index is None if all candidates are later than target (but within
    # threshold). Do not compare an index with None; in that case the match
    # is necessarily a later spec.
    if ideal_index is not None and index < ideal_index:
      logger.warning(
          '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
          target.name, target.timestamp, specs[index].timestamp,
          specs[ideal_index].timestamp,
          specs[index].timestamp - target.timestamp)
    if ideal_index is None or index > ideal_index:
      logger.warning(
          'spec committed at %d matched later commit at %d. bad server clock?',
          target.timestamp, specs[index].timestamp)

    return index

  def associate_fixed_and_synthesized_specs(self, fixed_specs,
                                            synthesized_specs):
    """Pairs each fixed spec with the most similar synthesized spec.

    Fixed specs without a match are skipped, except the first and the last
    one, which are essential (assertion failure if they do not match).

    Args:
      fixed_specs: list of fixed Spec
      synthesized_specs: list of synthesized Spec

    Returns:
      list of (index of fixed_specs, index of synthesized_specs)
    """
    # All fixed specs are snapshot of float specs. Theoretically, they
    # should be identical to one of the synthesized specs.
    # However, it's not always true for some reasons --- maybe due to race
    # condition, maybe due to bugs of this bisect-kit.
    # To overcome this glitch, we try to match them by similarity instead of
    # exact match.
    result = []
    last_index = 0
    for i, fixed_spec in enumerate(fixed_specs):
      matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
      if matched_index is None:
        if i in (0, len(fixed_specs) - 1):
          logger.error('essential spec mismatch, unable to continue')
          assert 0
        else:
          logger.warning('%s do not match, skip', fixed_spec.name)
          continue
      result.append((i, matched_index))
      # Later fixed specs are searched from here on only.
      last_index = matched_index

    return result

  def _create_make_up_actions(self, fixed_spec, synthesized):
    """Creates the action group bridging a fixed spec and its matched spec.

    For every common path whose version differs, the group checks out the
    version of the synthesized spec.

    Args:
      fixed_spec: fixed Spec
      synthesized: the synthesized Spec associated with fixed_spec

    Returns:
      ActionGroup object; None if there is nothing to make up.
    """
    timestamp = synthesized.timestamp
    make_up = ActionGroup(
        timestamp, comment='make up glitch for %s' % fixed_spec.name)
    for path in set(fixed_spec.entries) & set(synthesized.entries):
      if fixed_spec[path].at == synthesized[path].at:
        continue
      action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
                                 synthesized[path].at)
      make_up.add(action)

    if not make_up.actions:
      return None
    return make_up

  def build_revlist(self, old, new):
    """Build revlist.

    As side effect, the action groups between each pair of adjacent
    (associated) fixed specs are saved to cache files, to be used by switch()
    and view_rev_diff().

    Args:
      old: the start version
      new: the end version

    Returns:
      list of rev string
    """
    logger.info('build_revlist')
    revlist = []

    # step 1, find all float and fixed specs in the given range.
    fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
    assert fixed_specs
    float_specs = self.spec_manager.collect_float_spec(old, new)
    assert float_specs
    # Drop float specs later than the last fixed spec. The emptiness check
    # lets the assert below (not an IndexError) report the abnormal case.
    while float_specs and (
        float_specs[-1].timestamp > fixed_specs[-1].timestamp):
      float_specs.pop()
    assert float_specs
    for spec in float_specs + fixed_specs:
      self.spec_manager.parse_spec(spec)

    # step 2, synthesize all fixed specs in the range from float specs.
    specs = float_specs + [fixed_specs[-1]]
    actions = []
    logger.debug('len(specs)=%d', len(specs))
    for i in range(len(specs) - 1):
      prev_float = specs[i]
      next_float = specs[i + 1]
      logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                   prev_float.timestamp, next_float.name, next_float.timestamp)
      for action in self.generate_actions_between_specs(prev_float, next_float):
        if action.timestamp < fixed_specs[0].timestamp:
          continue
        actions.append(action)
    action_groups = self.reorder_actions(actions)

    # synthesized[k] is the state after applying the first k action groups.
    spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
    synthesized = [spec.copy()]
    for action_group in action_groups:
      spec.apply(action_group)
      synthesized.append(spec.copy())

    # step 3, associate fixed specs with synthesized specs.
    associated_pairs = self.associate_fixed_and_synthesized_specs(
        fixed_specs, synthesized)

    # step 4, group actions and cache them
    for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
      next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
      revlist.append(fixed_specs[fixed_index].name)
      this_action_groups = []

      # handle glitch
      if fixed_specs[fixed_index].similar_score(
          synthesized[synthesized_index]) != 0:
        assert synthesized[synthesized_index].is_subset(
            fixed_specs[fixed_index])
        skipped = set(fixed_specs[fixed_index].entries) - set(
            synthesized[synthesized_index].entries)
        if skipped:
          logger.warning(
              'between %s and %s, '
              'bisect-kit cannot analyze commit history of following paths:',
              fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
          for path in sorted(skipped):
            logger.warning(' %s', path)

        make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                               synthesized[synthesized_index])
        if make_up:
          this_action_groups.append(make_up)

      this_action_groups.extend(
          action_groups[synthesized_index:next_synthesized_index])
      for idx, ag in enumerate(this_action_groups, 1):
        rev = make_intra_rev(fixed_specs[fixed_index].name,
                             fixed_specs[next_fixed_index].name, idx)
        ag.name = rev
        revlist.append(rev)

      self.save_action_groups_between_releases(
          fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
          this_action_groups)
    revlist.append(fixed_specs[associated_pairs[-1][0]].name)

    return revlist

  def save_action_groups_between_releases(self, old, new, action_groups):
    """Saves action groups between two versions to a json cache file.

    The file is "<old>,<new>.json" inside the cache dir under root_dir; the
    cache dir is created if necessary.

    Args:
      old: the start version
      new: the end version
      action_groups: list of ActionGroup objects
    """
    data = [ag.serialize() for ag in action_groups]

    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    if not os.path.exists(cache_dir):
      os.makedirs(cache_dir)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    # open() instead of file(): the latter exists in python 2 only.
    with open(cache_filename, 'w') as fp:
      json.dump(data, fp, indent=4, sort_keys=True)

  def load_action_groups_between_releases(self, old, new):
    """Loads action groups saved by save_action_groups_between_releases().

    Args:
      old: the start version
      new: the end version

    Returns:
      list of ActionGroup objects

    Raises:
      Exception: the cache file does not exist
    """
    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    if not os.path.exists(cache_filename):
      raise Exception('cached revlist not found: %s' % cache_filename)

    # Use context manager so the file is closed right after parsing.
    with open(cache_filename) as fp:
      serialized = json.load(fp)

    result = []
    for data in serialized:
      result.append(ActionGroup.unserialize(data))

    return result

  def view_rev_diff(self, old, new):
    """Logs (at info level) revs between `old` and `new` with summaries.

    This relies on the cache files written by build_revlist().

    Args:
      old: start rev, intra-rev or normal version
      new: end rev, intra-rev or normal version
    """
    old_base, _, _ = parse_intra_rev(old)
    _, new_next, _ = parse_intra_rev(new)
    assert old_base != new_next

    revlist = []
    rev_summary = {}
    fixed_specs = self.spec_manager.collect_fixed_spec(old_base, new_next)
    for i, spec in enumerate(fixed_specs[:-1]):
      action_groups = self.load_action_groups_between_releases(
          fixed_specs[i].name, fixed_specs[i + 1].name)
      revlist.append(spec.name)
      rev_summary[spec.name] = ''
      for action_group in action_groups:
        revlist.append(action_group.name)
        rev_summary[action_group.name] = action_group.summary(self.code_storage)

    revlist.append(fixed_specs[-1].name)
    rev_summary[fixed_specs[-1].name] = ''

    old_index = revlist.index(old)
    new_index = revlist.index(new)
    for rev in revlist[old_index:new_index + 1]:
      logger.info('%s %s', rev, rev_summary[rev])

  def switch(self, rev):
    """Switches the source tree to given rev.

    A normal version is handed to spec_manager.sync_disk_state() directly.
    For an intra-rev, the tree is synced to its start version first and then
    the leading cached action groups, up to its index, are applied.

    Args:
      rev: intra-rev or normal version
    """
    # easy case
    if not re.match(_re_intra_rev, rev):
      self.spec_manager.sync_disk_state(rev)
      return

    rev_old, rev_new, idx = parse_intra_rev(rev)
    action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    assert 0 <= idx <= len(action_groups)
    action_groups = action_groups[:idx]

    self.spec_manager.sync_disk_state(rev_old)

    apply_actions(self.code_storage, action_groups, self.root_dir)
899 apply_actions(self.code_storage, action_groups, self.root_dir)