blob: 6ff23f93057b6a947bbbb9d733642f9e37724414 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
20
21from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080022from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080023from bisect_kit import git_util
24
25logger = logging.getLogger(__name__)
26
27_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
28
29SPEC_FIXED = 'fixed'
30SPEC_FLOAT = 'float'
31_DIFF_CACHE_DIR = 'bisectkit-cache'
32
33
34def make_intra_rev(a, b, index):
35 """Makes intra-rev version string.
36
37 Between two major "named" versions a and b, there are many small changes
38 (commits) in-between. bisect-kit will identify all those instances and bisect
39 them. We give names to those instances and call these names as "intra-rev"
40 which stands for minor version numbers within two major version.
41
42 Note, a+index (without b) is not enough to identify an unique change due to
43 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
44 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
45 in 9900.1.0 and 9901.0.0.
46
47 Args:
48 a: the start version
49 b: the end version
50 index: the index number of changes between a and b
51
52 Returns:
53 the intra-rev version string
54 """
55 return '%s~%s/%d' % (a, b, index)
56
57
58def parse_intra_rev(rev):
59 """Decomposes intra-rev string.
60
61 See comments of make_intra_rev for what is intra-rev.
62
63 Args:
64 rev: intra-rev string or normal version number
65
66 Returns:
67 (start, end, index). If rev is not intra-rev, it must be normal version
68 number and returns (rev, rev, 0).
69 """
70 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080071 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080072 return rev, rev, 0
73
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080074 return m.group(1), m.group(2), int(m.group(3))
75
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080076
77def argtype_intra_rev(argtype):
78 """Validates argument is intra-rev.
79
80 Args:
81 argtype: argtype function which validates major version number
82
83 Returns:
84 A new argtype function which matches intra-rev
85 """
86
87 def argtype_function(s):
88 m = re.match(_re_intra_rev, s)
89 if m:
90 try:
91 argtype(m.group(1))
92 argtype(m.group(2))
93 return s
94 except cli.ArgTypeError as e:
95 examples = []
96 for example in e.example:
97 examples.append(make_intra_rev(example, example, 10))
98 raise cli.ArgTypeError('Invalid intra rev', examples)
99 raise cli.ArgTypeError('Invalid intra rev',
100 [make_intra_rev('<rev1>', '<rev2>', 10)])
101
102 return argtype_function
103
104
105def _normalize_repo_url(repo_url):
106 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
107 r'https://chrome-internal.googlesource.com/', repo_url)
108 repo_url = re.sub(r'\.git$', '', repo_url)
109 return repo_url
110
111
112class PathSpec(object):
113 """Specified code version of one path.
114
115 Attributes:
116 path: local path, relative to project base dir
117 repo_url: code repository location
118 at: code version, could be git hash or branch name
119 """
120
121 def __init__(self, path, repo_url, at):
122 self.path = path
123 self.repo_url = repo_url
124 self.at = at
125
126 def is_static(self):
127 return git_util.is_git_rev(self.at)
128
129 def __eq__(self, rhs):
130 if self.path != rhs.path:
131 return False
132 if self.at != rhs.at:
133 return False
134 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
135 return False
136 return True
137
138 def __ne__(self, rhs):
139 return not self == rhs
140
141
142class Spec(object):
143 """Collection of PathSpec.
144
145 Spec is analogy to gclient's DEPS and repo's manifest.
146
147 Attributes:
148 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
149 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
150 pinned and the actual version (git commit) may change over time.
151 name: name of this spec, for debugging purpose. usually version number
152 or git hash
153 timestamp: timestamp of this spec
154 path: path of spec
155 entries: paths to PathSpec dict
156 """
157
158 def __init__(self, spec_type, name, timestamp, path, entries=None):
159 self.spec_type = spec_type
160 self.name = name
161 self.timestamp = timestamp
162 self.path = path
163 self.entries = entries
164
165 def copy(self):
166 return copy.deepcopy(self)
167
168 def similar_score(self, rhs):
169 """Calculates similar score to another Spec.
170
171 Returns:
172 score of similarity. Smaller value is more similar.
173 """
174 score = 0
175 for path in set(self.entries) & set(rhs.entries):
176 if rhs[path] == self[path]:
177 continue
178 if rhs[path].at == self[path].at:
179 # it's often that remote repo moved around but should be treated as the
180 # same one
181 score += 0.1
182 else:
183 score += 1
184 score += len(set(self.entries) ^ set(rhs.entries))
185 return score
186
187 def is_static(self):
188 return all(path_spec.is_static() for path_spec in self.entries.values())
189
190 def is_subset(self, rhs):
191 return set(self.entries.keys()) <= set(rhs.entries.keys())
192
193 def __getitem__(self, path):
194 return self.entries[path]
195
196 def __contains__(self, path):
197 return path in self.entries
198
199 def apply(self, action_group):
200 self.timestamp = action_group.timestamp
201 self.name = '(%s)' % self.timestamp
202 for action in action_group.actions:
203 if isinstance(action, GitAddRepo):
204 self.entries[action.path] = PathSpec(action.path, action.repo_url,
205 action.rev)
206 elif isinstance(action, GitCheckoutCommit):
207 self.entries[action.path].at = action.rev
208 elif isinstance(action, GitRemoveRepo):
209 del self.entries[action.path]
210 else:
211 assert 0, 'unknown action: %s' % action.__class__.__name__
212
213 def dump(self):
214 # for debugging
215 print(self.name, self.path, self.timestamp)
216 print('size', len(self.entries))
217 for path, path_spec in sorted(self.entries.items()):
218 print(path, path_spec.at)
219
220 def diff(self, rhs):
221 logger.info('diff between %s and %s', self.name, rhs.name)
222 expect = set(self.entries)
223 actual = set(rhs.entries)
224 common = 0
225 for path in sorted(expect - actual):
226 logger.info('-%s', path)
227 for path in sorted(actual - expect):
228 logger.info('+%s', path)
229 for path in sorted(expect & actual):
230 if self[path] == rhs[path]:
231 common += 1
232 continue
233 if self[path].at != rhs[path].at:
234 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
235 if self[path].repo_url != rhs[path].repo_url:
236 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
237 rhs[path].repo_url)
238 logger.info('and common=%s', common)
239
240
241class Action(object):
242 """Actions describe changes from one Spec to another.
243
244 Attributes:
245 timestamp: action time
246 path: action path, which is relative to project root
247 """
248
249 def __init__(self, timestamp, path):
250 self.timestamp = timestamp
251 self.path = path
252
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800253 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800254 raise NotImplementedError
255
256 def summary(self, _code_storage):
257 raise NotImplementedError
258
259 def __eq__(self, rhs):
260 return self.__dict__ == rhs.__dict__
261
262 def serialize(self):
263 return self.__class__.__name__, self.__dict__
264
265
266def unserialize_action(data):
267 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
268 class_name, values = data
269 assert class_name in [cls.__name__ for cls in classes
270 ], 'unknown action class: %s' % class_name
271 for cls in classes:
272 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800273 action = cls(**values)
274 break
275 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800276
277
278class ActionGroup(object):
279 """Atomic group of Action objects
280
281 This models atomic commits (for example, gerrit topic, or circular
282 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
283 object.
284 """
285
286 def __init__(self, timestamp, comment=None):
287 self.timestamp = timestamp
288 self.name = None
289 self.actions = []
290 self.comment = comment
291
292 def add(self, action):
293 self.actions.append(action)
294
295 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800296 return dict(
297 timestamp=self.timestamp,
298 name=self.name,
299 comment=self.comment,
300 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800301
302 def summary(self, code_storage):
303 if self.comment:
304 return self.comment
Kuang-che Wu22455262018-08-03 15:38:29 +0800305 assert self.actions
306 if len(self.actions) > 1:
307 # TODO(kcwu): show details for multiple Actions
308 return '(%d actions)' % len(self.actions)
309 else:
310 return self.actions[0].summary(code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311
312 @staticmethod
313 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800314 ag = ActionGroup(data['timestamp'])
315 ag.name = data['name']
316 ag.comment = data['comment']
317 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800318 ag.add(unserialize_action(x))
319 return ag
320
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800321 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800322 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800323 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324
325
326class GitCheckoutCommit(Action):
327 """Describes a git commit action.
328
329 Attributes:
330 repo_url: the corresponding url of git repo
331 rev: git commit to checkout
332 """
333
334 def __init__(self, timestamp, path, repo_url, rev):
335 super(GitCheckoutCommit, self).__init__(timestamp, path)
336 self.repo_url = repo_url
337 self.rev = rev
338
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800339 def apply(self, code_storage, root_dir):
340 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800341 git_repo = os.path.join(root_dir, self.path)
342 assert git_util.is_git_root(git_repo)
343 git_util.checkout_version(git_repo, self.rev)
344
345 def summary(self, code_storage):
346 git_root = code_storage.cached_git_root(self.repo_url)
347 summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
348 return 'commit %s %s %r' % (self.rev[:10], self.path, summary)
349
350
351class GitAddRepo(Action):
352 """Describes a git repo add action.
353
354 Attributes:
355 repo_url: the corresponding url of git repo to add
356 rev: git commit to checkout
357 """
358
359 def __init__(self, timestamp, path, repo_url, rev):
360 super(GitAddRepo, self).__init__(timestamp, path)
361 self.repo_url = repo_url
362 self.rev = rev
363
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800364 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800365 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800366 assert not os.path.exists(git_repo)
367
368 reference = code_storage.cached_git_root(self.repo_url)
369 git_util.clone(git_repo, self.repo_url, reference=reference)
370 git_util.checkout_version(git_repo, self.rev)
371
372 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800373
374 def summary(self, _code_storage):
375 return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
376
377
378class GitRemoveRepo(Action):
379 """Describes a git repo remove action."""
380
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800381 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800382 assert self.path
383 git_repo = os.path.join(root_dir, self.path)
384 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800385 shutil.rmtree(git_repo)
386
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800387 code_storage.remove_from_project_list(root_dir, self.path)
388
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800389 def summary(self, _code_storage):
390 return 'remove repo %s' % self.path
391
392
393def apply_actions(code_storage, action_groups, root_dir):
394 # Speed optimization: only apply the last one of consecutive commits per
395 # repo. It is possible to optimize further, but need to take care git repo
396 # add/remove within another repo.
397 commits = {}
398
399 def batch_apply(commits):
400 for i, commit_action in sorted(commits.values()):
401 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800402 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800403
404 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800405 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800406 if not isinstance(action, GitCheckoutCommit):
407 break
408 else:
409 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800410 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800411 commits[action.path] = (i, action)
412 continue
413
414 batch_apply(commits)
415 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800416 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800417
418 batch_apply(commits)
419
420
421class SpecManager(object):
422 """Spec related abstract operations.
423
424 This class enumerates Spec instances and switch disk state to Spec.
425
426 In other words, this class abstracts:
427 - discovery of gclient's DEPS and repo's manifest
428 - gclient sync and repo sync
429 """
430
431 def collect_float_spec(self, old, new):
432 """Collects float Spec between two versions.
433
434 This method may fetch spec from network. However, it should not switch tree
435 version state.
436 """
437 raise NotImplementedError
438
439 def collect_fixed_spec(self, old, new):
440 """Collects fixed Spec between two versions.
441
442 This method may fetch spec from network. However, it should not switch tree
443 version state.
444 """
445 raise NotImplementedError
446
447 def parse_spec(self, spec):
448 """Parses information for Spec object.
449
450 Args:
451 spec: Spec object. It specifies what to parse and the parsed information
452 is stored inside.
453 """
454 raise NotImplementedError
455
456 def sync_disk_state(self, rev):
457 """Switch source tree state to given version."""
458 raise NotImplementedError
459
460
461class CodeStorage(object):
462 """Query code history and commit relationship without checkout.
463
464 Because paths inside source tree may be deleted or map to different remote
465 repo in different versions, we cannot query git information of one version
466 but the tree state is at another version. In order to query information
467 without changing tree state and fast, we need out of tree source code
468 storage.
469
470 This class assumes all git repos are mirrored somewhere on local disk.
471 Subclasses just need to implement cached_git_root() which returns the
472 location.
473
474 In other words, this class abstracts operations upon gclient's cache-dir
475 repo's mirror.
476 """
477
478 def cached_git_root(self, repo_url):
479 """The cached path of given remote git repo.
480
481 Args:
482 repo_url: URL of git remote repo
483
484 Returns:
485 path of cache folder
486 """
487 raise NotImplementedError
488
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800489 def add_to_project_list(self, project_root, path, repo_url):
490 raise NotImplementedError
491
492 def remove_from_project_list(self, project_root, path):
493 raise NotImplementedError
494
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800495 def is_ancestor_commit(self, spec, path, old, new):
496 """Determine one commit is ancestor of another.
497
498 Args:
499 spec: Spec object
500 path: local path relative to project root
501 old: commit id
502 new: commit id
503
504 Returns:
505 True if `old` is ancestor of `new`
506 """
507 git_root = self.cached_git_root(spec[path].repo_url)
508 return git_util.is_ancestor_commit(git_root, old, new)
509
510 def get_rev_by_time(self, spec, path, timestamp):
511 """Get commit hash of given spec by time.
512
513 Args:
514 spec: Spec object
515 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800516 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800517
518 Returns:
519 The commit hash of given time. If there are commits with the given
520 timestamp, returns the last commit.
521 """
522 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800523 # spec[path].at is remote reference name. Since git_root is a mirror (not
524 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800525 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800526
527 def get_actions_between_two_commit(self, spec, path, old, new):
528 git_root = self.cached_git_root(spec[path].repo_url)
529 result = []
530 for timestamp, git_rev in git_util.list_commits_between_commits(
531 git_root, old, new):
532 result.append(
533 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
534 return result
535
536 def is_containing_commit(self, spec, path, rev):
537 git_root = self.cached_git_root(spec[path].repo_url)
538 return git_util.is_containing_commit(git_root, rev)
539
540 def are_spec_commits_available(self, spec):
541 for path, path_spec in spec.entries.items():
542 if not path_spec.is_static():
543 continue
544 if not self.is_containing_commit(spec, path, path_spec.at):
545 return False
546 return True
547
548
549class CodeManager(object):
550 """Class to reconstruct historical source tree state.
551
552 This class can reconstruct all moments of source tree state and diffs between
553 them.
554
555 Attributes:
556 root_dir: root path of project source tree
557 spec_manager: SpecManager object
558 code_storage: CodeStorage object
559 """
560
561 def __init__(self, root_dir, spec_manager, code_storage):
562 self.root_dir = root_dir
563 self.spec_manager = spec_manager
564 self.code_storage = code_storage
565
566 def generate_actions_between_specs(self, prev_float, next_float):
567 """Generates actions between two float specs.
568
569 Args:
570 prev_float: start of spec object (exclusive)
571 next_float: end of spec object (inclusive)
572
573 Returns:
574 list of Action object (unordered)
575 """
576 actions = []
577 for path in set(prev_float.entries) | set(next_float.entries):
578
579 # Add repo
580 if path not in prev_float:
581 if next_float[path].is_static():
582 next_at = next_float[path].at
583 else:
584 next_at = self.code_storage.get_rev_by_time(next_float, path,
585 next_float.timestamp)
586 actions.append(
587 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
588 next_at))
589 continue
590
591 # Existing path is floating, enumerates commits until next spec.
592 #
593 # prev_at till_at
594 # prev branch ---> o --------> o --------> o --------> o --------> ...
595 # ^ ^
596 # prev_float.timestamp next_float.timestamp
597 if not prev_float[path].is_static():
598 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
599 prev_float.timestamp)
600 till_at = self.code_storage.get_rev_by_time(prev_float, path,
601 next_float.timestamp)
602
603 actions.extend(
604 self.code_storage.get_actions_between_two_commit(
605 prev_float, path, prev_at, till_at))
606 else:
607 prev_at = till_at = prev_float[path].at
608
609 # At next_float.timestamp.
610 if path not in next_float:
611 # remove repo
612 actions.append(GitRemoveRepo(next_float.timestamp, path))
613 next_at = None
614
615 elif next_float[path].is_static():
616 # pinned to certain commit on different branch
617 next_at = next_float[path].at
618
619 elif next_float[path].at == prev_float[path].at:
620 # keep floating on the same branch
621 next_at = till_at
622
623 else:
624 # switch to another branch
625 # prev_at till_at
626 # prev branch ---> o --------> o --------> o --------> o --------> ...
627 #
628 # next_at
629 # next branch ...... o ------> o --------> o -----> ...
630 # ^ ^
631 # prev_float.timestamp next_float.timestamp
632 next_at = self.code_storage.get_rev_by_time(next_float, path,
633 next_float.timestamp)
634
635 if next_at and next_at != till_at:
636 actions.append(
637 GitCheckoutCommit(next_float.timestamp, path,
638 next_float[path].repo_url, next_at))
639
640 return actions
641
642 def synthesize_fixed_spec(self, float_spec, timestamp):
643 """Synthesizes fixed spec from float spec of given time.
644
645 Args:
646 float_spec: the float spec
647 timestamp: snapshot time
648
649 Returns:
650 Spec object
651 """
652 result = {}
653 for path, path_spec in float_spec.entries.items():
654 if not path_spec.is_static():
655 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
656 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
657
658 result[path] = copy.deepcopy(path_spec)
659
660 name = '%s@%s' % (float_spec.path, timestamp)
661 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
662
663 def reorder_actions(self, actions):
664 """Reorder and cluster actions.
665
666 Args:
667 actions: list of Action objects
668
669 Returns:
670 list of ActionGroup objects
671 """
672 # TODO(kcwu): support atomic commits across repos
673 actions.sort(key=lambda x: x.timestamp)
674 result = []
675 for action in actions:
676 group = ActionGroup(action.timestamp)
677 group.add(action)
678 result.append(group)
679 return result
680
681 def match_spec(self, target, specs, start_index=0):
682 threshold = 3600
683 # ideal_index is the index of last spec before target
684 # begin and end are the range of indexes within threshold (inclusive)
685 ideal_index = None
686 begin, end = None, None
687 for i, spec in enumerate(specs[start_index:], start_index):
688 if spec.timestamp <= target.timestamp:
689 ideal_index = i
690 if abs(spec.timestamp - target.timestamp) < threshold:
691 if begin is None:
692 begin = i
693 end = i
694
695 candidates = []
696 if ideal_index is not None:
697 candidates.append(ideal_index)
698 if begin is not None:
699 candidates.extend(range(begin, end + 1))
700 if not candidates:
701 logger.error('unable to match %s: all specs are after it', target.name)
702 return None
703
704 compatible_candidates = [
705 i for i in candidates if specs[i].is_subset(target)
706 ]
707 if not compatible_candidates:
708 logger.error('unable to match %s: no compatible specs', target.name)
709 spec = specs[candidates[0]]
710 target.diff(spec)
711 return None
712
713 scores = []
714 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800715 # Tie-break: prefer earlier timestamp and smaller difference.
716 if specs[i].timestamp <= target.timestamp:
717 timediff = 0, target.timestamp - specs[i].timestamp
718 else:
719 timediff = 1, specs[i].timestamp - target.timestamp
720 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800721 scores.sort()
722
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800723 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800724 if score != 0:
725 logger.warning('not exactly match (score=%s): %s', score, target.name)
726 target.diff(specs[index])
727
728 if index < ideal_index:
729 logger.warning(
730 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
731 target.name, target.timestamp, specs[index].timestamp,
732 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800733 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800734 if index > ideal_index:
735 logger.warning(
736 'spec committed at %d matched later commit at %d. bad server clock?',
737 target.timestamp, specs[index].timestamp)
738
739 return index
740
741 def associate_fixed_and_synthesized_specs(self, fixed_specs,
742 synthesized_specs):
743 # All fixed specs are snapshot of float specs. Theoretically, they
744 # should be identical to one of the synthesized specs.
745 # However, it's not always true for some reasons --- maybe due to race
746 # condition, maybe due to bugs of this bisect-kit.
747 # To overcome this glitch, we try to match them by similarity instead of
748 # exact match.
749 result = []
750 last_index = 0
751 for i, fixed_spec in enumerate(fixed_specs):
752 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
753 if matched_index is None:
754 if i in (0, len(fixed_specs) - 1):
755 logger.error('essential spec mismatch, unable to continue')
756 assert 0
757 else:
758 logger.warning('%s do not match, skip', fixed_spec.name)
759 continue
760 result.append((i, matched_index))
761 last_index = matched_index
762
763 return result
764
765 def _create_make_up_actions(self, fixed_spec, synthesized):
766 timestamp = synthesized.timestamp
767 make_up = ActionGroup(
768 timestamp, comment='make up glitch for %s' % fixed_spec.name)
769 for path in set(fixed_spec.entries) & set(synthesized.entries):
770 if fixed_spec[path].at == synthesized[path].at:
771 continue
772 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
773 synthesized[path].at)
774 make_up.add(action)
775
776 if not make_up.actions:
777 return None
778 return make_up
779
780 def build_revlist(self, old, new):
781 """Build revlist.
782
783 Returns:
784 list of rev string
785 """
786 logger.info('build_revlist')
787 revlist = []
788
789 # step 1, find all float and fixed specs in the given range.
790 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800791 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800792 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800793 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800794 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
795 float_specs.pop()
796 assert float_specs
797 for spec in float_specs + fixed_specs:
798 self.spec_manager.parse_spec(spec)
799
800 # step 2, synthesize all fixed specs in the range from float specs.
801 specs = float_specs + [fixed_specs[-1]]
802 actions = []
803 logger.debug('len(specs)=%d', len(specs))
804 for i in range(len(specs) - 1):
805 prev_float = specs[i]
806 next_float = specs[i + 1]
807 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
808 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800809 for action in self.generate_actions_between_specs(prev_float, next_float):
810 if action.timestamp < fixed_specs[0].timestamp:
811 continue
812 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800813 action_groups = self.reorder_actions(actions)
814
815 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
816 synthesized = [spec.copy()]
817 for action_group in action_groups:
818 spec.apply(action_group)
819 synthesized.append(spec.copy())
820
821 # step 3, associate fixed specs with synthesized specs.
822 associated_pairs = self.associate_fixed_and_synthesized_specs(
823 fixed_specs, synthesized)
824
825 # step 4, group actions and cache them
826 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
827 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
828 revlist.append(fixed_specs[fixed_index].name)
829 this_action_groups = []
830
831 # handle glitch
832 if fixed_specs[fixed_index].similar_score(
833 synthesized[synthesized_index]) != 0:
834 assert synthesized[synthesized_index].is_subset(
835 fixed_specs[fixed_index])
836 skipped = set(fixed_specs[fixed_index].entries) - set(
837 synthesized[synthesized_index].entries)
838 if skipped:
839 logger.warning(
840 'between %s and %s, '
841 'bisect-kit cannot analyze commit history of following paths:',
842 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
843 for path in sorted(skipped):
844 logger.warning(' %s', path)
845
846 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
847 synthesized[synthesized_index])
848 if make_up:
849 this_action_groups.append(make_up)
850
851 this_action_groups.extend(
852 action_groups[synthesized_index:next_synthesized_index])
853 for idx, ag in enumerate(this_action_groups, 1):
854 rev = make_intra_rev(fixed_specs[fixed_index].name,
855 fixed_specs[next_fixed_index].name, idx)
856 ag.name = rev
857 revlist.append(rev)
858
859 self.save_action_groups_between_releases(
860 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
861 this_action_groups)
862 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
863
864 return revlist
865
866 def save_action_groups_between_releases(self, old, new, action_groups):
867 data = [ag.serialize() for ag in action_groups]
868
869 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
870 if not os.path.exists(cache_dir):
871 os.makedirs(cache_dir)
872 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
873 with file(cache_filename, 'w') as fp:
874 json.dump(data, fp, indent=4, sort_keys=True)
875
876 def load_action_groups_between_releases(self, old, new):
877 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
878 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
879 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800880 raise errors.InternalError(
881 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800882
883 result = []
884 for data in json.load(file(cache_filename)):
885 result.append(ActionGroup.unserialize(data))
886
887 return result
888
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800889 def view_rev_diff(self, revlist, old, new):
890 assert old in revlist
891 assert new in revlist
892 loaded_action_groups = None
893 old_idx = revlist.index(old)
894 new_idx = revlist.index(new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800895
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800896 for i, rev in enumerate(revlist[old_idx:new_idx + 1], old_idx):
897 rev_old, rev_new, index = parse_intra_rev(rev)
898 if rev_old == rev_new:
899 logger.info('[%d] %s', i, rev)
900 else:
901 if loaded_action_groups != (rev_old, rev_new):
902 action_groups = self.load_action_groups_between_releases(
903 rev_old, rev_new)
904 loaded_action_groups = (rev_old, rev_new)
905 # Indexes inside intra_rev are 1 based.
906 action_group = action_groups[index - 1]
907 summary = action_group.summary(self.code_storage)
908 logger.info('[%d] %s %s', i, rev, summary)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800909
910 def switch(self, rev):
911 # easy case
912 if not re.match(_re_intra_rev, rev):
913 self.spec_manager.sync_disk_state(rev)
914 return
915
916 rev_old, rev_new, idx = parse_intra_rev(rev)
917 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
918 assert 0 <= idx <= len(action_groups)
919 action_groups = action_groups[:idx]
920
921 self.spec_manager.sync_disk_state(rev_old)
922
923 apply_actions(self.code_storage, action_groups, self.root_dir)