blob: 30950dfa3707caf3f667aa52438e74c76071a2b6 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
89 m = re.match(_re_intra_rev, s)
90 if m:
91 try:
92 argtype(m.group(1))
93 argtype(m.group(2))
94 return s
95 except cli.ArgTypeError as e:
96 examples = []
97 for example in e.example:
98 examples.append(make_intra_rev(example, example, 10))
99 raise cli.ArgTypeError('Invalid intra rev', examples)
100 raise cli.ArgTypeError('Invalid intra rev',
101 [make_intra_rev('<rev1>', '<rev2>', 10)])
102
103 return argtype_function
104
105
106def _normalize_repo_url(repo_url):
107 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
108 r'https://chrome-internal.googlesource.com/', repo_url)
109 repo_url = re.sub(r'\.git$', '', repo_url)
110 return repo_url
111
112
113class PathSpec(object):
114 """Specified code version of one path.
115
116 Attributes:
117 path: local path, relative to project base dir
118 repo_url: code repository location
119 at: code version, could be git hash or branch name
120 """
121
122 def __init__(self, path, repo_url, at):
123 self.path = path
124 self.repo_url = repo_url
125 self.at = at
126
127 def is_static(self):
128 return git_util.is_git_rev(self.at)
129
130 def __eq__(self, rhs):
131 if self.path != rhs.path:
132 return False
133 if self.at != rhs.at:
134 return False
135 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
136 return False
137 return True
138
139 def __ne__(self, rhs):
140 return not self == rhs
141
142
143class Spec(object):
144 """Collection of PathSpec.
145
146 Spec is analogy to gclient's DEPS and repo's manifest.
147
148 Attributes:
149 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
150 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
151 pinned and the actual version (git commit) may change over time.
152 name: name of this spec, for debugging purpose. usually version number
153 or git hash
154 timestamp: timestamp of this spec
155 path: path of spec
156 entries: paths to PathSpec dict
157 """
158
159 def __init__(self, spec_type, name, timestamp, path, entries=None):
160 self.spec_type = spec_type
161 self.name = name
162 self.timestamp = timestamp
163 self.path = path
164 self.entries = entries
165
166 def copy(self):
167 return copy.deepcopy(self)
168
169 def similar_score(self, rhs):
170 """Calculates similar score to another Spec.
171
172 Returns:
173 score of similarity. Smaller value is more similar.
174 """
175 score = 0
176 for path in set(self.entries) & set(rhs.entries):
177 if rhs[path] == self[path]:
178 continue
179 if rhs[path].at == self[path].at:
180 # it's often that remote repo moved around but should be treated as the
181 # same one
182 score += 0.1
183 else:
184 score += 1
185 score += len(set(self.entries) ^ set(rhs.entries))
186 return score
187
188 def is_static(self):
189 return all(path_spec.is_static() for path_spec in self.entries.values())
190
191 def is_subset(self, rhs):
192 return set(self.entries.keys()) <= set(rhs.entries.keys())
193
194 def __getitem__(self, path):
195 return self.entries[path]
196
197 def __contains__(self, path):
198 return path in self.entries
199
200 def apply(self, action_group):
201 self.timestamp = action_group.timestamp
202 self.name = '(%s)' % self.timestamp
203 for action in action_group.actions:
204 if isinstance(action, GitAddRepo):
205 self.entries[action.path] = PathSpec(action.path, action.repo_url,
206 action.rev)
207 elif isinstance(action, GitCheckoutCommit):
208 self.entries[action.path].at = action.rev
209 elif isinstance(action, GitRemoveRepo):
210 del self.entries[action.path]
211 else:
212 assert 0, 'unknown action: %s' % action.__class__.__name__
213
214 def dump(self):
215 # for debugging
216 print(self.name, self.path, self.timestamp)
217 print('size', len(self.entries))
218 for path, path_spec in sorted(self.entries.items()):
219 print(path, path_spec.at)
220
221 def diff(self, rhs):
222 logger.info('diff between %s and %s', self.name, rhs.name)
223 expect = set(self.entries)
224 actual = set(rhs.entries)
225 common = 0
226 for path in sorted(expect - actual):
227 logger.info('-%s', path)
228 for path in sorted(actual - expect):
229 logger.info('+%s', path)
230 for path in sorted(expect & actual):
231 if self[path] == rhs[path]:
232 common += 1
233 continue
234 if self[path].at != rhs[path].at:
235 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
236 if self[path].repo_url != rhs[path].repo_url:
237 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
238 rhs[path].repo_url)
239 logger.info('and common=%s', common)
240
241
242class Action(object):
243 """Actions describe changes from one Spec to another.
244
245 Attributes:
246 timestamp: action time
247 path: action path, which is relative to project root
248 """
249
250 def __init__(self, timestamp, path):
251 self.timestamp = timestamp
252 self.path = path
253
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800254 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800255 raise NotImplementedError
256
257 def summary(self, _code_storage):
258 raise NotImplementedError
259
260 def __eq__(self, rhs):
261 return self.__dict__ == rhs.__dict__
262
263 def serialize(self):
264 return self.__class__.__name__, self.__dict__
265
266
267def unserialize_action(data):
268 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
269 class_name, values = data
270 assert class_name in [cls.__name__ for cls in classes
271 ], 'unknown action class: %s' % class_name
272 for cls in classes:
273 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800274 action = cls(**values)
275 break
276 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800277
278
279class ActionGroup(object):
280 """Atomic group of Action objects
281
282 This models atomic commits (for example, gerrit topic, or circular
283 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
284 object.
285 """
286
287 def __init__(self, timestamp, comment=None):
288 self.timestamp = timestamp
289 self.name = None
290 self.actions = []
291 self.comment = comment
292
293 def add(self, action):
294 self.actions.append(action)
295
296 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800297 return dict(
298 timestamp=self.timestamp,
299 name=self.name,
300 comment=self.comment,
301 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800302
303 def summary(self, code_storage):
304 if self.comment:
305 return self.comment
Kuang-che Wu22455262018-08-03 15:38:29 +0800306 assert self.actions
307 if len(self.actions) > 1:
308 # TODO(kcwu): show details for multiple Actions
309 return '(%d actions)' % len(self.actions)
310 else:
311 return self.actions[0].summary(code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800312
313 @staticmethod
314 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800315 ag = ActionGroup(data['timestamp'])
316 ag.name = data['name']
317 ag.comment = data['comment']
318 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800319 ag.add(unserialize_action(x))
320 return ag
321
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800322 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800323 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800324 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800325
326
327class GitCheckoutCommit(Action):
328 """Describes a git commit action.
329
330 Attributes:
331 repo_url: the corresponding url of git repo
332 rev: git commit to checkout
333 """
334
335 def __init__(self, timestamp, path, repo_url, rev):
336 super(GitCheckoutCommit, self).__init__(timestamp, path)
337 self.repo_url = repo_url
338 self.rev = rev
339
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800340 def apply(self, code_storage, root_dir):
341 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800342 git_repo = os.path.join(root_dir, self.path)
343 assert git_util.is_git_root(git_repo)
344 git_util.checkout_version(git_repo, self.rev)
345
346 def summary(self, code_storage):
347 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800348 try:
349 summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
350 except subprocess.CalledProcessError:
351 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
352 git_root)
353 summary = '(unknown)'
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800354 return 'commit %s %s %r' % (self.rev[:10], self.path, summary)
355
356
357class GitAddRepo(Action):
358 """Describes a git repo add action.
359
360 Attributes:
361 repo_url: the corresponding url of git repo to add
362 rev: git commit to checkout
363 """
364
365 def __init__(self, timestamp, path, repo_url, rev):
366 super(GitAddRepo, self).__init__(timestamp, path)
367 self.repo_url = repo_url
368 self.rev = rev
369
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800370 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800371 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800372 assert not os.path.exists(git_repo)
373
374 reference = code_storage.cached_git_root(self.repo_url)
375 git_util.clone(git_repo, self.repo_url, reference=reference)
376 git_util.checkout_version(git_repo, self.rev)
377
378 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800379
380 def summary(self, _code_storage):
381 return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
382
383
384class GitRemoveRepo(Action):
385 """Describes a git repo remove action."""
386
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800387 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800388 assert self.path
389 git_repo = os.path.join(root_dir, self.path)
390 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800391 shutil.rmtree(git_repo)
392
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800393 code_storage.remove_from_project_list(root_dir, self.path)
394
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800395 def summary(self, _code_storage):
396 return 'remove repo %s' % self.path
397
398
399def apply_actions(code_storage, action_groups, root_dir):
400 # Speed optimization: only apply the last one of consecutive commits per
401 # repo. It is possible to optimize further, but need to take care git repo
402 # add/remove within another repo.
403 commits = {}
404
405 def batch_apply(commits):
406 for i, commit_action in sorted(commits.values()):
407 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800408 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800409
410 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800411 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800412 if not isinstance(action, GitCheckoutCommit):
413 break
414 else:
415 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800416 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800417 commits[action.path] = (i, action)
418 continue
419
420 batch_apply(commits)
421 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800422 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800423
424 batch_apply(commits)
425
426
427class SpecManager(object):
428 """Spec related abstract operations.
429
430 This class enumerates Spec instances and switch disk state to Spec.
431
432 In other words, this class abstracts:
433 - discovery of gclient's DEPS and repo's manifest
434 - gclient sync and repo sync
435 """
436
437 def collect_float_spec(self, old, new):
438 """Collects float Spec between two versions.
439
440 This method may fetch spec from network. However, it should not switch tree
441 version state.
442 """
443 raise NotImplementedError
444
445 def collect_fixed_spec(self, old, new):
446 """Collects fixed Spec between two versions.
447
448 This method may fetch spec from network. However, it should not switch tree
449 version state.
450 """
451 raise NotImplementedError
452
453 def parse_spec(self, spec):
454 """Parses information for Spec object.
455
456 Args:
457 spec: Spec object. It specifies what to parse and the parsed information
458 is stored inside.
459 """
460 raise NotImplementedError
461
462 def sync_disk_state(self, rev):
463 """Switch source tree state to given version."""
464 raise NotImplementedError
465
466
467class CodeStorage(object):
468 """Query code history and commit relationship without checkout.
469
470 Because paths inside source tree may be deleted or map to different remote
471 repo in different versions, we cannot query git information of one version
472 but the tree state is at another version. In order to query information
473 without changing tree state and fast, we need out of tree source code
474 storage.
475
476 This class assumes all git repos are mirrored somewhere on local disk.
477 Subclasses just need to implement cached_git_root() which returns the
478 location.
479
480 In other words, this class abstracts operations upon gclient's cache-dir
481 repo's mirror.
482 """
483
484 def cached_git_root(self, repo_url):
485 """The cached path of given remote git repo.
486
487 Args:
488 repo_url: URL of git remote repo
489
490 Returns:
491 path of cache folder
492 """
493 raise NotImplementedError
494
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800495 def add_to_project_list(self, project_root, path, repo_url):
496 raise NotImplementedError
497
498 def remove_from_project_list(self, project_root, path):
499 raise NotImplementedError
500
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800501 def is_ancestor_commit(self, spec, path, old, new):
502 """Determine one commit is ancestor of another.
503
504 Args:
505 spec: Spec object
506 path: local path relative to project root
507 old: commit id
508 new: commit id
509
510 Returns:
511 True if `old` is ancestor of `new`
512 """
513 git_root = self.cached_git_root(spec[path].repo_url)
514 return git_util.is_ancestor_commit(git_root, old, new)
515
516 def get_rev_by_time(self, spec, path, timestamp):
517 """Get commit hash of given spec by time.
518
519 Args:
520 spec: Spec object
521 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800522 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800523
524 Returns:
525 The commit hash of given time. If there are commits with the given
526 timestamp, returns the last commit.
527 """
528 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800529 # spec[path].at is remote reference name. Since git_root is a mirror (not
530 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800531 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800532
533 def get_actions_between_two_commit(self, spec, path, old, new):
534 git_root = self.cached_git_root(spec[path].repo_url)
535 result = []
536 for timestamp, git_rev in git_util.list_commits_between_commits(
537 git_root, old, new):
538 result.append(
539 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
540 return result
541
542 def is_containing_commit(self, spec, path, rev):
543 git_root = self.cached_git_root(spec[path].repo_url)
544 return git_util.is_containing_commit(git_root, rev)
545
546 def are_spec_commits_available(self, spec):
547 for path, path_spec in spec.entries.items():
548 if not path_spec.is_static():
549 continue
550 if not self.is_containing_commit(spec, path, path_spec.at):
551 return False
552 return True
553
554
555class CodeManager(object):
556 """Class to reconstruct historical source tree state.
557
558 This class can reconstruct all moments of source tree state and diffs between
559 them.
560
561 Attributes:
562 root_dir: root path of project source tree
563 spec_manager: SpecManager object
564 code_storage: CodeStorage object
565 """
566
567 def __init__(self, root_dir, spec_manager, code_storage):
568 self.root_dir = root_dir
569 self.spec_manager = spec_manager
570 self.code_storage = code_storage
571
572 def generate_actions_between_specs(self, prev_float, next_float):
573 """Generates actions between two float specs.
574
575 Args:
576 prev_float: start of spec object (exclusive)
577 next_float: end of spec object (inclusive)
578
579 Returns:
580 list of Action object (unordered)
581 """
582 actions = []
583 for path in set(prev_float.entries) | set(next_float.entries):
584
585 # Add repo
586 if path not in prev_float:
587 if next_float[path].is_static():
588 next_at = next_float[path].at
589 else:
590 next_at = self.code_storage.get_rev_by_time(next_float, path,
591 next_float.timestamp)
592 actions.append(
593 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
594 next_at))
595 continue
596
597 # Existing path is floating, enumerates commits until next spec.
598 #
599 # prev_at till_at
600 # prev branch ---> o --------> o --------> o --------> o --------> ...
601 # ^ ^
602 # prev_float.timestamp next_float.timestamp
603 if not prev_float[path].is_static():
604 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
605 prev_float.timestamp)
606 till_at = self.code_storage.get_rev_by_time(prev_float, path,
607 next_float.timestamp)
608
609 actions.extend(
610 self.code_storage.get_actions_between_two_commit(
611 prev_float, path, prev_at, till_at))
612 else:
613 prev_at = till_at = prev_float[path].at
614
615 # At next_float.timestamp.
616 if path not in next_float:
617 # remove repo
618 actions.append(GitRemoveRepo(next_float.timestamp, path))
619 next_at = None
620
621 elif next_float[path].is_static():
622 # pinned to certain commit on different branch
623 next_at = next_float[path].at
624
625 elif next_float[path].at == prev_float[path].at:
626 # keep floating on the same branch
627 next_at = till_at
628
629 else:
630 # switch to another branch
631 # prev_at till_at
632 # prev branch ---> o --------> o --------> o --------> o --------> ...
633 #
634 # next_at
635 # next branch ...... o ------> o --------> o -----> ...
636 # ^ ^
637 # prev_float.timestamp next_float.timestamp
638 next_at = self.code_storage.get_rev_by_time(next_float, path,
639 next_float.timestamp)
640
641 if next_at and next_at != till_at:
642 actions.append(
643 GitCheckoutCommit(next_float.timestamp, path,
644 next_float[path].repo_url, next_at))
645
646 return actions
647
648 def synthesize_fixed_spec(self, float_spec, timestamp):
649 """Synthesizes fixed spec from float spec of given time.
650
651 Args:
652 float_spec: the float spec
653 timestamp: snapshot time
654
655 Returns:
656 Spec object
657 """
658 result = {}
659 for path, path_spec in float_spec.entries.items():
660 if not path_spec.is_static():
661 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
662 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
663
664 result[path] = copy.deepcopy(path_spec)
665
666 name = '%s@%s' % (float_spec.path, timestamp)
667 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
668
669 def reorder_actions(self, actions):
670 """Reorder and cluster actions.
671
672 Args:
673 actions: list of Action objects
674
675 Returns:
676 list of ActionGroup objects
677 """
678 # TODO(kcwu): support atomic commits across repos
679 actions.sort(key=lambda x: x.timestamp)
680 result = []
681 for action in actions:
682 group = ActionGroup(action.timestamp)
683 group.add(action)
684 result.append(group)
685 return result
686
687 def match_spec(self, target, specs, start_index=0):
688 threshold = 3600
689 # ideal_index is the index of last spec before target
690 # begin and end are the range of indexes within threshold (inclusive)
691 ideal_index = None
692 begin, end = None, None
693 for i, spec in enumerate(specs[start_index:], start_index):
694 if spec.timestamp <= target.timestamp:
695 ideal_index = i
696 if abs(spec.timestamp - target.timestamp) < threshold:
697 if begin is None:
698 begin = i
699 end = i
700
701 candidates = []
702 if ideal_index is not None:
703 candidates.append(ideal_index)
704 if begin is not None:
705 candidates.extend(range(begin, end + 1))
706 if not candidates:
707 logger.error('unable to match %s: all specs are after it', target.name)
708 return None
709
710 compatible_candidates = [
711 i for i in candidates if specs[i].is_subset(target)
712 ]
713 if not compatible_candidates:
714 logger.error('unable to match %s: no compatible specs', target.name)
715 spec = specs[candidates[0]]
716 target.diff(spec)
717 return None
718
719 scores = []
720 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800721 # Tie-break: prefer earlier timestamp and smaller difference.
722 if specs[i].timestamp <= target.timestamp:
723 timediff = 0, target.timestamp - specs[i].timestamp
724 else:
725 timediff = 1, specs[i].timestamp - target.timestamp
726 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800727 scores.sort()
728
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800729 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800730 if score != 0:
731 logger.warning('not exactly match (score=%s): %s', score, target.name)
732 target.diff(specs[index])
733
734 if index < ideal_index:
735 logger.warning(
736 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
737 target.name, target.timestamp, specs[index].timestamp,
738 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800739 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800740 if index > ideal_index:
741 logger.warning(
742 'spec committed at %d matched later commit at %d. bad server clock?',
743 target.timestamp, specs[index].timestamp)
744
745 return index
746
747 def associate_fixed_and_synthesized_specs(self, fixed_specs,
748 synthesized_specs):
749 # All fixed specs are snapshot of float specs. Theoretically, they
750 # should be identical to one of the synthesized specs.
751 # However, it's not always true for some reasons --- maybe due to race
752 # condition, maybe due to bugs of this bisect-kit.
753 # To overcome this glitch, we try to match them by similarity instead of
754 # exact match.
755 result = []
756 last_index = 0
757 for i, fixed_spec in enumerate(fixed_specs):
758 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
759 if matched_index is None:
760 if i in (0, len(fixed_specs) - 1):
761 logger.error('essential spec mismatch, unable to continue')
762 assert 0
763 else:
764 logger.warning('%s do not match, skip', fixed_spec.name)
765 continue
766 result.append((i, matched_index))
767 last_index = matched_index
768
769 return result
770
771 def _create_make_up_actions(self, fixed_spec, synthesized):
772 timestamp = synthesized.timestamp
773 make_up = ActionGroup(
774 timestamp, comment='make up glitch for %s' % fixed_spec.name)
775 for path in set(fixed_spec.entries) & set(synthesized.entries):
776 if fixed_spec[path].at == synthesized[path].at:
777 continue
778 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
779 synthesized[path].at)
780 make_up.add(action)
781
782 if not make_up.actions:
783 return None
784 return make_up
785
786 def build_revlist(self, old, new):
787 """Build revlist.
788
789 Returns:
790 list of rev string
791 """
792 logger.info('build_revlist')
793 revlist = []
794
795 # step 1, find all float and fixed specs in the given range.
796 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800797 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800798 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800799 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800800 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
801 float_specs.pop()
802 assert float_specs
803 for spec in float_specs + fixed_specs:
804 self.spec_manager.parse_spec(spec)
805
806 # step 2, synthesize all fixed specs in the range from float specs.
807 specs = float_specs + [fixed_specs[-1]]
808 actions = []
809 logger.debug('len(specs)=%d', len(specs))
810 for i in range(len(specs) - 1):
811 prev_float = specs[i]
812 next_float = specs[i + 1]
813 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
814 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800815 for action in self.generate_actions_between_specs(prev_float, next_float):
816 if action.timestamp < fixed_specs[0].timestamp:
817 continue
818 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800819 action_groups = self.reorder_actions(actions)
820
821 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
822 synthesized = [spec.copy()]
823 for action_group in action_groups:
824 spec.apply(action_group)
825 synthesized.append(spec.copy())
826
827 # step 3, associate fixed specs with synthesized specs.
828 associated_pairs = self.associate_fixed_and_synthesized_specs(
829 fixed_specs, synthesized)
830
831 # step 4, group actions and cache them
832 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
833 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
834 revlist.append(fixed_specs[fixed_index].name)
835 this_action_groups = []
836
837 # handle glitch
838 if fixed_specs[fixed_index].similar_score(
839 synthesized[synthesized_index]) != 0:
840 assert synthesized[synthesized_index].is_subset(
841 fixed_specs[fixed_index])
842 skipped = set(fixed_specs[fixed_index].entries) - set(
843 synthesized[synthesized_index].entries)
844 if skipped:
845 logger.warning(
846 'between %s and %s, '
847 'bisect-kit cannot analyze commit history of following paths:',
848 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
849 for path in sorted(skipped):
850 logger.warning(' %s', path)
851
852 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
853 synthesized[synthesized_index])
854 if make_up:
855 this_action_groups.append(make_up)
856
857 this_action_groups.extend(
858 action_groups[synthesized_index:next_synthesized_index])
859 for idx, ag in enumerate(this_action_groups, 1):
860 rev = make_intra_rev(fixed_specs[fixed_index].name,
861 fixed_specs[next_fixed_index].name, idx)
862 ag.name = rev
863 revlist.append(rev)
864
865 self.save_action_groups_between_releases(
866 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
867 this_action_groups)
868 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
869
870 return revlist
871
872 def save_action_groups_between_releases(self, old, new, action_groups):
873 data = [ag.serialize() for ag in action_groups]
874
875 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
876 if not os.path.exists(cache_dir):
877 os.makedirs(cache_dir)
878 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
879 with file(cache_filename, 'w') as fp:
880 json.dump(data, fp, indent=4, sort_keys=True)
881
882 def load_action_groups_between_releases(self, old, new):
883 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
884 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
885 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800886 raise errors.InternalError(
887 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800888
889 result = []
890 for data in json.load(file(cache_filename)):
891 result.append(ActionGroup.unserialize(data))
892
893 return result
894
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800895 def view_rev_diff(self, revlist, old, new):
896 assert old in revlist
897 assert new in revlist
898 loaded_action_groups = None
899 old_idx = revlist.index(old)
900 new_idx = revlist.index(new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800901
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800902 for i, rev in enumerate(revlist[old_idx:new_idx + 1], old_idx):
903 rev_old, rev_new, index = parse_intra_rev(rev)
904 if rev_old == rev_new:
905 logger.info('[%d] %s', i, rev)
906 else:
907 if loaded_action_groups != (rev_old, rev_new):
908 action_groups = self.load_action_groups_between_releases(
909 rev_old, rev_new)
910 loaded_action_groups = (rev_old, rev_new)
911 # Indexes inside intra_rev are 1 based.
912 action_group = action_groups[index - 1]
913 summary = action_group.summary(self.code_storage)
914 logger.info('[%d] %s %s', i, rev, summary)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800915
916 def switch(self, rev):
917 # easy case
918 if not re.match(_re_intra_rev, rev):
919 self.spec_manager.sync_disk_state(rev)
920 return
921
922 rev_old, rev_new, idx = parse_intra_rev(rev)
923 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
924 assert 0 <= idx <= len(action_groups)
925 action_groups = action_groups[:idx]
926
927 self.spec_manager.sync_disk_state(rev_old)
928
929 apply_actions(self.code_storage, action_groups, self.root_dir)