blob: a0b4ad041056a465c9cdbc4a1b4676ef6b64b5d3 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
20
21from bisect_kit import cli
22from bisect_kit import git_util
23
24logger = logging.getLogger(__name__)
25
26_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
27
28SPEC_FIXED = 'fixed'
29SPEC_FLOAT = 'float'
30_DIFF_CACHE_DIR = 'bisectkit-cache'
31
32
33def make_intra_rev(a, b, index):
34 """Makes intra-rev version string.
35
36 Between two major "named" versions a and b, there are many small changes
37 (commits) in-between. bisect-kit will identify all those instances and bisect
38 them. We give names to those instances and call these names as "intra-rev"
39 which stands for minor version numbers within two major version.
40
41 Note, a+index (without b) is not enough to identify an unique change due to
42 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
43 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
44 in 9900.1.0 and 9901.0.0.
45
46 Args:
47 a: the start version
48 b: the end version
49 index: the index number of changes between a and b
50
51 Returns:
52 the intra-rev version string
53 """
54 return '%s~%s/%d' % (a, b, index)
55
56
57def parse_intra_rev(rev):
58 """Decomposes intra-rev string.
59
60 See comments of make_intra_rev for what is intra-rev.
61
62 Args:
63 rev: intra-rev string or normal version number
64
65 Returns:
66 (start, end, index). If rev is not intra-rev, it must be normal version
67 number and returns (rev, rev, 0).
68 """
69 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080070 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080071 return rev, rev, 0
72
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080073 return m.group(1), m.group(2), int(m.group(3))
74
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080075
76def argtype_intra_rev(argtype):
77 """Validates argument is intra-rev.
78
79 Args:
80 argtype: argtype function which validates major version number
81
82 Returns:
83 A new argtype function which matches intra-rev
84 """
85
86 def argtype_function(s):
87 m = re.match(_re_intra_rev, s)
88 if m:
89 try:
90 argtype(m.group(1))
91 argtype(m.group(2))
92 return s
93 except cli.ArgTypeError as e:
94 examples = []
95 for example in e.example:
96 examples.append(make_intra_rev(example, example, 10))
97 raise cli.ArgTypeError('Invalid intra rev', examples)
98 raise cli.ArgTypeError('Invalid intra rev',
99 [make_intra_rev('<rev1>', '<rev2>', 10)])
100
101 return argtype_function
102
103
104def _normalize_repo_url(repo_url):
105 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
106 r'https://chrome-internal.googlesource.com/', repo_url)
107 repo_url = re.sub(r'\.git$', '', repo_url)
108 return repo_url
109
110
111class PathSpec(object):
112 """Specified code version of one path.
113
114 Attributes:
115 path: local path, relative to project base dir
116 repo_url: code repository location
117 at: code version, could be git hash or branch name
118 """
119
120 def __init__(self, path, repo_url, at):
121 self.path = path
122 self.repo_url = repo_url
123 self.at = at
124
125 def is_static(self):
126 return git_util.is_git_rev(self.at)
127
128 def __eq__(self, rhs):
129 if self.path != rhs.path:
130 return False
131 if self.at != rhs.at:
132 return False
133 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
134 return False
135 return True
136
137 def __ne__(self, rhs):
138 return not self == rhs
139
140
141class Spec(object):
142 """Collection of PathSpec.
143
144 Spec is analogy to gclient's DEPS and repo's manifest.
145
146 Attributes:
147 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
148 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
149 pinned and the actual version (git commit) may change over time.
150 name: name of this spec, for debugging purpose. usually version number
151 or git hash
152 timestamp: timestamp of this spec
153 path: path of spec
154 entries: paths to PathSpec dict
155 """
156
157 def __init__(self, spec_type, name, timestamp, path, entries=None):
158 self.spec_type = spec_type
159 self.name = name
160 self.timestamp = timestamp
161 self.path = path
162 self.entries = entries
163
164 def copy(self):
165 return copy.deepcopy(self)
166
167 def similar_score(self, rhs):
168 """Calculates similar score to another Spec.
169
170 Returns:
171 score of similarity. Smaller value is more similar.
172 """
173 score = 0
174 for path in set(self.entries) & set(rhs.entries):
175 if rhs[path] == self[path]:
176 continue
177 if rhs[path].at == self[path].at:
178 # it's often that remote repo moved around but should be treated as the
179 # same one
180 score += 0.1
181 else:
182 score += 1
183 score += len(set(self.entries) ^ set(rhs.entries))
184 return score
185
186 def is_static(self):
187 return all(path_spec.is_static() for path_spec in self.entries.values())
188
189 def is_subset(self, rhs):
190 return set(self.entries.keys()) <= set(rhs.entries.keys())
191
192 def __getitem__(self, path):
193 return self.entries[path]
194
195 def __contains__(self, path):
196 return path in self.entries
197
198 def apply(self, action_group):
199 self.timestamp = action_group.timestamp
200 self.name = '(%s)' % self.timestamp
201 for action in action_group.actions:
202 if isinstance(action, GitAddRepo):
203 self.entries[action.path] = PathSpec(action.path, action.repo_url,
204 action.rev)
205 elif isinstance(action, GitCheckoutCommit):
206 self.entries[action.path].at = action.rev
207 elif isinstance(action, GitRemoveRepo):
208 del self.entries[action.path]
209 else:
210 assert 0, 'unknown action: %s' % action.__class__.__name__
211
212 def dump(self):
213 # for debugging
214 print(self.name, self.path, self.timestamp)
215 print('size', len(self.entries))
216 for path, path_spec in sorted(self.entries.items()):
217 print(path, path_spec.at)
218
219 def diff(self, rhs):
220 logger.info('diff between %s and %s', self.name, rhs.name)
221 expect = set(self.entries)
222 actual = set(rhs.entries)
223 common = 0
224 for path in sorted(expect - actual):
225 logger.info('-%s', path)
226 for path in sorted(actual - expect):
227 logger.info('+%s', path)
228 for path in sorted(expect & actual):
229 if self[path] == rhs[path]:
230 common += 1
231 continue
232 if self[path].at != rhs[path].at:
233 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
234 if self[path].repo_url != rhs[path].repo_url:
235 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
236 rhs[path].repo_url)
237 logger.info('and common=%s', common)
238
239
240class Action(object):
241 """Actions describe changes from one Spec to another.
242
243 Attributes:
244 timestamp: action time
245 path: action path, which is relative to project root
246 """
247
248 def __init__(self, timestamp, path):
249 self.timestamp = timestamp
250 self.path = path
251
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800252 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800253 raise NotImplementedError
254
255 def summary(self, _code_storage):
256 raise NotImplementedError
257
258 def __eq__(self, rhs):
259 return self.__dict__ == rhs.__dict__
260
261 def serialize(self):
262 return self.__class__.__name__, self.__dict__
263
264
265def unserialize_action(data):
266 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
267 class_name, values = data
268 assert class_name in [cls.__name__ for cls in classes
269 ], 'unknown action class: %s' % class_name
270 for cls in classes:
271 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800272 action = cls(**values)
273 break
274 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800275
276
277class ActionGroup(object):
278 """Atomic group of Action objects
279
280 This models atomic commits (for example, gerrit topic, or circular
281 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
282 object.
283 """
284
285 def __init__(self, timestamp, comment=None):
286 self.timestamp = timestamp
287 self.name = None
288 self.actions = []
289 self.comment = comment
290
291 def add(self, action):
292 self.actions.append(action)
293
294 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800295 return dict(
296 timestamp=self.timestamp,
297 name=self.name,
298 comment=self.comment,
299 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800300
301 def summary(self, code_storage):
302 if self.comment:
303 return self.comment
Kuang-che Wu22455262018-08-03 15:38:29 +0800304 assert self.actions
305 if len(self.actions) > 1:
306 # TODO(kcwu): show details for multiple Actions
307 return '(%d actions)' % len(self.actions)
308 else:
309 return self.actions[0].summary(code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800310
311 @staticmethod
312 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800313 ag = ActionGroup(data['timestamp'])
314 ag.name = data['name']
315 ag.comment = data['comment']
316 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317 ag.add(unserialize_action(x))
318 return ag
319
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800320 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800321 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800322 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800323
324
325class GitCheckoutCommit(Action):
326 """Describes a git commit action.
327
328 Attributes:
329 repo_url: the corresponding url of git repo
330 rev: git commit to checkout
331 """
332
333 def __init__(self, timestamp, path, repo_url, rev):
334 super(GitCheckoutCommit, self).__init__(timestamp, path)
335 self.repo_url = repo_url
336 self.rev = rev
337
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800338 def apply(self, code_storage, root_dir):
339 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800340 git_repo = os.path.join(root_dir, self.path)
341 assert git_util.is_git_root(git_repo)
342 git_util.checkout_version(git_repo, self.rev)
343
344 def summary(self, code_storage):
345 git_root = code_storage.cached_git_root(self.repo_url)
346 summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
347 return 'commit %s %s %r' % (self.rev[:10], self.path, summary)
348
349
350class GitAddRepo(Action):
351 """Describes a git repo add action.
352
353 Attributes:
354 repo_url: the corresponding url of git repo to add
355 rev: git commit to checkout
356 """
357
358 def __init__(self, timestamp, path, repo_url, rev):
359 super(GitAddRepo, self).__init__(timestamp, path)
360 self.repo_url = repo_url
361 self.rev = rev
362
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800363 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800364 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800365 assert not os.path.exists(git_repo)
366
367 reference = code_storage.cached_git_root(self.repo_url)
368 git_util.clone(git_repo, self.repo_url, reference=reference)
369 git_util.checkout_version(git_repo, self.rev)
370
371 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800372
373 def summary(self, _code_storage):
374 return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
375
376
377class GitRemoveRepo(Action):
378 """Describes a git repo remove action."""
379
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800380 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800381 assert self.path
382 git_repo = os.path.join(root_dir, self.path)
383 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800384 shutil.rmtree(git_repo)
385
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800386 code_storage.remove_from_project_list(root_dir, self.path)
387
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800388 def summary(self, _code_storage):
389 return 'remove repo %s' % self.path
390
391
392def apply_actions(code_storage, action_groups, root_dir):
393 # Speed optimization: only apply the last one of consecutive commits per
394 # repo. It is possible to optimize further, but need to take care git repo
395 # add/remove within another repo.
396 commits = {}
397
398 def batch_apply(commits):
399 for i, commit_action in sorted(commits.values()):
400 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800401 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800402
403 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800404 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800405 if not isinstance(action, GitCheckoutCommit):
406 break
407 else:
408 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800409 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410 commits[action.path] = (i, action)
411 continue
412
413 batch_apply(commits)
414 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800415 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800416
417 batch_apply(commits)
418
419
420class SpecManager(object):
421 """Spec related abstract operations.
422
423 This class enumerates Spec instances and switch disk state to Spec.
424
425 In other words, this class abstracts:
426 - discovery of gclient's DEPS and repo's manifest
427 - gclient sync and repo sync
428 """
429
430 def collect_float_spec(self, old, new):
431 """Collects float Spec between two versions.
432
433 This method may fetch spec from network. However, it should not switch tree
434 version state.
435 """
436 raise NotImplementedError
437
438 def collect_fixed_spec(self, old, new):
439 """Collects fixed Spec between two versions.
440
441 This method may fetch spec from network. However, it should not switch tree
442 version state.
443 """
444 raise NotImplementedError
445
446 def parse_spec(self, spec):
447 """Parses information for Spec object.
448
449 Args:
450 spec: Spec object. It specifies what to parse and the parsed information
451 is stored inside.
452 """
453 raise NotImplementedError
454
455 def sync_disk_state(self, rev):
456 """Switch source tree state to given version."""
457 raise NotImplementedError
458
459
460class CodeStorage(object):
461 """Query code history and commit relationship without checkout.
462
463 Because paths inside source tree may be deleted or map to different remote
464 repo in different versions, we cannot query git information of one version
465 but the tree state is at another version. In order to query information
466 without changing tree state and fast, we need out of tree source code
467 storage.
468
469 This class assumes all git repos are mirrored somewhere on local disk.
470 Subclasses just need to implement cached_git_root() which returns the
471 location.
472
473 In other words, this class abstracts operations upon gclient's cache-dir
474 repo's mirror.
475 """
476
477 def cached_git_root(self, repo_url):
478 """The cached path of given remote git repo.
479
480 Args:
481 repo_url: URL of git remote repo
482
483 Returns:
484 path of cache folder
485 """
486 raise NotImplementedError
487
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800488 def add_to_project_list(self, project_root, path, repo_url):
489 raise NotImplementedError
490
491 def remove_from_project_list(self, project_root, path):
492 raise NotImplementedError
493
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800494 def is_ancestor_commit(self, spec, path, old, new):
495 """Determine one commit is ancestor of another.
496
497 Args:
498 spec: Spec object
499 path: local path relative to project root
500 old: commit id
501 new: commit id
502
503 Returns:
504 True if `old` is ancestor of `new`
505 """
506 git_root = self.cached_git_root(spec[path].repo_url)
507 return git_util.is_ancestor_commit(git_root, old, new)
508
509 def get_rev_by_time(self, spec, path, timestamp):
510 """Get commit hash of given spec by time.
511
512 Args:
513 spec: Spec object
514 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800515 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800516
517 Returns:
518 The commit hash of given time. If there are commits with the given
519 timestamp, returns the last commit.
520 """
521 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800522 # spec[path].at is remote reference name. Since git_root is a mirror (not
523 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800524 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800525
526 def get_actions_between_two_commit(self, spec, path, old, new):
527 git_root = self.cached_git_root(spec[path].repo_url)
528 result = []
529 for timestamp, git_rev in git_util.list_commits_between_commits(
530 git_root, old, new):
531 result.append(
532 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
533 return result
534
535 def is_containing_commit(self, spec, path, rev):
536 git_root = self.cached_git_root(spec[path].repo_url)
537 return git_util.is_containing_commit(git_root, rev)
538
539 def are_spec_commits_available(self, spec):
540 for path, path_spec in spec.entries.items():
541 if not path_spec.is_static():
542 continue
543 if not self.is_containing_commit(spec, path, path_spec.at):
544 return False
545 return True
546
547
548class CodeManager(object):
549 """Class to reconstruct historical source tree state.
550
551 This class can reconstruct all moments of source tree state and diffs between
552 them.
553
554 Attributes:
555 root_dir: root path of project source tree
556 spec_manager: SpecManager object
557 code_storage: CodeStorage object
558 """
559
560 def __init__(self, root_dir, spec_manager, code_storage):
561 self.root_dir = root_dir
562 self.spec_manager = spec_manager
563 self.code_storage = code_storage
564
565 def generate_actions_between_specs(self, prev_float, next_float):
566 """Generates actions between two float specs.
567
568 Args:
569 prev_float: start of spec object (exclusive)
570 next_float: end of spec object (inclusive)
571
572 Returns:
573 list of Action object (unordered)
574 """
575 actions = []
576 for path in set(prev_float.entries) | set(next_float.entries):
577
578 # Add repo
579 if path not in prev_float:
580 if next_float[path].is_static():
581 next_at = next_float[path].at
582 else:
583 next_at = self.code_storage.get_rev_by_time(next_float, path,
584 next_float.timestamp)
585 actions.append(
586 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
587 next_at))
588 continue
589
590 # Existing path is floating, enumerates commits until next spec.
591 #
592 # prev_at till_at
593 # prev branch ---> o --------> o --------> o --------> o --------> ...
594 # ^ ^
595 # prev_float.timestamp next_float.timestamp
596 if not prev_float[path].is_static():
597 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
598 prev_float.timestamp)
599 till_at = self.code_storage.get_rev_by_time(prev_float, path,
600 next_float.timestamp)
601
602 actions.extend(
603 self.code_storage.get_actions_between_two_commit(
604 prev_float, path, prev_at, till_at))
605 else:
606 prev_at = till_at = prev_float[path].at
607
608 # At next_float.timestamp.
609 if path not in next_float:
610 # remove repo
611 actions.append(GitRemoveRepo(next_float.timestamp, path))
612 next_at = None
613
614 elif next_float[path].is_static():
615 # pinned to certain commit on different branch
616 next_at = next_float[path].at
617
618 elif next_float[path].at == prev_float[path].at:
619 # keep floating on the same branch
620 next_at = till_at
621
622 else:
623 # switch to another branch
624 # prev_at till_at
625 # prev branch ---> o --------> o --------> o --------> o --------> ...
626 #
627 # next_at
628 # next branch ...... o ------> o --------> o -----> ...
629 # ^ ^
630 # prev_float.timestamp next_float.timestamp
631 next_at = self.code_storage.get_rev_by_time(next_float, path,
632 next_float.timestamp)
633
634 if next_at and next_at != till_at:
635 actions.append(
636 GitCheckoutCommit(next_float.timestamp, path,
637 next_float[path].repo_url, next_at))
638
639 return actions
640
641 def synthesize_fixed_spec(self, float_spec, timestamp):
642 """Synthesizes fixed spec from float spec of given time.
643
644 Args:
645 float_spec: the float spec
646 timestamp: snapshot time
647
648 Returns:
649 Spec object
650 """
651 result = {}
652 for path, path_spec in float_spec.entries.items():
653 if not path_spec.is_static():
654 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
655 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
656
657 result[path] = copy.deepcopy(path_spec)
658
659 name = '%s@%s' % (float_spec.path, timestamp)
660 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
661
662 def reorder_actions(self, actions):
663 """Reorder and cluster actions.
664
665 Args:
666 actions: list of Action objects
667
668 Returns:
669 list of ActionGroup objects
670 """
671 # TODO(kcwu): support atomic commits across repos
672 actions.sort(key=lambda x: x.timestamp)
673 result = []
674 for action in actions:
675 group = ActionGroup(action.timestamp)
676 group.add(action)
677 result.append(group)
678 return result
679
680 def match_spec(self, target, specs, start_index=0):
681 threshold = 3600
682 # ideal_index is the index of last spec before target
683 # begin and end are the range of indexes within threshold (inclusive)
684 ideal_index = None
685 begin, end = None, None
686 for i, spec in enumerate(specs[start_index:], start_index):
687 if spec.timestamp <= target.timestamp:
688 ideal_index = i
689 if abs(spec.timestamp - target.timestamp) < threshold:
690 if begin is None:
691 begin = i
692 end = i
693
694 candidates = []
695 if ideal_index is not None:
696 candidates.append(ideal_index)
697 if begin is not None:
698 candidates.extend(range(begin, end + 1))
699 if not candidates:
700 logger.error('unable to match %s: all specs are after it', target.name)
701 return None
702
703 compatible_candidates = [
704 i for i in candidates if specs[i].is_subset(target)
705 ]
706 if not compatible_candidates:
707 logger.error('unable to match %s: no compatible specs', target.name)
708 spec = specs[candidates[0]]
709 target.diff(spec)
710 return None
711
712 scores = []
713 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800714 # Tie-break: prefer earlier timestamp and smaller difference.
715 if specs[i].timestamp <= target.timestamp:
716 timediff = 0, target.timestamp - specs[i].timestamp
717 else:
718 timediff = 1, specs[i].timestamp - target.timestamp
719 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800720 scores.sort()
721
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800722 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800723 if score != 0:
724 logger.warning('not exactly match (score=%s): %s', score, target.name)
725 target.diff(specs[index])
726
727 if index < ideal_index:
728 logger.warning(
729 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
730 target.name, target.timestamp, specs[index].timestamp,
731 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800732 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800733 if index > ideal_index:
734 logger.warning(
735 'spec committed at %d matched later commit at %d. bad server clock?',
736 target.timestamp, specs[index].timestamp)
737
738 return index
739
740 def associate_fixed_and_synthesized_specs(self, fixed_specs,
741 synthesized_specs):
742 # All fixed specs are snapshot of float specs. Theoretically, they
743 # should be identical to one of the synthesized specs.
744 # However, it's not always true for some reasons --- maybe due to race
745 # condition, maybe due to bugs of this bisect-kit.
746 # To overcome this glitch, we try to match them by similarity instead of
747 # exact match.
748 result = []
749 last_index = 0
750 for i, fixed_spec in enumerate(fixed_specs):
751 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
752 if matched_index is None:
753 if i in (0, len(fixed_specs) - 1):
754 logger.error('essential spec mismatch, unable to continue')
755 assert 0
756 else:
757 logger.warning('%s do not match, skip', fixed_spec.name)
758 continue
759 result.append((i, matched_index))
760 last_index = matched_index
761
762 return result
763
764 def _create_make_up_actions(self, fixed_spec, synthesized):
765 timestamp = synthesized.timestamp
766 make_up = ActionGroup(
767 timestamp, comment='make up glitch for %s' % fixed_spec.name)
768 for path in set(fixed_spec.entries) & set(synthesized.entries):
769 if fixed_spec[path].at == synthesized[path].at:
770 continue
771 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
772 synthesized[path].at)
773 make_up.add(action)
774
775 if not make_up.actions:
776 return None
777 return make_up
778
779 def build_revlist(self, old, new):
780 """Build revlist.
781
782 Returns:
783 list of rev string
784 """
785 logger.info('build_revlist')
786 revlist = []
787
788 # step 1, find all float and fixed specs in the given range.
789 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800790 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800791 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800792 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800793 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
794 float_specs.pop()
795 assert float_specs
796 for spec in float_specs + fixed_specs:
797 self.spec_manager.parse_spec(spec)
798
799 # step 2, synthesize all fixed specs in the range from float specs.
800 specs = float_specs + [fixed_specs[-1]]
801 actions = []
802 logger.debug('len(specs)=%d', len(specs))
803 for i in range(len(specs) - 1):
804 prev_float = specs[i]
805 next_float = specs[i + 1]
806 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
807 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800808 for action in self.generate_actions_between_specs(prev_float, next_float):
809 if action.timestamp < fixed_specs[0].timestamp:
810 continue
811 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800812 action_groups = self.reorder_actions(actions)
813
814 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
815 synthesized = [spec.copy()]
816 for action_group in action_groups:
817 spec.apply(action_group)
818 synthesized.append(spec.copy())
819
820 # step 3, associate fixed specs with synthesized specs.
821 associated_pairs = self.associate_fixed_and_synthesized_specs(
822 fixed_specs, synthesized)
823
824 # step 4, group actions and cache them
825 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
826 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
827 revlist.append(fixed_specs[fixed_index].name)
828 this_action_groups = []
829
830 # handle glitch
831 if fixed_specs[fixed_index].similar_score(
832 synthesized[synthesized_index]) != 0:
833 assert synthesized[synthesized_index].is_subset(
834 fixed_specs[fixed_index])
835 skipped = set(fixed_specs[fixed_index].entries) - set(
836 synthesized[synthesized_index].entries)
837 if skipped:
838 logger.warning(
839 'between %s and %s, '
840 'bisect-kit cannot analyze commit history of following paths:',
841 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
842 for path in sorted(skipped):
843 logger.warning(' %s', path)
844
845 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
846 synthesized[synthesized_index])
847 if make_up:
848 this_action_groups.append(make_up)
849
850 this_action_groups.extend(
851 action_groups[synthesized_index:next_synthesized_index])
852 for idx, ag in enumerate(this_action_groups, 1):
853 rev = make_intra_rev(fixed_specs[fixed_index].name,
854 fixed_specs[next_fixed_index].name, idx)
855 ag.name = rev
856 revlist.append(rev)
857
858 self.save_action_groups_between_releases(
859 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
860 this_action_groups)
861 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
862
863 return revlist
864
865 def save_action_groups_between_releases(self, old, new, action_groups):
866 data = [ag.serialize() for ag in action_groups]
867
868 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
869 if not os.path.exists(cache_dir):
870 os.makedirs(cache_dir)
871 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
872 with file(cache_filename, 'w') as fp:
873 json.dump(data, fp, indent=4, sort_keys=True)
874
875 def load_action_groups_between_releases(self, old, new):
876 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
877 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
878 if not os.path.exists(cache_filename):
879 raise Exception('cached revlist not found: %s' % cache_filename)
880
881 result = []
882 for data in json.load(file(cache_filename)):
883 result.append(ActionGroup.unserialize(data))
884
885 return result
886
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800887 def view_rev_diff(self, revlist, old, new):
888 assert old in revlist
889 assert new in revlist
890 loaded_action_groups = None
891 old_idx = revlist.index(old)
892 new_idx = revlist.index(new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800893
Kuang-che Wu81aecc02018-10-31 19:37:32 +0800894 for i, rev in enumerate(revlist[old_idx:new_idx + 1], old_idx):
895 rev_old, rev_new, index = parse_intra_rev(rev)
896 if rev_old == rev_new:
897 logger.info('[%d] %s', i, rev)
898 else:
899 if loaded_action_groups != (rev_old, rev_new):
900 action_groups = self.load_action_groups_between_releases(
901 rev_old, rev_new)
902 loaded_action_groups = (rev_old, rev_new)
903 # Indexes inside intra_rev are 1 based.
904 action_group = action_groups[index - 1]
905 summary = action_group.summary(self.code_storage)
906 logger.info('[%d] %s %s', i, rev, summary)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800907
908 def switch(self, rev):
909 # easy case
910 if not re.match(_re_intra_rev, rev):
911 self.spec_manager.sync_disk_state(rev)
912 return
913
914 rev_old, rev_new, idx = parse_intra_rev(rev)
915 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
916 assert 0 <= idx <= len(action_groups)
917 action_groups = action_groups[:idx]
918
919 self.spec_manager.sync_disk_state(rev_old)
920
921 apply_actions(self.code_storage, action_groups, self.root_dir)