blob: f301c4bae4730e382f1aaa38cd3417fbb5f140a0 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
20
21from bisect_kit import cli
22from bisect_kit import git_util
23
24logger = logging.getLogger(__name__)
25
26_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
27
28SPEC_FIXED = 'fixed'
29SPEC_FLOAT = 'float'
30_DIFF_CACHE_DIR = 'bisectkit-cache'
31
32
33def make_intra_rev(a, b, index):
34 """Makes intra-rev version string.
35
36 Between two major "named" versions a and b, there are many small changes
37 (commits) in-between. bisect-kit will identify all those instances and bisect
38 them. We give names to those instances and call these names as "intra-rev"
39 which stands for minor version numbers within two major version.
40
41 Note, a+index (without b) is not enough to identify an unique change due to
42 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
43 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
44 in 9900.1.0 and 9901.0.0.
45
46 Args:
47 a: the start version
48 b: the end version
49 index: the index number of changes between a and b
50
51 Returns:
52 the intra-rev version string
53 """
54 return '%s~%s/%d' % (a, b, index)
55
56
57def parse_intra_rev(rev):
58 """Decomposes intra-rev string.
59
60 See comments of make_intra_rev for what is intra-rev.
61
62 Args:
63 rev: intra-rev string or normal version number
64
65 Returns:
66 (start, end, index). If rev is not intra-rev, it must be normal version
67 number and returns (rev, rev, 0).
68 """
69 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080070 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080071 return rev, rev, 0
72
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080073 return m.group(1), m.group(2), int(m.group(3))
74
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080075
76def argtype_intra_rev(argtype):
77 """Validates argument is intra-rev.
78
79 Args:
80 argtype: argtype function which validates major version number
81
82 Returns:
83 A new argtype function which matches intra-rev
84 """
85
86 def argtype_function(s):
87 m = re.match(_re_intra_rev, s)
88 if m:
89 try:
90 argtype(m.group(1))
91 argtype(m.group(2))
92 return s
93 except cli.ArgTypeError as e:
94 examples = []
95 for example in e.example:
96 examples.append(make_intra_rev(example, example, 10))
97 raise cli.ArgTypeError('Invalid intra rev', examples)
98 raise cli.ArgTypeError('Invalid intra rev',
99 [make_intra_rev('<rev1>', '<rev2>', 10)])
100
101 return argtype_function
102
103
104def _normalize_repo_url(repo_url):
105 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
106 r'https://chrome-internal.googlesource.com/', repo_url)
107 repo_url = re.sub(r'\.git$', '', repo_url)
108 return repo_url
109
110
111class PathSpec(object):
112 """Specified code version of one path.
113
114 Attributes:
115 path: local path, relative to project base dir
116 repo_url: code repository location
117 at: code version, could be git hash or branch name
118 """
119
120 def __init__(self, path, repo_url, at):
121 self.path = path
122 self.repo_url = repo_url
123 self.at = at
124
125 def is_static(self):
126 return git_util.is_git_rev(self.at)
127
128 def __eq__(self, rhs):
129 if self.path != rhs.path:
130 return False
131 if self.at != rhs.at:
132 return False
133 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
134 return False
135 return True
136
137 def __ne__(self, rhs):
138 return not self == rhs
139
140
141class Spec(object):
142 """Collection of PathSpec.
143
144 Spec is analogy to gclient's DEPS and repo's manifest.
145
146 Attributes:
147 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
148 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
149 pinned and the actual version (git commit) may change over time.
150 name: name of this spec, for debugging purpose. usually version number
151 or git hash
152 timestamp: timestamp of this spec
153 path: path of spec
154 entries: paths to PathSpec dict
155 """
156
157 def __init__(self, spec_type, name, timestamp, path, entries=None):
158 self.spec_type = spec_type
159 self.name = name
160 self.timestamp = timestamp
161 self.path = path
162 self.entries = entries
163
164 def copy(self):
165 return copy.deepcopy(self)
166
167 def similar_score(self, rhs):
168 """Calculates similar score to another Spec.
169
170 Returns:
171 score of similarity. Smaller value is more similar.
172 """
173 score = 0
174 for path in set(self.entries) & set(rhs.entries):
175 if rhs[path] == self[path]:
176 continue
177 if rhs[path].at == self[path].at:
178 # it's often that remote repo moved around but should be treated as the
179 # same one
180 score += 0.1
181 else:
182 score += 1
183 score += len(set(self.entries) ^ set(rhs.entries))
184 return score
185
186 def is_static(self):
187 return all(path_spec.is_static() for path_spec in self.entries.values())
188
189 def is_subset(self, rhs):
190 return set(self.entries.keys()) <= set(rhs.entries.keys())
191
192 def __getitem__(self, path):
193 return self.entries[path]
194
195 def __contains__(self, path):
196 return path in self.entries
197
198 def apply(self, action_group):
199 self.timestamp = action_group.timestamp
200 self.name = '(%s)' % self.timestamp
201 for action in action_group.actions:
202 if isinstance(action, GitAddRepo):
203 self.entries[action.path] = PathSpec(action.path, action.repo_url,
204 action.rev)
205 elif isinstance(action, GitCheckoutCommit):
206 self.entries[action.path].at = action.rev
207 elif isinstance(action, GitRemoveRepo):
208 del self.entries[action.path]
209 else:
210 assert 0, 'unknown action: %s' % action.__class__.__name__
211
212 def dump(self):
213 # for debugging
214 print(self.name, self.path, self.timestamp)
215 print('size', len(self.entries))
216 for path, path_spec in sorted(self.entries.items()):
217 print(path, path_spec.at)
218
219 def diff(self, rhs):
220 logger.info('diff between %s and %s', self.name, rhs.name)
221 expect = set(self.entries)
222 actual = set(rhs.entries)
223 common = 0
224 for path in sorted(expect - actual):
225 logger.info('-%s', path)
226 for path in sorted(actual - expect):
227 logger.info('+%s', path)
228 for path in sorted(expect & actual):
229 if self[path] == rhs[path]:
230 common += 1
231 continue
232 if self[path].at != rhs[path].at:
233 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
234 if self[path].repo_url != rhs[path].repo_url:
235 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
236 rhs[path].repo_url)
237 logger.info('and common=%s', common)
238
239
240class Action(object):
241 """Actions describe changes from one Spec to another.
242
243 Attributes:
244 timestamp: action time
245 path: action path, which is relative to project root
246 """
247
248 def __init__(self, timestamp, path):
249 self.timestamp = timestamp
250 self.path = path
251
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800252 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800253 raise NotImplementedError
254
255 def summary(self, _code_storage):
256 raise NotImplementedError
257
258 def __eq__(self, rhs):
259 return self.__dict__ == rhs.__dict__
260
261 def serialize(self):
262 return self.__class__.__name__, self.__dict__
263
264
265def unserialize_action(data):
266 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
267 class_name, values = data
268 assert class_name in [cls.__name__ for cls in classes
269 ], 'unknown action class: %s' % class_name
270 for cls in classes:
271 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800272 action = cls(**values)
273 break
274 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800275
276
277class ActionGroup(object):
278 """Atomic group of Action objects
279
280 This models atomic commits (for example, gerrit topic, or circular
281 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
282 object.
283 """
284
285 def __init__(self, timestamp, comment=None):
286 self.timestamp = timestamp
287 self.name = None
288 self.actions = []
289 self.comment = comment
290
291 def add(self, action):
292 self.actions.append(action)
293
294 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800295 return dict(
296 timestamp=self.timestamp,
297 name=self.name,
298 comment=self.comment,
299 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800300
301 def summary(self, code_storage):
302 if self.comment:
303 return self.comment
Kuang-che Wu22455262018-08-03 15:38:29 +0800304 assert self.actions
305 if len(self.actions) > 1:
306 # TODO(kcwu): show details for multiple Actions
307 return '(%d actions)' % len(self.actions)
308 else:
309 return self.actions[0].summary(code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800310
311 @staticmethod
312 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800313 ag = ActionGroup(data['timestamp'])
314 ag.name = data['name']
315 ag.comment = data['comment']
316 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317 ag.add(unserialize_action(x))
318 return ag
319
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800320 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800321 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800322 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800323
324
325class GitCheckoutCommit(Action):
326 """Describes a git commit action.
327
328 Attributes:
329 repo_url: the corresponding url of git repo
330 rev: git commit to checkout
331 """
332
333 def __init__(self, timestamp, path, repo_url, rev):
334 super(GitCheckoutCommit, self).__init__(timestamp, path)
335 self.repo_url = repo_url
336 self.rev = rev
337
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800338 def apply(self, code_storage, root_dir):
339 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800340 git_repo = os.path.join(root_dir, self.path)
341 assert git_util.is_git_root(git_repo)
342 git_util.checkout_version(git_repo, self.rev)
343
344 def summary(self, code_storage):
345 git_root = code_storage.cached_git_root(self.repo_url)
346 summary = git_util.get_commit_log(git_root, self.rev).splitlines()[0]
347 return 'commit %s %s %r' % (self.rev[:10], self.path, summary)
348
349
350class GitAddRepo(Action):
351 """Describes a git repo add action.
352
353 Attributes:
354 repo_url: the corresponding url of git repo to add
355 rev: git commit to checkout
356 """
357
358 def __init__(self, timestamp, path, repo_url, rev):
359 super(GitAddRepo, self).__init__(timestamp, path)
360 self.repo_url = repo_url
361 self.rev = rev
362
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800363 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800364 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800365 assert not os.path.exists(git_repo)
366
367 reference = code_storage.cached_git_root(self.repo_url)
368 git_util.clone(git_repo, self.repo_url, reference=reference)
369 git_util.checkout_version(git_repo, self.rev)
370
371 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800372
373 def summary(self, _code_storage):
374 return 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
375
376
377class GitRemoveRepo(Action):
378 """Describes a git repo remove action."""
379
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800380 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800381 assert self.path
382 git_repo = os.path.join(root_dir, self.path)
383 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800384 shutil.rmtree(git_repo)
385
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800386 code_storage.remove_from_project_list(root_dir, self.path)
387
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800388 def summary(self, _code_storage):
389 return 'remove repo %s' % self.path
390
391
392def apply_actions(code_storage, action_groups, root_dir):
393 # Speed optimization: only apply the last one of consecutive commits per
394 # repo. It is possible to optimize further, but need to take care git repo
395 # add/remove within another repo.
396 commits = {}
397
398 def batch_apply(commits):
399 for i, commit_action in sorted(commits.values()):
400 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800401 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800402
403 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800404 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800405 if not isinstance(action, GitCheckoutCommit):
406 break
407 else:
408 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800409 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410 commits[action.path] = (i, action)
411 continue
412
413 batch_apply(commits)
414 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800415 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800416
417 batch_apply(commits)
418
419
420class SpecManager(object):
421 """Spec related abstract operations.
422
423 This class enumerates Spec instances and switch disk state to Spec.
424
425 In other words, this class abstracts:
426 - discovery of gclient's DEPS and repo's manifest
427 - gclient sync and repo sync
428 """
429
430 def collect_float_spec(self, old, new):
431 """Collects float Spec between two versions.
432
433 This method may fetch spec from network. However, it should not switch tree
434 version state.
435 """
436 raise NotImplementedError
437
438 def collect_fixed_spec(self, old, new):
439 """Collects fixed Spec between two versions.
440
441 This method may fetch spec from network. However, it should not switch tree
442 version state.
443 """
444 raise NotImplementedError
445
446 def parse_spec(self, spec):
447 """Parses information for Spec object.
448
449 Args:
450 spec: Spec object. It specifies what to parse and the parsed information
451 is stored inside.
452 """
453 raise NotImplementedError
454
455 def sync_disk_state(self, rev):
456 """Switch source tree state to given version."""
457 raise NotImplementedError
458
459
460class CodeStorage(object):
461 """Query code history and commit relationship without checkout.
462
463 Because paths inside source tree may be deleted or map to different remote
464 repo in different versions, we cannot query git information of one version
465 but the tree state is at another version. In order to query information
466 without changing tree state and fast, we need out of tree source code
467 storage.
468
469 This class assumes all git repos are mirrored somewhere on local disk.
470 Subclasses just need to implement cached_git_root() which returns the
471 location.
472
473 In other words, this class abstracts operations upon gclient's cache-dir
474 repo's mirror.
475 """
476
477 def cached_git_root(self, repo_url):
478 """The cached path of given remote git repo.
479
480 Args:
481 repo_url: URL of git remote repo
482
483 Returns:
484 path of cache folder
485 """
486 raise NotImplementedError
487
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800488 def add_to_project_list(self, project_root, path, repo_url):
489 raise NotImplementedError
490
491 def remove_from_project_list(self, project_root, path):
492 raise NotImplementedError
493
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800494 def is_ancestor_commit(self, spec, path, old, new):
495 """Determine one commit is ancestor of another.
496
497 Args:
498 spec: Spec object
499 path: local path relative to project root
500 old: commit id
501 new: commit id
502
503 Returns:
504 True if `old` is ancestor of `new`
505 """
506 git_root = self.cached_git_root(spec[path].repo_url)
507 return git_util.is_ancestor_commit(git_root, old, new)
508
509 def get_rev_by_time(self, spec, path, timestamp):
510 """Get commit hash of given spec by time.
511
512 Args:
513 spec: Spec object
514 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800515 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800516
517 Returns:
518 The commit hash of given time. If there are commits with the given
519 timestamp, returns the last commit.
520 """
521 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800522 # spec[path].at is remote reference name. Since git_root is a mirror (not
523 # a local checkout), there is no need to convert the name.
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800524 return git_util.get_rev_by_time(git_root, timestamp, branch=spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800525
526 def get_actions_between_two_commit(self, spec, path, old, new):
527 git_root = self.cached_git_root(spec[path].repo_url)
528 result = []
529 for timestamp, git_rev in git_util.list_commits_between_commits(
530 git_root, old, new):
531 result.append(
532 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
533 return result
534
535 def is_containing_commit(self, spec, path, rev):
536 git_root = self.cached_git_root(spec[path].repo_url)
537 return git_util.is_containing_commit(git_root, rev)
538
539 def are_spec_commits_available(self, spec):
540 for path, path_spec in spec.entries.items():
541 if not path_spec.is_static():
542 continue
543 if not self.is_containing_commit(spec, path, path_spec.at):
544 return False
545 return True
546
547
548class CodeManager(object):
549 """Class to reconstruct historical source tree state.
550
551 This class can reconstruct all moments of source tree state and diffs between
552 them.
553
554 Attributes:
555 root_dir: root path of project source tree
556 spec_manager: SpecManager object
557 code_storage: CodeStorage object
558 """
559
560 def __init__(self, root_dir, spec_manager, code_storage):
561 self.root_dir = root_dir
562 self.spec_manager = spec_manager
563 self.code_storage = code_storage
564
565 def generate_actions_between_specs(self, prev_float, next_float):
566 """Generates actions between two float specs.
567
568 Args:
569 prev_float: start of spec object (exclusive)
570 next_float: end of spec object (inclusive)
571
572 Returns:
573 list of Action object (unordered)
574 """
575 actions = []
576 for path in set(prev_float.entries) | set(next_float.entries):
577
578 # Add repo
579 if path not in prev_float:
580 if next_float[path].is_static():
581 next_at = next_float[path].at
582 else:
583 next_at = self.code_storage.get_rev_by_time(next_float, path,
584 next_float.timestamp)
585 actions.append(
586 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
587 next_at))
588 continue
589
590 # Existing path is floating, enumerates commits until next spec.
591 #
592 # prev_at till_at
593 # prev branch ---> o --------> o --------> o --------> o --------> ...
594 # ^ ^
595 # prev_float.timestamp next_float.timestamp
596 if not prev_float[path].is_static():
597 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
598 prev_float.timestamp)
599 till_at = self.code_storage.get_rev_by_time(prev_float, path,
600 next_float.timestamp)
601
602 actions.extend(
603 self.code_storage.get_actions_between_two_commit(
604 prev_float, path, prev_at, till_at))
605 else:
606 prev_at = till_at = prev_float[path].at
607
608 # At next_float.timestamp.
609 if path not in next_float:
610 # remove repo
611 actions.append(GitRemoveRepo(next_float.timestamp, path))
612 next_at = None
613
614 elif next_float[path].is_static():
615 # pinned to certain commit on different branch
616 next_at = next_float[path].at
617
618 elif next_float[path].at == prev_float[path].at:
619 # keep floating on the same branch
620 next_at = till_at
621
622 else:
623 # switch to another branch
624 # prev_at till_at
625 # prev branch ---> o --------> o --------> o --------> o --------> ...
626 #
627 # next_at
628 # next branch ...... o ------> o --------> o -----> ...
629 # ^ ^
630 # prev_float.timestamp next_float.timestamp
631 next_at = self.code_storage.get_rev_by_time(next_float, path,
632 next_float.timestamp)
633
634 if next_at and next_at != till_at:
635 actions.append(
636 GitCheckoutCommit(next_float.timestamp, path,
637 next_float[path].repo_url, next_at))
638
639 return actions
640
641 def synthesize_fixed_spec(self, float_spec, timestamp):
642 """Synthesizes fixed spec from float spec of given time.
643
644 Args:
645 float_spec: the float spec
646 timestamp: snapshot time
647
648 Returns:
649 Spec object
650 """
651 result = {}
652 for path, path_spec in float_spec.entries.items():
653 if not path_spec.is_static():
654 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
655 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
656
657 result[path] = copy.deepcopy(path_spec)
658
659 name = '%s@%s' % (float_spec.path, timestamp)
660 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
661
662 def reorder_actions(self, actions):
663 """Reorder and cluster actions.
664
665 Args:
666 actions: list of Action objects
667
668 Returns:
669 list of ActionGroup objects
670 """
671 # TODO(kcwu): support atomic commits across repos
672 actions.sort(key=lambda x: x.timestamp)
673 result = []
674 for action in actions:
675 group = ActionGroup(action.timestamp)
676 group.add(action)
677 result.append(group)
678 return result
679
680 def match_spec(self, target, specs, start_index=0):
681 threshold = 3600
682 # ideal_index is the index of last spec before target
683 # begin and end are the range of indexes within threshold (inclusive)
684 ideal_index = None
685 begin, end = None, None
686 for i, spec in enumerate(specs[start_index:], start_index):
687 if spec.timestamp <= target.timestamp:
688 ideal_index = i
689 if abs(spec.timestamp - target.timestamp) < threshold:
690 if begin is None:
691 begin = i
692 end = i
693
694 candidates = []
695 if ideal_index is not None:
696 candidates.append(ideal_index)
697 if begin is not None:
698 candidates.extend(range(begin, end + 1))
699 if not candidates:
700 logger.error('unable to match %s: all specs are after it', target.name)
701 return None
702
703 compatible_candidates = [
704 i for i in candidates if specs[i].is_subset(target)
705 ]
706 if not compatible_candidates:
707 logger.error('unable to match %s: no compatible specs', target.name)
708 spec = specs[candidates[0]]
709 target.diff(spec)
710 return None
711
712 scores = []
713 for i in compatible_candidates:
714 scores.append((specs[i].similar_score(target), i))
715 scores.sort()
716
717 score, index = scores[0]
718 if score != 0:
719 logger.warning('not exactly match (score=%s): %s', score, target.name)
720 target.diff(specs[index])
721
722 if index < ideal_index:
723 logger.warning(
724 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
725 target.name, target.timestamp, specs[index].timestamp,
726 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800727 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800728 if index > ideal_index:
729 logger.warning(
730 'spec committed at %d matched later commit at %d. bad server clock?',
731 target.timestamp, specs[index].timestamp)
732
733 return index
734
735 def associate_fixed_and_synthesized_specs(self, fixed_specs,
736 synthesized_specs):
737 # All fixed specs are snapshot of float specs. Theoretically, they
738 # should be identical to one of the synthesized specs.
739 # However, it's not always true for some reasons --- maybe due to race
740 # condition, maybe due to bugs of this bisect-kit.
741 # To overcome this glitch, we try to match them by similarity instead of
742 # exact match.
743 result = []
744 last_index = 0
745 for i, fixed_spec in enumerate(fixed_specs):
746 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
747 if matched_index is None:
748 if i in (0, len(fixed_specs) - 1):
749 logger.error('essential spec mismatch, unable to continue')
750 assert 0
751 else:
752 logger.warning('%s do not match, skip', fixed_spec.name)
753 continue
754 result.append((i, matched_index))
755 last_index = matched_index
756
757 return result
758
759 def _create_make_up_actions(self, fixed_spec, synthesized):
760 timestamp = synthesized.timestamp
761 make_up = ActionGroup(
762 timestamp, comment='make up glitch for %s' % fixed_spec.name)
763 for path in set(fixed_spec.entries) & set(synthesized.entries):
764 if fixed_spec[path].at == synthesized[path].at:
765 continue
766 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
767 synthesized[path].at)
768 make_up.add(action)
769
770 if not make_up.actions:
771 return None
772 return make_up
773
774 def build_revlist(self, old, new):
775 """Build revlist.
776
777 Returns:
778 list of rev string
779 """
780 logger.info('build_revlist')
781 revlist = []
782
783 # step 1, find all float and fixed specs in the given range.
784 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800785 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800786 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800787 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800788 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
789 float_specs.pop()
790 assert float_specs
791 for spec in float_specs + fixed_specs:
792 self.spec_manager.parse_spec(spec)
793
794 # step 2, synthesize all fixed specs in the range from float specs.
795 specs = float_specs + [fixed_specs[-1]]
796 actions = []
797 logger.debug('len(specs)=%d', len(specs))
798 for i in range(len(specs) - 1):
799 prev_float = specs[i]
800 next_float = specs[i + 1]
801 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
802 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800803 for action in self.generate_actions_between_specs(prev_float, next_float):
804 if action.timestamp < fixed_specs[0].timestamp:
805 continue
806 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800807 action_groups = self.reorder_actions(actions)
808
809 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
810 synthesized = [spec.copy()]
811 for action_group in action_groups:
812 spec.apply(action_group)
813 synthesized.append(spec.copy())
814
815 # step 3, associate fixed specs with synthesized specs.
816 associated_pairs = self.associate_fixed_and_synthesized_specs(
817 fixed_specs, synthesized)
818
819 # step 4, group actions and cache them
820 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
821 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
822 revlist.append(fixed_specs[fixed_index].name)
823 this_action_groups = []
824
825 # handle glitch
826 if fixed_specs[fixed_index].similar_score(
827 synthesized[synthesized_index]) != 0:
828 assert synthesized[synthesized_index].is_subset(
829 fixed_specs[fixed_index])
830 skipped = set(fixed_specs[fixed_index].entries) - set(
831 synthesized[synthesized_index].entries)
832 if skipped:
833 logger.warning(
834 'between %s and %s, '
835 'bisect-kit cannot analyze commit history of following paths:',
836 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
837 for path in sorted(skipped):
838 logger.warning(' %s', path)
839
840 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
841 synthesized[synthesized_index])
842 if make_up:
843 this_action_groups.append(make_up)
844
845 this_action_groups.extend(
846 action_groups[synthesized_index:next_synthesized_index])
847 for idx, ag in enumerate(this_action_groups, 1):
848 rev = make_intra_rev(fixed_specs[fixed_index].name,
849 fixed_specs[next_fixed_index].name, idx)
850 ag.name = rev
851 revlist.append(rev)
852
853 self.save_action_groups_between_releases(
854 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
855 this_action_groups)
856 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
857
858 return revlist
859
860 def save_action_groups_between_releases(self, old, new, action_groups):
861 data = [ag.serialize() for ag in action_groups]
862
863 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
864 if not os.path.exists(cache_dir):
865 os.makedirs(cache_dir)
866 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
867 with file(cache_filename, 'w') as fp:
868 json.dump(data, fp, indent=4, sort_keys=True)
869
870 def load_action_groups_between_releases(self, old, new):
871 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
872 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
873 if not os.path.exists(cache_filename):
874 raise Exception('cached revlist not found: %s' % cache_filename)
875
876 result = []
877 for data in json.load(file(cache_filename)):
878 result.append(ActionGroup.unserialize(data))
879
880 return result
881
882 def view_rev_diff(self, old, new):
883 old_base, _, _ = parse_intra_rev(old)
884 _, new_next, _ = parse_intra_rev(new)
885 assert old_base != new_next
886
887 revlist = []
888 rev_summary = {}
889 fixed_specs = self.spec_manager.collect_fixed_spec(old_base, new_next)
890 for i, spec in enumerate(fixed_specs[:-1]):
891 action_groups = self.load_action_groups_between_releases(
892 fixed_specs[i].name, fixed_specs[i + 1].name)
893 revlist.append(spec.name)
894 rev_summary[spec.name] = ''
895 for action_group in action_groups:
896 revlist.append(action_group.name)
897 rev_summary[action_group.name] = action_group.summary(self.code_storage)
898
899 revlist.append(fixed_specs[-1].name)
900 rev_summary[fixed_specs[-1].name] = ''
901
902 old_index = revlist.index(old)
903 new_index = revlist.index(new)
904 for rev in revlist[old_index:new_index + 1]:
905 logger.info('%s %s', rev, rev_summary[rev])
906
907 def switch(self, rev):
908 # easy case
909 if not re.match(_re_intra_rev, rev):
910 self.spec_manager.sync_disk_state(rev)
911 return
912
913 rev_old, rev_new, idx = parse_intra_rev(rev)
914 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
915 assert 0 <= idx <= len(action_groups)
916 action_groups = action_groups[:idx]
917
918 self.spec_manager.sync_disk_state(rev_old)
919
920 apply_actions(self.code_storage, action_groups, self.root_dir)