blob: 15685eed9dbbcdea0d5296341ad92b060a7c04ab [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
89 m = re.match(_re_intra_rev, s)
90 if m:
91 try:
92 argtype(m.group(1))
93 argtype(m.group(2))
94 return s
95 except cli.ArgTypeError as e:
96 examples = []
97 for example in e.example:
98 examples.append(make_intra_rev(example, example, 10))
99 raise cli.ArgTypeError('Invalid intra rev', examples)
100 raise cli.ArgTypeError('Invalid intra rev',
101 [make_intra_rev('<rev1>', '<rev2>', 10)])
102
103 return argtype_function
104
105
106def _normalize_repo_url(repo_url):
107 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
108 r'https://chrome-internal.googlesource.com/', repo_url)
109 repo_url = re.sub(r'\.git$', '', repo_url)
110 return repo_url
111
112
113class PathSpec(object):
114 """Specified code version of one path.
115
116 Attributes:
117 path: local path, relative to project base dir
118 repo_url: code repository location
119 at: code version, could be git hash or branch name
120 """
121
122 def __init__(self, path, repo_url, at):
123 self.path = path
124 self.repo_url = repo_url
125 self.at = at
126
127 def is_static(self):
128 return git_util.is_git_rev(self.at)
129
130 def __eq__(self, rhs):
131 if self.path != rhs.path:
132 return False
133 if self.at != rhs.at:
134 return False
135 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
136 return False
137 return True
138
139 def __ne__(self, rhs):
140 return not self == rhs
141
142
143class Spec(object):
144 """Collection of PathSpec.
145
146 Spec is analogy to gclient's DEPS and repo's manifest.
147
148 Attributes:
149 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
150 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
151 pinned and the actual version (git commit) may change over time.
152 name: name of this spec, for debugging purpose. usually version number
153 or git hash
154 timestamp: timestamp of this spec
155 path: path of spec
156 entries: paths to PathSpec dict
157 """
158
159 def __init__(self, spec_type, name, timestamp, path, entries=None):
160 self.spec_type = spec_type
161 self.name = name
162 self.timestamp = timestamp
163 self.path = path
164 self.entries = entries
165
166 def copy(self):
167 return copy.deepcopy(self)
168
169 def similar_score(self, rhs):
170 """Calculates similar score to another Spec.
171
172 Returns:
173 score of similarity. Smaller value is more similar.
174 """
175 score = 0
176 for path in set(self.entries) & set(rhs.entries):
177 if rhs[path] == self[path]:
178 continue
179 if rhs[path].at == self[path].at:
180 # it's often that remote repo moved around but should be treated as the
181 # same one
182 score += 0.1
183 else:
184 score += 1
185 score += len(set(self.entries) ^ set(rhs.entries))
186 return score
187
188 def is_static(self):
189 return all(path_spec.is_static() for path_spec in self.entries.values())
190
191 def is_subset(self, rhs):
192 return set(self.entries.keys()) <= set(rhs.entries.keys())
193
194 def __getitem__(self, path):
195 return self.entries[path]
196
197 def __contains__(self, path):
198 return path in self.entries
199
200 def apply(self, action_group):
201 self.timestamp = action_group.timestamp
202 self.name = '(%s)' % self.timestamp
203 for action in action_group.actions:
204 if isinstance(action, GitAddRepo):
205 self.entries[action.path] = PathSpec(action.path, action.repo_url,
206 action.rev)
207 elif isinstance(action, GitCheckoutCommit):
208 self.entries[action.path].at = action.rev
209 elif isinstance(action, GitRemoveRepo):
210 del self.entries[action.path]
211 else:
212 assert 0, 'unknown action: %s' % action.__class__.__name__
213
214 def dump(self):
215 # for debugging
216 print(self.name, self.path, self.timestamp)
217 print('size', len(self.entries))
218 for path, path_spec in sorted(self.entries.items()):
219 print(path, path_spec.at)
220
221 def diff(self, rhs):
222 logger.info('diff between %s and %s', self.name, rhs.name)
223 expect = set(self.entries)
224 actual = set(rhs.entries)
225 common = 0
226 for path in sorted(expect - actual):
227 logger.info('-%s', path)
228 for path in sorted(actual - expect):
229 logger.info('+%s', path)
230 for path in sorted(expect & actual):
231 if self[path] == rhs[path]:
232 common += 1
233 continue
234 if self[path].at != rhs[path].at:
235 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
236 if self[path].repo_url != rhs[path].repo_url:
237 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
238 rhs[path].repo_url)
239 logger.info('and common=%s', common)
240
241
242class Action(object):
243 """Actions describe changes from one Spec to another.
244
245 Attributes:
246 timestamp: action time
247 path: action path, which is relative to project root
248 """
249
250 def __init__(self, timestamp, path):
251 self.timestamp = timestamp
252 self.path = path
253
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800254 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800255 raise NotImplementedError
256
257 def summary(self, _code_storage):
258 raise NotImplementedError
259
260 def __eq__(self, rhs):
261 return self.__dict__ == rhs.__dict__
262
263 def serialize(self):
264 return self.__class__.__name__, self.__dict__
265
266
267def unserialize_action(data):
268 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
269 class_name, values = data
270 assert class_name in [cls.__name__ for cls in classes
271 ], 'unknown action class: %s' % class_name
272 for cls in classes:
273 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800274 action = cls(**values)
275 break
276 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800277
278
279class ActionGroup(object):
280 """Atomic group of Action objects
281
282 This models atomic commits (for example, gerrit topic, or circular
283 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
284 object.
285 """
286
287 def __init__(self, timestamp, comment=None):
288 self.timestamp = timestamp
289 self.name = None
290 self.actions = []
291 self.comment = comment
292
293 def add(self, action):
294 self.actions.append(action)
295
296 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800297 return dict(
298 timestamp=self.timestamp,
299 name=self.name,
300 comment=self.comment,
301 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800302
303 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800304 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800305 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800306 result['comment'] = self.comment
307 result['actions'] = [
308 action.summary(code_storage) for action in self.actions
309 ]
310 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311
312 @staticmethod
313 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800314 ag = ActionGroup(data['timestamp'])
315 ag.name = data['name']
316 ag.comment = data['comment']
317 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800318 ag.add(unserialize_action(x))
319 return ag
320
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800321 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800322 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800323 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324
325
326class GitCheckoutCommit(Action):
327 """Describes a git commit action.
328
329 Attributes:
330 repo_url: the corresponding url of git repo
331 rev: git commit to checkout
332 """
333
334 def __init__(self, timestamp, path, repo_url, rev):
335 super(GitCheckoutCommit, self).__init__(timestamp, path)
336 self.repo_url = repo_url
337 self.rev = rev
338
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800339 def apply(self, code_storage, root_dir):
340 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800341 git_repo = os.path.join(root_dir, self.path)
342 assert git_util.is_git_root(git_repo)
343 git_util.checkout_version(git_repo, self.rev)
344
345 def summary(self, code_storage):
346 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800347 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800348 commit_summary = git_util.get_commit_log(git_root,
349 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800350 except subprocess.CalledProcessError:
351 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
352 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800353 commit_summary = '(unknown)'
354 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
355 return dict(
356 timestamp=self.timestamp,
357 action_type='commit',
358 path=self.path,
359 commit_summary=commit_summary,
360 repo_url=self.repo_url,
361 rev=self.rev,
362 text=text,
363 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800364
365
366class GitAddRepo(Action):
367 """Describes a git repo add action.
368
369 Attributes:
370 repo_url: the corresponding url of git repo to add
371 rev: git commit to checkout
372 """
373
374 def __init__(self, timestamp, path, repo_url, rev):
375 super(GitAddRepo, self).__init__(timestamp, path)
376 self.repo_url = repo_url
377 self.rev = rev
378
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800379 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800380 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800381 assert not os.path.exists(git_repo)
382
383 reference = code_storage.cached_git_root(self.repo_url)
384 git_util.clone(git_repo, self.repo_url, reference=reference)
385 git_util.checkout_version(git_repo, self.rev)
386
387 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800388
389 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800390 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
391 return dict(
392 timestamp=self.timestamp,
393 action_type='add_repo',
394 path=self.path,
395 text=text,
396 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800397
398
399class GitRemoveRepo(Action):
400 """Describes a git repo remove action."""
401
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800402 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800403 assert self.path
404 git_repo = os.path.join(root_dir, self.path)
405 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800406 shutil.rmtree(git_repo)
407
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800408 code_storage.remove_from_project_list(root_dir, self.path)
409
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800411 return dict(
412 timestamp=self.timestamp,
413 action_type='remove_repo',
414 path=self.path,
415 text='remove repo %s' % self.path,
416 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800417
418
419def apply_actions(code_storage, action_groups, root_dir):
420 # Speed optimization: only apply the last one of consecutive commits per
421 # repo. It is possible to optimize further, but need to take care git repo
422 # add/remove within another repo.
423 commits = {}
424
425 def batch_apply(commits):
426 for i, commit_action in sorted(commits.values()):
427 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800428 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800429
430 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800431 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800432 if not isinstance(action, GitCheckoutCommit):
433 break
434 else:
435 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800436 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800437 commits[action.path] = (i, action)
438 continue
439
440 batch_apply(commits)
441 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800442 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800443
444 batch_apply(commits)
445
446
447class SpecManager(object):
448 """Spec related abstract operations.
449
450 This class enumerates Spec instances and switch disk state to Spec.
451
452 In other words, this class abstracts:
453 - discovery of gclient's DEPS and repo's manifest
454 - gclient sync and repo sync
455 """
456
457 def collect_float_spec(self, old, new):
458 """Collects float Spec between two versions.
459
460 This method may fetch spec from network. However, it should not switch tree
461 version state.
462 """
463 raise NotImplementedError
464
465 def collect_fixed_spec(self, old, new):
466 """Collects fixed Spec between two versions.
467
468 This method may fetch spec from network. However, it should not switch tree
469 version state.
470 """
471 raise NotImplementedError
472
473 def parse_spec(self, spec):
474 """Parses information for Spec object.
475
476 Args:
477 spec: Spec object. It specifies what to parse and the parsed information
478 is stored inside.
479 """
480 raise NotImplementedError
481
482 def sync_disk_state(self, rev):
483 """Switch source tree state to given version."""
484 raise NotImplementedError
485
486
487class CodeStorage(object):
488 """Query code history and commit relationship without checkout.
489
490 Because paths inside source tree may be deleted or map to different remote
491 repo in different versions, we cannot query git information of one version
492 but the tree state is at another version. In order to query information
493 without changing tree state and fast, we need out of tree source code
494 storage.
495
496 This class assumes all git repos are mirrored somewhere on local disk.
497 Subclasses just need to implement cached_git_root() which returns the
498 location.
499
500 In other words, this class abstracts operations upon gclient's cache-dir
501 repo's mirror.
502 """
503
504 def cached_git_root(self, repo_url):
505 """The cached path of given remote git repo.
506
507 Args:
508 repo_url: URL of git remote repo
509
510 Returns:
511 path of cache folder
512 """
513 raise NotImplementedError
514
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800515 def add_to_project_list(self, project_root, path, repo_url):
516 raise NotImplementedError
517
518 def remove_from_project_list(self, project_root, path):
519 raise NotImplementedError
520
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800521 def is_ancestor_commit(self, spec, path, old, new):
522 """Determine one commit is ancestor of another.
523
524 Args:
525 spec: Spec object
526 path: local path relative to project root
527 old: commit id
528 new: commit id
529
530 Returns:
531 True if `old` is ancestor of `new`
532 """
533 git_root = self.cached_git_root(spec[path].repo_url)
534 return git_util.is_ancestor_commit(git_root, old, new)
535
536 def get_rev_by_time(self, spec, path, timestamp):
537 """Get commit hash of given spec by time.
538
539 Args:
540 spec: Spec object
541 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800542 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800543
544 Returns:
545 The commit hash of given time. If there are commits with the given
546 timestamp, returns the last commit.
547 """
548 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800549 # spec[path].at is remote reference name. Since git_root is a mirror (not
550 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800551 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800552
553 def get_actions_between_two_commit(self, spec, path, old, new):
554 git_root = self.cached_git_root(spec[path].repo_url)
555 result = []
556 for timestamp, git_rev in git_util.list_commits_between_commits(
557 git_root, old, new):
558 result.append(
559 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
560 return result
561
562 def is_containing_commit(self, spec, path, rev):
563 git_root = self.cached_git_root(spec[path].repo_url)
564 return git_util.is_containing_commit(git_root, rev)
565
566 def are_spec_commits_available(self, spec):
567 for path, path_spec in spec.entries.items():
568 if not path_spec.is_static():
569 continue
570 if not self.is_containing_commit(spec, path, path_spec.at):
571 return False
572 return True
573
574
575class CodeManager(object):
576 """Class to reconstruct historical source tree state.
577
578 This class can reconstruct all moments of source tree state and diffs between
579 them.
580
581 Attributes:
582 root_dir: root path of project source tree
583 spec_manager: SpecManager object
584 code_storage: CodeStorage object
585 """
586
587 def __init__(self, root_dir, spec_manager, code_storage):
588 self.root_dir = root_dir
589 self.spec_manager = spec_manager
590 self.code_storage = code_storage
591
592 def generate_actions_between_specs(self, prev_float, next_float):
593 """Generates actions between two float specs.
594
595 Args:
596 prev_float: start of spec object (exclusive)
597 next_float: end of spec object (inclusive)
598
599 Returns:
600 list of Action object (unordered)
601 """
602 actions = []
603 for path in set(prev_float.entries) | set(next_float.entries):
604
605 # Add repo
606 if path not in prev_float:
607 if next_float[path].is_static():
608 next_at = next_float[path].at
609 else:
610 next_at = self.code_storage.get_rev_by_time(next_float, path,
611 next_float.timestamp)
612 actions.append(
613 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
614 next_at))
615 continue
616
617 # Existing path is floating, enumerates commits until next spec.
618 #
619 # prev_at till_at
620 # prev branch ---> o --------> o --------> o --------> o --------> ...
621 # ^ ^
622 # prev_float.timestamp next_float.timestamp
623 if not prev_float[path].is_static():
624 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
625 prev_float.timestamp)
626 till_at = self.code_storage.get_rev_by_time(prev_float, path,
627 next_float.timestamp)
628
629 actions.extend(
630 self.code_storage.get_actions_between_two_commit(
631 prev_float, path, prev_at, till_at))
632 else:
633 prev_at = till_at = prev_float[path].at
634
635 # At next_float.timestamp.
636 if path not in next_float:
637 # remove repo
638 actions.append(GitRemoveRepo(next_float.timestamp, path))
639 next_at = None
640
641 elif next_float[path].is_static():
642 # pinned to certain commit on different branch
643 next_at = next_float[path].at
644
645 elif next_float[path].at == prev_float[path].at:
646 # keep floating on the same branch
647 next_at = till_at
648
649 else:
650 # switch to another branch
651 # prev_at till_at
652 # prev branch ---> o --------> o --------> o --------> o --------> ...
653 #
654 # next_at
655 # next branch ...... o ------> o --------> o -----> ...
656 # ^ ^
657 # prev_float.timestamp next_float.timestamp
658 next_at = self.code_storage.get_rev_by_time(next_float, path,
659 next_float.timestamp)
660
661 if next_at and next_at != till_at:
662 actions.append(
663 GitCheckoutCommit(next_float.timestamp, path,
664 next_float[path].repo_url, next_at))
665
666 return actions
667
668 def synthesize_fixed_spec(self, float_spec, timestamp):
669 """Synthesizes fixed spec from float spec of given time.
670
671 Args:
672 float_spec: the float spec
673 timestamp: snapshot time
674
675 Returns:
676 Spec object
677 """
678 result = {}
679 for path, path_spec in float_spec.entries.items():
680 if not path_spec.is_static():
681 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
682 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
683
684 result[path] = copy.deepcopy(path_spec)
685
686 name = '%s@%s' % (float_spec.path, timestamp)
687 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
688
689 def reorder_actions(self, actions):
690 """Reorder and cluster actions.
691
692 Args:
693 actions: list of Action objects
694
695 Returns:
696 list of ActionGroup objects
697 """
698 # TODO(kcwu): support atomic commits across repos
699 actions.sort(key=lambda x: x.timestamp)
700 result = []
701 for action in actions:
702 group = ActionGroup(action.timestamp)
703 group.add(action)
704 result.append(group)
705 return result
706
707 def match_spec(self, target, specs, start_index=0):
708 threshold = 3600
709 # ideal_index is the index of last spec before target
710 # begin and end are the range of indexes within threshold (inclusive)
711 ideal_index = None
712 begin, end = None, None
713 for i, spec in enumerate(specs[start_index:], start_index):
714 if spec.timestamp <= target.timestamp:
715 ideal_index = i
716 if abs(spec.timestamp - target.timestamp) < threshold:
717 if begin is None:
718 begin = i
719 end = i
720
721 candidates = []
722 if ideal_index is not None:
723 candidates.append(ideal_index)
724 if begin is not None:
725 candidates.extend(range(begin, end + 1))
726 if not candidates:
727 logger.error('unable to match %s: all specs are after it', target.name)
728 return None
729
730 compatible_candidates = [
731 i for i in candidates if specs[i].is_subset(target)
732 ]
733 if not compatible_candidates:
734 logger.error('unable to match %s: no compatible specs', target.name)
735 spec = specs[candidates[0]]
736 target.diff(spec)
737 return None
738
739 scores = []
740 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800741 # Tie-break: prefer earlier timestamp and smaller difference.
742 if specs[i].timestamp <= target.timestamp:
743 timediff = 0, target.timestamp - specs[i].timestamp
744 else:
745 timediff = 1, specs[i].timestamp - target.timestamp
746 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800747 scores.sort()
748
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800749 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800750 if score != 0:
751 logger.warning('not exactly match (score=%s): %s', score, target.name)
752 target.diff(specs[index])
753
754 if index < ideal_index:
755 logger.warning(
756 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
757 target.name, target.timestamp, specs[index].timestamp,
758 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800759 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800760 if index > ideal_index:
761 logger.warning(
762 'spec committed at %d matched later commit at %d. bad server clock?',
763 target.timestamp, specs[index].timestamp)
764
765 return index
766
767 def associate_fixed_and_synthesized_specs(self, fixed_specs,
768 synthesized_specs):
769 # All fixed specs are snapshot of float specs. Theoretically, they
770 # should be identical to one of the synthesized specs.
771 # However, it's not always true for some reasons --- maybe due to race
772 # condition, maybe due to bugs of this bisect-kit.
773 # To overcome this glitch, we try to match them by similarity instead of
774 # exact match.
775 result = []
776 last_index = 0
777 for i, fixed_spec in enumerate(fixed_specs):
778 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
779 if matched_index is None:
780 if i in (0, len(fixed_specs) - 1):
781 logger.error('essential spec mismatch, unable to continue')
782 assert 0
783 else:
784 logger.warning('%s do not match, skip', fixed_spec.name)
785 continue
786 result.append((i, matched_index))
787 last_index = matched_index
788
789 return result
790
791 def _create_make_up_actions(self, fixed_spec, synthesized):
792 timestamp = synthesized.timestamp
793 make_up = ActionGroup(
794 timestamp, comment='make up glitch for %s' % fixed_spec.name)
795 for path in set(fixed_spec.entries) & set(synthesized.entries):
796 if fixed_spec[path].at == synthesized[path].at:
797 continue
798 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
799 synthesized[path].at)
800 make_up.add(action)
801
802 if not make_up.actions:
803 return None
804 return make_up
805
806 def build_revlist(self, old, new):
807 """Build revlist.
808
809 Returns:
810 list of rev string
811 """
812 logger.info('build_revlist')
813 revlist = []
814
815 # step 1, find all float and fixed specs in the given range.
816 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800817 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800818 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800819 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800820 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
821 float_specs.pop()
822 assert float_specs
823 for spec in float_specs + fixed_specs:
824 self.spec_manager.parse_spec(spec)
825
826 # step 2, synthesize all fixed specs in the range from float specs.
827 specs = float_specs + [fixed_specs[-1]]
828 actions = []
829 logger.debug('len(specs)=%d', len(specs))
830 for i in range(len(specs) - 1):
831 prev_float = specs[i]
832 next_float = specs[i + 1]
833 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
834 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800835 for action in self.generate_actions_between_specs(prev_float, next_float):
836 if action.timestamp < fixed_specs[0].timestamp:
837 continue
838 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800839 action_groups = self.reorder_actions(actions)
840
841 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
842 synthesized = [spec.copy()]
843 for action_group in action_groups:
844 spec.apply(action_group)
845 synthesized.append(spec.copy())
846
847 # step 3, associate fixed specs with synthesized specs.
848 associated_pairs = self.associate_fixed_and_synthesized_specs(
849 fixed_specs, synthesized)
850
851 # step 4, group actions and cache them
852 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
853 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
854 revlist.append(fixed_specs[fixed_index].name)
855 this_action_groups = []
856
857 # handle glitch
858 if fixed_specs[fixed_index].similar_score(
859 synthesized[synthesized_index]) != 0:
860 assert synthesized[synthesized_index].is_subset(
861 fixed_specs[fixed_index])
862 skipped = set(fixed_specs[fixed_index].entries) - set(
863 synthesized[synthesized_index].entries)
864 if skipped:
865 logger.warning(
866 'between %s and %s, '
867 'bisect-kit cannot analyze commit history of following paths:',
868 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
869 for path in sorted(skipped):
870 logger.warning(' %s', path)
871
872 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
873 synthesized[synthesized_index])
874 if make_up:
875 this_action_groups.append(make_up)
876
877 this_action_groups.extend(
878 action_groups[synthesized_index:next_synthesized_index])
879 for idx, ag in enumerate(this_action_groups, 1):
880 rev = make_intra_rev(fixed_specs[fixed_index].name,
881 fixed_specs[next_fixed_index].name, idx)
882 ag.name = rev
883 revlist.append(rev)
884
885 self.save_action_groups_between_releases(
886 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
887 this_action_groups)
888 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
889
890 return revlist
891
892 def save_action_groups_between_releases(self, old, new, action_groups):
893 data = [ag.serialize() for ag in action_groups]
894
895 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
896 if not os.path.exists(cache_dir):
897 os.makedirs(cache_dir)
898 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
899 with file(cache_filename, 'w') as fp:
900 json.dump(data, fp, indent=4, sort_keys=True)
901
902 def load_action_groups_between_releases(self, old, new):
903 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
904 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
905 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800906 raise errors.InternalError(
907 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800908
909 result = []
910 for data in json.load(file(cache_filename)):
911 result.append(ActionGroup.unserialize(data))
912
913 return result
914
Kuang-che Wue80bb872018-11-15 19:45:25 +0800915 def get_rev_detail(self, rev):
916 rev_old, rev_new, index = parse_intra_rev(rev)
917 if rev_old == rev_new:
918 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800919
Kuang-che Wue80bb872018-11-15 19:45:25 +0800920 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
921 # Indexes inside intra_rev are 1 based.
922 action_group = action_groups[index - 1]
923 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800924
925 def switch(self, rev):
926 # easy case
927 if not re.match(_re_intra_rev, rev):
928 self.spec_manager.sync_disk_state(rev)
929 return
930
931 rev_old, rev_new, idx = parse_intra_rev(rev)
932 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
933 assert 0 <= idx <= len(action_groups)
934 action_groups = action_groups[:idx]
935
936 self.spec_manager.sync_disk_state(rev_old)
937
938 apply_actions(self.code_storage, action_groups, self.root_dir)