blob: 5b4a0958a9c2389d5b9effff762179ba541bb324 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
This module models complex source code organization, i.e. nested git repos,
and their version relationships, i.e. pinned or floating git repos. In other
words, it is an abstraction of chrome's gclient DEPS, and of chromeos and
Android's repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Pattern of an intra-rev string "<start>~<end>/<index>" (see make_intra_rev).
# Groups 1 and 2 are the start and end versions (neither may contain a comma);
# group 3 is the decimal index.
_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

# Values of Spec.spec_type.
SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'
# Directory name, joined under CodeManager.root_dir, where the action groups
# between two releases are cached as json files.
_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
def make_intra_rev(a, b, index):
  """Composes the "intra-rev" version string "<a>~<b>/<index>".

  Many small changes (commits) lie between two major "named" versions a and
  b. bisect-kit identifies all those intermediate states and bisects them.
  Each state is given a name, called "intra-rev", which stands for a minor
  version number between two major versions.

  Note that a+index (without b) is not enough to identify a unique change
  because of branches. Take chromeos as an example: both 9900.1.0 and
  9901.0.0 are derived from 9900.0.0, so "9900.0.0 plus 100 changes" may
  ambiguously refer to states in 9900.1.0 and 9901.0.0.

  Args:
    a: the start version
    b: the end version
    index: the index number of changes between a and b

  Returns:
    the intra-rev version string
  """
  fields = (a, b, index)
  return '%s~%s/%d' % fields
57
58
def parse_intra_rev(rev):
  """Splits an intra-rev string into its components.

  See make_intra_rev for the definition of intra-rev.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). If rev does not look like an intra-rev, it is
    treated as a normal version number and (rev, rev, 0) is returned.
  """
  matched = re.match(_re_intra_rev, rev)
  if matched:
    start, end, index = matched.groups()
    return start, end, int(index)

  # Not an intra-rev: a plain version is its own start and end.
  return rev, rev, 0
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
def argtype_intra_rev(argtype):
  """Wraps an argtype function so that it also accepts intra-rev strings.

  Args:
    argtype: argtype function which validates major version number

  Returns:
    A new argtype function which matches intra-rev
  """

  def argtype_function(s):
    # A plain major version is accepted as is (whatever argtype returns).
    try:
      return argtype(s)
    except cli.ArgTypeError as e:
      examples = list(e.example)

    intra = re.match(_re_intra_rev, s)
    if intra is None:
      examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
      raise cli.ArgTypeError('Invalid rev', examples)

    # It looks like an intra-rev; both endpoints must be valid major versions.
    try:
      for endpoint in (intra.group(1), intra.group(2)):
        argtype(endpoint)
    except cli.ArgTypeError as e:
      examples.extend(
          make_intra_rev(example, example, 10) for example in e.example)
      raise cli.ArgTypeError('Invalid intra rev', examples)
    return s

  return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
class PathSpec(object):
  """Specified code version of one path.

  Two PathSpec objects are equal if their path and version are the same and
  their repo urls are the same after _normalize_repo_url().

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    """Returns whether `at` is a git rev, as judged by git_util.is_git_rev."""
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    # Comparing against None or an unrelated object must not blow up with
    # AttributeError. Returning NotImplemented lets python fall back to its
    # default comparison, which reports "not equal".
    try:
      rhs_path, rhs_at, rhs_repo_url = rhs.path, rhs.at, rhs.repo_url
    except AttributeError:
      return NotImplemented

    if self.path != rhs_path or self.at != rhs_at:
      return False
    return (_normalize_repo_url(self.repo_url) ==
            _normalize_repo_url(rhs_repo_url))

  def __ne__(self, rhs):
    # Needed explicitly for python2, which does not derive != from ==.
    return not self == rhs
147
148
class Spec(object):
  """Collection of PathSpec.

  Spec is the analogy of gclient's DEPS and repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
        version is pinned and fixed. On the other hand, SPEC_FLOAT is not
        pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purpose. usually version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: paths to PathSpec dict
  """

  def __init__(self, spec_type, name, timestamp, path, entries=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries

  def copy(self):
    """Returns a deep copy of this spec."""
    return copy.deepcopy(self)

  def similar_score(self, rhs):
    """Calculates similar score to another Spec.

    Every path found in only one of the two specs costs 1. A common path
    costs 1 if its version differs, or 0.1 if the version is the same but the
    entries are still not equal.

    Returns:
      score of similarity. Smaller value is more similar.
    """
    mine = set(self.entries)
    theirs = set(rhs.entries)

    score = 0
    for path in mine & theirs:
      if rhs[path] == self[path]:
        continue
      # Same version but unequal entries: it's often that the remote repo
      # moved around but should be treated as the same one.
      score += 0.1 if rhs[path].at == self[path].at else 1
    return score + len(mine ^ theirs)

  def is_static(self):
    """Returns True if every entry is static (vacuously True if empty)."""
    for path_spec in self.entries.values():
      if not path_spec.is_static():
        return False
    return True

  def is_subset(self, rhs):
    """Returns True if all paths of this spec also exist in `rhs`."""
    return set(self.entries).issubset(rhs.entries)

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    """Updates this spec in place according to an action group.

    The timestamp is taken from the action group and the name becomes
    '(<timestamp>)'.

    Args:
      action_group: ActionGroup object
    """
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp

    entries = self.entries
    for action in action_group.actions:
      if isinstance(action, GitCheckoutCommit):
        entries[action.path].at = action.rev
      elif isinstance(action, GitAddRepo):
        entries[action.path] = PathSpec(action.path, action.repo_url,
                                        action.rev)
      elif isinstance(action, GitRemoveRepo):
        del entries[action.path]
      else:
        assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    """Prints name, path, timestamp and the version of all entries."""
    # for debugging
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path in sorted(self.entries):
      print(path, self.entries[path].at)

  def diff(self, rhs):
    """Logs (at info level) the differences between this spec and `rhs`.

    Paths only in this spec are logged with '-' prefix, paths only in `rhs`
    with '+' prefix. For common paths, differing `at` and `repo_url` are
    logged; equal entries are only counted.
    """
    logger.info('diff between %s and %s', self.name, rhs.name)
    ours = set(self.entries)
    theirs = set(rhs.entries)

    for path in sorted(ours - theirs):
      logger.info('-%s', path)
    for path in sorted(theirs - ours):
      logger.info('+%s', path)

    common = 0
    for path in sorted(ours & theirs):
      lhs_entry = self[path]
      rhs_entry = rhs[path]
      if lhs_entry == rhs_entry:
        common += 1
        continue
      if lhs_entry.at != rhs_entry.at:
        logger.info(' %s: at %s vs %s', path, lhs_entry.at, rhs_entry.at)
      if lhs_entry.repo_url != rhs_entry.repo_url:
        logger.info(' %s: repo_url %s vs %s', path, lhs_entry.repo_url,
                    rhs_entry.repo_url)
    logger.info('and common=%s', common)
245 logger.info('and common=%s', common)
246
247
class Action(object):
  """Actions describe changes from one Spec to another.

  This is the base class; subclasses implement apply() and summary().

  Two actions are equal if they are of the same class and all their
  attributes are equal.

  Attributes:
    timestamp: action time
    path: action path, which is relative to project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _code_storage, _root_dir):
    """Performs this action on disk. Must be implemented by subclasses."""
    raise NotImplementedError

  def summary(self, _code_storage):
    """Describes this action. Must be implemented by subclasses."""
    raise NotImplementedError

  def __eq__(self, rhs):
    # Non-Action operands (None, str, ...) may not even have __dict__. Let
    # python fall back to its default comparison instead of raising.
    if not isinstance(rhs, Action):
      return NotImplemented
    # Different kinds of action are never equal even if their attributes
    # happen to be identical (e.g. GitAddRepo vs GitCheckoutCommit).
    return (self.__class__ is rhs.__class__ and
            self.__dict__ == rhs.__dict__)

  def __ne__(self, rhs):
    # Needed explicitly for python2, which does not derive != from ==.
    return not self == rhs

  def serialize(self):
    """Returns (class name, attribute dict), as used by unserialize_action.

    Note the returned dict is the object's own __dict__, not a copy.
    """
    return self.__class__.__name__, self.__dict__
271
272
def unserialize_action(data):
  """Reconstructs an action object from the output of Action.serialize().

  Args:
    data: (class_name, values) pair; `values` is passed as keyword arguments
        to the constructor of the named action class

  Returns:
    GitCheckoutCommit, GitAddRepo, or GitRemoveRepo object
  """
  known_classes = dict(
      (cls.__name__, cls)
      for cls in (GitCheckoutCommit, GitAddRepo, GitRemoveRepo))
  class_name, values = data
  assert class_name in known_classes, 'unknown action class: %s' % class_name
  return known_classes[class_name](**values)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
class ActionGroup(object):
  """Atomic group of Action objects

  This models atomic commits (for example, gerrit topic, or circular
  CQ-DEPEND). Otherwise, one ActionGroup usually consists of only one Action
  object.

  Attributes:
    timestamp: time of this group
    name: name of this group; None until assigned
    actions: list of Action objects, in the order they were added
    comment: optional comment
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    """Appends one action to this group."""
    self.actions.append(action)

  def serialize(self):
    """Returns a dict representation, the inverse of unserialize()."""
    return {
        'timestamp': self.timestamp,
        'name': self.name,
        'comment': self.comment,
        'actions': [action.serialize() for action in self.actions],
    }

  def summary(self, code_storage):
    """Returns dict of action summaries; 'comment' is included if non-empty."""
    result = {'comment': self.comment} if self.comment else {}
    result['actions'] = [
        action.summary(code_storage) for action in self.actions
    ]
    return result

  @staticmethod
  def unserialize(data):
    """Creates ActionGroup from the dict returned by serialize()."""
    group = ActionGroup(data['timestamp'])
    group.name = data['name']
    group.comment = data['comment']
    for serialized in data['actions']:
      group.add(unserialize_action(serialized))
    return group

  def apply(self, code_storage, root_dir):
    """Applies all actions of this group, in order."""
    for member in self.actions:
      member.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
class GitCheckoutCommit(Action):
  """Describes a git commit action.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Checks out `rev` in the existing git repo at root_dir/path."""
    del code_storage  # unused
    checkout_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(checkout_dir)
    git_util.checkout_version(checkout_dir, self.rev)

  def summary(self, code_storage):
    """Returns a dict describing this commit.

    The commit summary is the first line of the commit log, read from the
    cached git repo of `repo_url`. If the log cannot be read, '(unknown)' is
    used instead.
    """
    git_root = code_storage.cached_git_root(self.repo_url)
    try:
      log_lines = git_util.get_commit_log(git_root, self.rev).splitlines()
      commit_summary = log_lines[0]
    except subprocess.CalledProcessError:
      logger.warning('failed to get commit log of %s at %s', self.rev[:10],
                     git_root)
      commit_summary = '(unknown)'

    short_rev = self.rev[:10]
    return {
        'timestamp': self.timestamp,
        'action_type': 'commit',
        'path': self.path,
        'commit_summary': commit_summary,
        'repo_url': self.repo_url,
        'rev': self.rev,
        'text': 'commit %s %s %r' % (short_rev, self.path, commit_summary),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
class GitAddRepo(Action):
  """Describes a git repo add action.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Clones the repo to root_dir/path, which must not exist yet.

    The clone uses the cached git repo as reference. After `rev` is checked
    out, the new path is registered via code_storage.add_to_project_list().
    """
    target_dir = os.path.join(root_dir, self.path)
    assert not os.path.exists(target_dir)

    cached_repo = code_storage.cached_git_root(self.repo_url)
    git_util.clone(target_dir, self.repo_url, reference=cached_repo)
    git_util.checkout_version(target_dir, self.rev)

    code_storage.add_to_project_list(root_dir, self.path, self.repo_url)

  def summary(self, _code_storage):
    """Returns a dict describing this repo addition."""
    short_rev = self.rev[:10]
    return {
        'timestamp': self.timestamp,
        'action_type': 'add_repo',
        'path': self.path,
        'text': 'add repo %s from %s@%s' % (self.path, self.repo_url,
                                            short_rev),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800403
404
class GitRemoveRepo(Action):
  """Describes a git repo remove action."""

  def apply(self, code_storage, root_dir):
    """Deletes the git repo at root_dir/path and unregisters it.

    Raises:
      AssertionError: `path` is empty, or root_dir/path is not a git root.
          Nothing is deleted in either case.
    """
    # These checks guard a recursive delete, so they are raised explicitly
    # (same exception type as before) instead of via `assert`, which is
    # stripped by `python -O`. An empty path would make os.path.join() return
    # root_dir itself.
    if not self.path:
      raise AssertionError('refuse to remove repo with empty path')
    git_repo = os.path.join(root_dir, self.path)
    if not git_util.is_git_root(git_repo):
      raise AssertionError('not a git root: %s' % git_repo)
    # TODO(kcwu): other projects may be sub-tree of `git_repo`.
    # They should not be deleted. (crbug/930047)
    shutil.rmtree(git_repo)

    code_storage.remove_from_project_list(root_dir, self.path)

  def summary(self, _code_storage):
    """Returns a dict describing this repo removal."""
    return dict(
        timestamp=self.timestamp,
        action_type='remove_repo',
        path=self.path,
        text='remove repo %s' % self.path,
    )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800425
426
def apply_actions(code_storage, action_groups, root_dir):
  """Applies action groups, in order, to the source tree on disk.

  Speed optimization: only apply the last one of consecutive commits per
  repo. It is possible to optimize further, but need to take care git repo
  add/remove within another repo.

  Args:
    code_storage: CodeStorage object
    action_groups: list of ActionGroup objects
    root_dir: root path of project source tree
  """
  # path -> (group index, index within group, action). Holds the latest
  # deferred commit of each repo.
  pending = {}

  def flush_pending():
    # Sort by position only. Without the key, ties on the group index would
    # compare the Action objects themselves, which raises TypeError on
    # python3 and yields arbitrary order on python2.
    ordered = sorted(pending.values(), key=lambda item: item[:2])
    for group_index, _, commit_action in ordered:
      # summary() queries the commit log; skip it unless it will be logged.
      if logger.isEnabledFor(logging.DEBUG):
        logger.debug('[%d] applying "%r"', group_index,
                     commit_action.summary(code_storage))
      commit_action.apply(code_storage, root_dir)
    pending.clear()

  for group_index, action_group in enumerate(action_groups, 1):
    if all(
        isinstance(action, GitCheckoutCommit)
        for action in action_group.actions):
      # If all actions are commits, defer them for batch processing.
      for action_index, action in enumerate(action_group.actions):
        pending[action.path] = (group_index, action_index, action)
      continue

    # The group adds or removes a repo. Bring the tree up to date first, then
    # apply the whole group (not just its first non-commit action) so no
    # action of the group is dropped.
    flush_pending()
    action_group.apply(code_storage, root_dir)

  flush_pending()
453
454
class SpecManager(object):
  """Spec related abstract operations.

  This class enumerates Spec instances and switches disk state to Spec.

  In other words, this class abstracts:
   - discovery of gclient's DEPS and repo's manifest
   - gclient sync and repo sync

  All methods here raise NotImplementedError; subclasses provide the actual
  implementation.
  """

  def collect_float_spec(self, old, new):
    """Collects float Spec between two versions.

    Implementations may fetch specs from network, but should not switch tree
    version state.
    """
    raise NotImplementedError

  def collect_fixed_spec(self, old, new):
    """Collects fixed Spec between two versions.

    Implementations may fetch specs from network, but should not switch tree
    version state.
    """
    raise NotImplementedError

  def parse_spec(self, spec):
    """Parses information for Spec object.

    Args:
      spec: Spec object. It specifies what to parse and the parsed
          information is stored inside.
    """
    raise NotImplementedError

  def sync_disk_state(self, rev):
    """Switches source tree state to given version."""
    raise NotImplementedError
493
494
class CodeStorage(object):
  """Query code history and commit relationship without checkout.

  Because paths inside source tree may be deleted or map to different remote
  repo in different versions, we cannot query git information of one version
  while the tree state is at another version. In order to query information
  fast and without changing tree state, we need out of tree source code
  storage.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses just need to implement cached_git_root() which returns the
  location.

  In other words, this class abstracts operations upon gclient's cache-dir
  and repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """The cached path of given remote git repo.

    Args:
      repo_url: URL of git remote repo

    Returns:
      path of cache folder
    """
    raise NotImplementedError

  def add_to_project_list(self, project_root, path, repo_url):
    """Registers a newly added repo. Must be implemented by subclasses."""
    raise NotImplementedError

  def remove_from_project_list(self, project_root, path):
    """Unregisters a removed repo. Must be implemented by subclasses."""
    raise NotImplementedError

  def is_ancestor_commit(self, spec, path, old, new):
    """Determines whether one commit is ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is ancestor of `new`
    """
    repo_url = spec[path].repo_url
    return git_util.is_ancestor_commit(
        self.cached_git_root(repo_url), old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Gets commit hash of given spec by time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp

    Returns:
      The commit hash of given time. If there are commits with the given
      timestamp, returns the last commit.
    """
    path_spec = spec[path]
    git_root = self.cached_git_root(path_spec.repo_url)
    # path_spec.at is remote reference name. Since git_root is a mirror (not
    # a local checkout), there is no need to convert the name.
    return git_util.get_rev_by_time(git_root, timestamp, path_spec.at)

  def get_actions_between_two_commit(self, spec, path, old, new):
    """Lists commits between `old` and `new` as GitCheckoutCommit actions.

    The commits are those reported by git_util.list_commits_between_commits
    for the cached repo of `path`, in the same order.
    """
    repo_url = spec[path].repo_url
    git_root = self.cached_git_root(repo_url)
    return [
        GitCheckoutCommit(timestamp, path, repo_url, git_rev)
        for timestamp, git_rev in git_util.list_commits_between_commits(
            git_root, old, new)
    ]

  def is_containing_commit(self, spec, path, rev):
    """Returns whether the cached repo of `path` contains commit `rev`."""
    repo_url = spec[path].repo_url
    return git_util.is_containing_commit(self.cached_git_root(repo_url), rev)

  def are_spec_commits_available(self, spec):
    """Returns True if the commits of all static entries are available.

    Non-static entries are not checked.
    """
    return all(
        self.is_containing_commit(spec, path, path_spec.at)
        for path, path_spec in spec.entries.items()
        if path_spec.is_static())
580 return True
581
582
class CodeManager(object):
  """Class to reconstruct historical source tree state.

  This class can reconstruct all moments of source tree state and diffs
  between them.

  Attributes:
    root_dir: root path of project source tree
    spec_manager: SpecManager object
    code_storage: CodeStorage object
  """

  def __init__(self, root_dir, spec_manager, code_storage):
    self.root_dir = root_dir
    self.spec_manager = spec_manager
    self.code_storage = code_storage

  def generate_actions_between_specs(self, prev_float, next_float):
    """Generates actions between two float specs.

    Args:
      prev_float: start of spec object (exclusive)
      next_float: end of spec object (inclusive)

    Returns:
      list of Action object (unordered)
    """
    actions = []
    for path in set(prev_float.entries) | set(next_float.entries):

      # Add repo
      if path not in prev_float:
        if next_float[path].is_static():
          next_at = next_float[path].at
        else:
          next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                      next_float.timestamp)
        actions.append(
            GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
                       next_at))
        continue

      # Existing path is floating, enumerates commits until next spec.
      #
      #                  prev_at                 till_at
      # prev branch ---> o --------> o --------> o --------> o --------> ...
      #                     ^                       ^
      #            prev_float.timestamp    next_float.timestamp
      if not prev_float[path].is_static():
        prev_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    prev_float.timestamp)
        till_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                    next_float.timestamp)

        actions.extend(
            self.code_storage.get_actions_between_two_commit(
                prev_float, path, prev_at, till_at))
      else:
        prev_at = till_at = prev_float[path].at

      # At next_float.timestamp.
      if path not in next_float:
        # remove repo
        actions.append(GitRemoveRepo(next_float.timestamp, path))
        next_at = None

      elif next_float[path].is_static():
        # pinned to certain commit on different branch
        next_at = next_float[path].at

      elif next_float[path].at == prev_float[path].at:
        # keep floating on the same branch
        next_at = till_at

      else:
        # switch to another branch
        #                  prev_at                 till_at
        # prev branch ---> o --------> o --------> o --------> o --------> ...
        #
        #                                          next_at
        # next branch ...... o ------> o --------> o -----> ...
        #                     ^                       ^
        #            prev_float.timestamp    next_float.timestamp
        next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                    next_float.timestamp)

      if next_at and next_at != till_at:
        actions.append(
            GitCheckoutCommit(next_float.timestamp, path,
                              next_float[path].repo_url, next_at))

    return actions

  def synthesize_fixed_spec(self, float_spec, timestamp):
    """Synthesizes fixed spec from float spec of given time.

    Non-static entries are resolved to the commit of the given time; static
    entries are copied.

    Args:
      float_spec: the float spec
      timestamp: snapshot time

    Returns:
      Spec object
    """
    result = {}
    for path, path_spec in float_spec.entries.items():
      if not path_spec.is_static():
        at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
        path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)

      result[path] = copy.deepcopy(path_spec)

    name = '%s@%s' % (float_spec.path, timestamp)
    return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)

  def reorder_actions(self, actions):
    """Reorder and cluster actions.

    Currently every action becomes its own group. Note `actions` is sorted
    in place by timestamp.

    Args:
      actions: list of Action objects

    Returns:
      list of ActionGroup objects
    """
    # TODO(kcwu): support atomic commits across repos
    actions.sort(key=lambda x: x.timestamp)
    result = []
    for action in actions:
      group = ActionGroup(action.timestamp)
      group.add(action)
      result.append(group)
    return result

  def match_spec(self, target, specs, start_index=0):
    """Finds the spec which is most similar to `target`.

    Candidates are the last spec not later than `target` and all specs within
    one hour of `target`. Among candidates whose paths are a subset of
    `target`'s, the one with the smallest similar_score() wins; ties prefer
    earlier timestamp and smaller time difference.

    Args:
      target: Spec object to match
      specs: list of Spec objects
      start_index: only specs[start_index:] are considered

    Returns:
      index (into `specs`) of the matched spec. None if there are no
      candidates or no compatible candidates.
    """
    threshold = 3600
    # ideal_index is the index of last spec before target
    # begin and end are the range of indexes within threshold (inclusive)
    ideal_index = None
    begin, end = None, None
    for i, spec in enumerate(specs[start_index:], start_index):
      if spec.timestamp <= target.timestamp:
        ideal_index = i
      if abs(spec.timestamp - target.timestamp) < threshold:
        if begin is None:
          begin = i
        end = i

    candidates = []
    if ideal_index is not None:
      candidates.append(ideal_index)
    if begin is not None:
      candidates.extend(range(begin, end + 1))
    if not candidates:
      logger.error('unable to match %s: all specs are after it', target.name)
      return None

    compatible_candidates = [
        i for i in candidates if specs[i].is_subset(target)
    ]
    if not compatible_candidates:
      logger.error('unable to match %s: no compatible specs', target.name)
      spec = specs[candidates[0]]
      target.diff(spec)
      return None

    scores = []
    for i in compatible_candidates:
      # Tie-break: prefer earlier timestamp and smaller difference.
      if specs[i].timestamp <= target.timestamp:
        timediff = 0, target.timestamp - specs[i].timestamp
      else:
        timediff = 1, specs[i].timestamp - target.timestamp
      scores.append((specs[i].similar_score(target), timediff, i))
    scores.sort()

    score, _, index = scores[0]
    if score != 0:
      logger.warning('not exactly match (score=%s): %s', score, target.name)
      target.diff(specs[index])

    # ideal_index is None if all considered specs are later than target (the
    # candidates then came from the threshold window only). It must not be
    # compared with an int, which raises TypeError on python3. Any match is a
    # "later" match in that case.
    if ideal_index is not None and index < ideal_index:
      logger.warning(
          '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
          target.name, target.timestamp, specs[index].timestamp,
          specs[ideal_index].timestamp,
          specs[index].timestamp - target.timestamp)
    if ideal_index is None or index > ideal_index:
      logger.warning(
          'spec committed at %d matched later commit at %d. bad server clock?',
          target.timestamp, specs[index].timestamp)

    return index

  def associate_fixed_and_synthesized_specs(self, fixed_specs,
                                            synthesized_specs):
    """Matches each fixed spec to a synthesized spec.

    The search for each fixed spec starts from the index matched by the
    previous one. Fixed specs which cannot be matched are skipped, except
    the first and the last ones, which are essential.

    Args:
      fixed_specs: list of fixed Spec objects
      synthesized_specs: list of synthesized Spec objects

    Returns:
      list of (index of fixed_specs, index of synthesized_specs)
    """
    # All fixed specs are snapshot of float specs. Theoretically, they
    # should be identical to one of the synthesized specs.
    # However, it's not always true for some reasons --- maybe due to race
    # condition, maybe due to bugs of this bisect-kit.
    # To overcome this glitch, we try to match them by similarity instead of
    # exact match.
    result = []
    last_index = 0
    for i, fixed_spec in enumerate(fixed_specs):
      matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
      if matched_index is None:
        if i in (0, len(fixed_specs) - 1):
          logger.error('essential spec mismatch, unable to continue')
          assert 0
        else:
          logger.warning('%s do not match, skip', fixed_spec.name)
          continue
      result.append((i, matched_index))
      last_index = matched_index

    return result

  def _create_make_up_actions(self, fixed_spec, synthesized):
    """Creates an action group checking out `synthesized`'s versions.

    One GitCheckoutCommit is created for every common path whose version
    differs between the two specs.

    Returns:
      ActionGroup object, or None if there is nothing to make up.
    """
    timestamp = synthesized.timestamp
    make_up = ActionGroup(
        timestamp, comment='make up glitch for %s' % fixed_spec.name)
    for path in set(fixed_spec.entries) & set(synthesized.entries):
      if fixed_spec[path].at == synthesized[path].at:
        continue
      action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
                                 synthesized[path].at)
      make_up.add(action)

    if not make_up.actions:
      return None
    return make_up

  def build_revlist(self, old, new):
    """Build revlist.

    As a side effect, the action groups between every two adjacent matched
    fixed specs are saved via save_action_groups_between_releases().

    Returns:
      list of rev string
    """
    logger.info('build_revlist')
    revlist = []

    # step 1, find all float and fixed specs in the given range.
    fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
    assert fixed_specs
    float_specs = self.spec_manager.collect_float_spec(old, new)
    assert float_specs
    # Guard against emptying the list, so the assert below (instead of an
    # IndexError here) reports the case that all float specs are too new.
    while (float_specs and
           float_specs[-1].timestamp > fixed_specs[-1].timestamp):
      float_specs.pop()
    assert float_specs
    for spec in float_specs + fixed_specs:
      self.spec_manager.parse_spec(spec)

    # step 2, synthesize all fixed specs in the range from float specs.
    specs = float_specs + [fixed_specs[-1]]
    actions = []
    logger.debug('len(specs)=%d', len(specs))
    for i in range(len(specs) - 1):
      prev_float = specs[i]
      next_float = specs[i + 1]
      logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                   prev_float.timestamp, next_float.name, next_float.timestamp)
      actions += self.generate_actions_between_specs(prev_float, next_float)
    action_groups = self.reorder_actions(actions)

    spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
    synthesized = [spec.copy()]
    for action_group in action_groups:
      spec.apply(action_group)
      synthesized.append(spec.copy())

    # step 3, associate fixed specs with synthesized specs.
    associated_pairs = self.associate_fixed_and_synthesized_specs(
        fixed_specs, synthesized)

    # step 4, group actions and cache them
    for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
      next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
      revlist.append(fixed_specs[fixed_index].name)
      this_action_groups = []

      # handle glitch
      if fixed_specs[fixed_index].similar_score(
          synthesized[synthesized_index]) != 0:
        assert synthesized[synthesized_index].is_subset(
            fixed_specs[fixed_index])
        skipped = set(fixed_specs[fixed_index].entries) - set(
            synthesized[synthesized_index].entries)
        if skipped:
          logger.warning(
              'between %s and %s, '
              'bisect-kit cannot analyze commit history of following paths:',
              fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
          for path in sorted(skipped):
            logger.warning(' %s', path)

        make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                               synthesized[synthesized_index])
        if make_up:
          this_action_groups.append(make_up)

      this_action_groups.extend(
          action_groups[synthesized_index:next_synthesized_index])
      for idx, ag in enumerate(this_action_groups, 1):
        rev = make_intra_rev(fixed_specs[fixed_index].name,
                             fixed_specs[next_fixed_index].name, idx)
        ag.name = rev
        revlist.append(rev)

      self.save_action_groups_between_releases(
          fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
          this_action_groups)
    revlist.append(fixed_specs[associated_pairs[-1][0]].name)

    return revlist

  def save_action_groups_between_releases(self, old, new, action_groups):
    """Saves action groups as json to '<old>,<new>.json' in the cache dir.

    The cache dir is _DIFF_CACHE_DIR under root_dir; it is created if
    missing.
    """
    data = [ag.serialize() for ag in action_groups]

    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    if not os.path.exists(cache_dir):
      os.makedirs(cache_dir)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    # open() instead of the python2-only file() builtin.
    with open(cache_filename, 'w') as fp:
      json.dump(data, fp, indent=4, sort_keys=True)

  def load_action_groups_between_releases(self, old, new):
    """Loads action groups saved by save_action_groups_between_releases().

    Returns:
      list of ActionGroup objects

    Raises:
      errors.InternalError: the cache file doesn't exist
    """
    cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
    cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
    if not os.path.exists(cache_filename):
      raise errors.InternalError(
          'cached revlist not found: %s' % cache_filename)

    # Use a context manager so the file handle is closed deterministically.
    with open(cache_filename) as fp:
      serialized_groups = json.load(fp)
    return [ActionGroup.unserialize(data) for data in serialized_groups]

  def get_rev_detail(self, rev):
    """Returns the summary of the action group named by intra-rev `rev`.

    Returns:
      dict from ActionGroup.summary(). Empty dict if `rev` is a normal
      version or its index is 0, i.e. no action is associated.
    """
    rev_old, rev_new, index = parse_intra_rev(rev)
    # Index 0 means the state of rev_old itself (see switch()). Without this
    # check, `index - 1` would wrongly select the last action group.
    if rev_old == rev_new or index == 0:
      return {}

    action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    # Indexes inside intra_rev are 1 based.
    action_group = action_groups[index - 1]
    return action_group.summary(self.code_storage)

  def switch(self, rev):
    """Switches the source tree on disk to `rev`.

    For an intra-rev, the tree is first synced to its start version and then
    the first `index` cached action groups are applied.

    Args:
      rev: normal version or intra-rev string
    """
    # easy case
    if not re.match(_re_intra_rev, rev):
      self.spec_manager.sync_disk_state(rev)
      return

    rev_old, rev_new, idx = parse_intra_rev(rev)
    action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    assert 0 <= idx <= len(action_groups)
    action_groups = action_groups[:idx]

    self.spec_manager.sync_disk_state(rev_old)

    apply_actions(self.code_storage, action_groups, self.root_dir)
943 apply_actions(self.code_storage, action_groups, self.root_dir)