blob: 132415a0ab769bf7dd70b8c6089cbf74a4dc36ee [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800231 common_count = 0
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800238 common_count += 1
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800245 logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
288 This models atomic commits (for example, gerrit topic, or circular
289 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
290 object.
291 """
292
293 def __init__(self, timestamp, comment=None):
294 self.timestamp = timestamp
295 self.name = None
296 self.actions = []
297 self.comment = comment
298
299 def add(self, action):
300 self.actions.append(action)
301
302 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800303 return dict(
304 timestamp=self.timestamp,
305 name=self.name,
306 comment=self.comment,
307 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800308
309 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800310 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800312 result['comment'] = self.comment
313 result['actions'] = [
314 action.summary(code_storage) for action in self.actions
315 ]
316 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317
318 @staticmethod
319 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800320 ag = ActionGroup(data['timestamp'])
321 ag.name = data['name']
322 ag.comment = data['comment']
323 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324 ag.add(unserialize_action(x))
325 return ag
326
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800327 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800328 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800329 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
332class GitCheckoutCommit(Action):
333 """Describes a git commit action.
334
335 Attributes:
336 repo_url: the corresponding url of git repo
337 rev: git commit to checkout
338 """
339
340 def __init__(self, timestamp, path, repo_url, rev):
341 super(GitCheckoutCommit, self).__init__(timestamp, path)
342 self.repo_url = repo_url
343 self.rev = rev
344
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800345 def apply(self, code_storage, root_dir):
346 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800347 git_repo = os.path.join(root_dir, self.path)
348 assert git_util.is_git_root(git_repo)
349 git_util.checkout_version(git_repo, self.rev)
350
351 def summary(self, code_storage):
352 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800353 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800354 commit_summary = git_util.get_commit_log(git_root,
355 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 except subprocess.CalledProcessError:
357 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
358 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800359 commit_summary = '(unknown)'
360 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
361 return dict(
362 timestamp=self.timestamp,
363 action_type='commit',
364 path=self.path,
365 commit_summary=commit_summary,
366 repo_url=self.repo_url,
367 rev=self.rev,
368 text=text,
369 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
372class GitAddRepo(Action):
373 """Describes a git repo add action.
374
375 Attributes:
376 repo_url: the corresponding url of git repo to add
377 rev: git commit to checkout
378 """
379
380 def __init__(self, timestamp, path, repo_url, rev):
381 super(GitAddRepo, self).__init__(timestamp, path)
382 self.repo_url = repo_url
383 self.rev = rev
384
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800385 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800386 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wudf11c8a2019-03-18 13:21:24 +0800387 if os.path.exists(git_repo):
388 if os.path.isdir(git_repo) and not os.listdir(git_repo):
389 # mimic gclient's behavior; don't panic
390 logger.warning(
391 'adding repo %s; there is already an empty directory; '
392 'assume it is okay', git_repo)
393 else:
394 assert not os.path.exists(git_repo), '%s already exists' % git_repo
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800395
396 reference = code_storage.cached_git_root(self.repo_url)
397 git_util.clone(git_repo, self.repo_url, reference=reference)
398 git_util.checkout_version(git_repo, self.rev)
399
400 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800401
402 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800403 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
404 return dict(
405 timestamp=self.timestamp,
406 action_type='add_repo',
407 path=self.path,
408 text=text,
409 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410
411
412class GitRemoveRepo(Action):
413 """Describes a git repo remove action."""
414
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800415 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800416 assert self.path
417 git_repo = os.path.join(root_dir, self.path)
418 assert git_util.is_git_root(git_repo)
Kuang-che Wu067ff292019-02-14 18:16:23 +0800419 # TODO(kcwu): other projects may be sub-tree of `git_repo`.
420 # They should not be deleted. (crbug/930047)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800421 shutil.rmtree(git_repo)
422
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800423 code_storage.remove_from_project_list(root_dir, self.path)
424
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800425 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800426 return dict(
427 timestamp=self.timestamp,
428 action_type='remove_repo',
429 path=self.path,
430 text='remove repo %s' % self.path,
431 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800432
433
434def apply_actions(code_storage, action_groups, root_dir):
435 # Speed optimization: only apply the last one of consecutive commits per
436 # repo. It is possible to optimize further, but need to take care git repo
437 # add/remove within another repo.
438 commits = {}
439
440 def batch_apply(commits):
441 for i, commit_action in sorted(commits.values()):
442 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800443 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800444
445 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800446 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800447 if not isinstance(action, GitCheckoutCommit):
448 break
449 else:
450 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800451 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800452 commits[action.path] = (i, action)
453 continue
454
455 batch_apply(commits)
456 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800457 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800458
459 batch_apply(commits)
460
461
462class SpecManager(object):
463 """Spec related abstract operations.
464
465 This class enumerates Spec instances and switch disk state to Spec.
466
467 In other words, this class abstracts:
468 - discovery of gclient's DEPS and repo's manifest
469 - gclient sync and repo sync
470 """
471
472 def collect_float_spec(self, old, new):
473 """Collects float Spec between two versions.
474
475 This method may fetch spec from network. However, it should not switch tree
476 version state.
477 """
478 raise NotImplementedError
479
480 def collect_fixed_spec(self, old, new):
481 """Collects fixed Spec between two versions.
482
483 This method may fetch spec from network. However, it should not switch tree
484 version state.
485 """
486 raise NotImplementedError
487
488 def parse_spec(self, spec):
489 """Parses information for Spec object.
490
491 Args:
492 spec: Spec object. It specifies what to parse and the parsed information
493 is stored inside.
494 """
495 raise NotImplementedError
496
497 def sync_disk_state(self, rev):
498 """Switch source tree state to given version."""
499 raise NotImplementedError
500
501
502class CodeStorage(object):
503 """Query code history and commit relationship without checkout.
504
505 Because paths inside source tree may be deleted or map to different remote
506 repo in different versions, we cannot query git information of one version
507 but the tree state is at another version. In order to query information
508 without changing tree state and fast, we need out of tree source code
509 storage.
510
511 This class assumes all git repos are mirrored somewhere on local disk.
512 Subclasses just need to implement cached_git_root() which returns the
513 location.
514
515 In other words, this class abstracts operations upon gclient's cache-dir
516 repo's mirror.
517 """
518
519 def cached_git_root(self, repo_url):
520 """The cached path of given remote git repo.
521
522 Args:
523 repo_url: URL of git remote repo
524
525 Returns:
526 path of cache folder
527 """
528 raise NotImplementedError
529
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800530 def add_to_project_list(self, project_root, path, repo_url):
531 raise NotImplementedError
532
533 def remove_from_project_list(self, project_root, path):
534 raise NotImplementedError
535
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800536 def is_ancestor_commit(self, spec, path, old, new):
537 """Determine one commit is ancestor of another.
538
539 Args:
540 spec: Spec object
541 path: local path relative to project root
542 old: commit id
543 new: commit id
544
545 Returns:
546 True if `old` is ancestor of `new`
547 """
548 git_root = self.cached_git_root(spec[path].repo_url)
549 return git_util.is_ancestor_commit(git_root, old, new)
550
551 def get_rev_by_time(self, spec, path, timestamp):
552 """Get commit hash of given spec by time.
553
554 Args:
555 spec: Spec object
556 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800557 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800558
559 Returns:
560 The commit hash of given time. If there are commits with the given
561 timestamp, returns the last commit.
562 """
563 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800564 # spec[path].at is remote reference name. Since git_root is a mirror (not
565 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800566 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800567
568 def get_actions_between_two_commit(self, spec, path, old, new):
569 git_root = self.cached_git_root(spec[path].repo_url)
570 result = []
571 for timestamp, git_rev in git_util.list_commits_between_commits(
572 git_root, old, new):
573 result.append(
574 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
575 return result
576
577 def is_containing_commit(self, spec, path, rev):
578 git_root = self.cached_git_root(spec[path].repo_url)
579 return git_util.is_containing_commit(git_root, rev)
580
581 def are_spec_commits_available(self, spec):
582 for path, path_spec in spec.entries.items():
583 if not path_spec.is_static():
584 continue
585 if not self.is_containing_commit(spec, path, path_spec.at):
586 return False
587 return True
588
589
590class CodeManager(object):
591 """Class to reconstruct historical source tree state.
592
593 This class can reconstruct all moments of source tree state and diffs between
594 them.
595
596 Attributes:
597 root_dir: root path of project source tree
598 spec_manager: SpecManager object
599 code_storage: CodeStorage object
600 """
601
602 def __init__(self, root_dir, spec_manager, code_storage):
603 self.root_dir = root_dir
604 self.spec_manager = spec_manager
605 self.code_storage = code_storage
606
607 def generate_actions_between_specs(self, prev_float, next_float):
608 """Generates actions between two float specs.
609
610 Args:
611 prev_float: start of spec object (exclusive)
612 next_float: end of spec object (inclusive)
613
614 Returns:
615 list of Action object (unordered)
616 """
617 actions = []
618 for path in set(prev_float.entries) | set(next_float.entries):
619
620 # Add repo
621 if path not in prev_float:
622 if next_float[path].is_static():
623 next_at = next_float[path].at
624 else:
625 next_at = self.code_storage.get_rev_by_time(next_float, path,
626 next_float.timestamp)
627 actions.append(
628 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
629 next_at))
630 continue
631
632 # Existing path is floating, enumerates commits until next spec.
633 #
634 # prev_at till_at
635 # prev branch ---> o --------> o --------> o --------> o --------> ...
636 # ^ ^
637 # prev_float.timestamp next_float.timestamp
638 if not prev_float[path].is_static():
639 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
640 prev_float.timestamp)
641 till_at = self.code_storage.get_rev_by_time(prev_float, path,
642 next_float.timestamp)
643
644 actions.extend(
645 self.code_storage.get_actions_between_two_commit(
646 prev_float, path, prev_at, till_at))
647 else:
648 prev_at = till_at = prev_float[path].at
649
650 # At next_float.timestamp.
651 if path not in next_float:
652 # remove repo
653 actions.append(GitRemoveRepo(next_float.timestamp, path))
654 next_at = None
655
656 elif next_float[path].is_static():
657 # pinned to certain commit on different branch
658 next_at = next_float[path].at
659
660 elif next_float[path].at == prev_float[path].at:
661 # keep floating on the same branch
662 next_at = till_at
663
664 else:
665 # switch to another branch
666 # prev_at till_at
667 # prev branch ---> o --------> o --------> o --------> o --------> ...
668 #
669 # next_at
670 # next branch ...... o ------> o --------> o -----> ...
671 # ^ ^
672 # prev_float.timestamp next_float.timestamp
673 next_at = self.code_storage.get_rev_by_time(next_float, path,
674 next_float.timestamp)
675
676 if next_at and next_at != till_at:
677 actions.append(
678 GitCheckoutCommit(next_float.timestamp, path,
679 next_float[path].repo_url, next_at))
680
681 return actions
682
683 def synthesize_fixed_spec(self, float_spec, timestamp):
684 """Synthesizes fixed spec from float spec of given time.
685
686 Args:
687 float_spec: the float spec
688 timestamp: snapshot time
689
690 Returns:
691 Spec object
692 """
693 result = {}
694 for path, path_spec in float_spec.entries.items():
695 if not path_spec.is_static():
696 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
697 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
698
699 result[path] = copy.deepcopy(path_spec)
700
701 name = '%s@%s' % (float_spec.path, timestamp)
702 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
703
704 def reorder_actions(self, actions):
705 """Reorder and cluster actions.
706
707 Args:
708 actions: list of Action objects
709
710 Returns:
711 list of ActionGroup objects
712 """
713 # TODO(kcwu): support atomic commits across repos
714 actions.sort(key=lambda x: x.timestamp)
715 result = []
716 for action in actions:
717 group = ActionGroup(action.timestamp)
718 group.add(action)
719 result.append(group)
720 return result
721
722 def match_spec(self, target, specs, start_index=0):
723 threshold = 3600
724 # ideal_index is the index of last spec before target
725 # begin and end are the range of indexes within threshold (inclusive)
726 ideal_index = None
727 begin, end = None, None
728 for i, spec in enumerate(specs[start_index:], start_index):
729 if spec.timestamp <= target.timestamp:
730 ideal_index = i
731 if abs(spec.timestamp - target.timestamp) < threshold:
732 if begin is None:
733 begin = i
734 end = i
735
736 candidates = []
737 if ideal_index is not None:
738 candidates.append(ideal_index)
739 if begin is not None:
740 candidates.extend(range(begin, end + 1))
741 if not candidates:
742 logger.error('unable to match %s: all specs are after it', target.name)
743 return None
744
745 compatible_candidates = [
746 i for i in candidates if specs[i].is_subset(target)
747 ]
748 if not compatible_candidates:
749 logger.error('unable to match %s: no compatible specs', target.name)
750 spec = specs[candidates[0]]
751 target.diff(spec)
752 return None
753
754 scores = []
755 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800756 # Tie-break: prefer earlier timestamp and smaller difference.
757 if specs[i].timestamp <= target.timestamp:
758 timediff = 0, target.timestamp - specs[i].timestamp
759 else:
760 timediff = 1, specs[i].timestamp - target.timestamp
761 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800762 scores.sort()
763
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800764 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800765 if score != 0:
766 logger.warning('not exactly match (score=%s): %s', score, target.name)
767 target.diff(specs[index])
768
769 if index < ideal_index:
770 logger.warning(
771 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
772 target.name, target.timestamp, specs[index].timestamp,
773 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800774 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800775 if index > ideal_index:
776 logger.warning(
777 'spec committed at %d matched later commit at %d. bad server clock?',
778 target.timestamp, specs[index].timestamp)
779
780 return index
781
782 def associate_fixed_and_synthesized_specs(self, fixed_specs,
783 synthesized_specs):
784 # All fixed specs are snapshot of float specs. Theoretically, they
785 # should be identical to one of the synthesized specs.
786 # However, it's not always true for some reasons --- maybe due to race
787 # condition, maybe due to bugs of this bisect-kit.
788 # To overcome this glitch, we try to match them by similarity instead of
789 # exact match.
790 result = []
791 last_index = 0
792 for i, fixed_spec in enumerate(fixed_specs):
793 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
794 if matched_index is None:
795 if i in (0, len(fixed_specs) - 1):
796 logger.error('essential spec mismatch, unable to continue')
797 assert 0
798 else:
799 logger.warning('%s do not match, skip', fixed_spec.name)
800 continue
801 result.append((i, matched_index))
802 last_index = matched_index
803
804 return result
805
806 def _create_make_up_actions(self, fixed_spec, synthesized):
807 timestamp = synthesized.timestamp
808 make_up = ActionGroup(
809 timestamp, comment='make up glitch for %s' % fixed_spec.name)
810 for path in set(fixed_spec.entries) & set(synthesized.entries):
811 if fixed_spec[path].at == synthesized[path].at:
812 continue
813 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
814 synthesized[path].at)
815 make_up.add(action)
816
817 if not make_up.actions:
818 return None
819 return make_up
820
821 def build_revlist(self, old, new):
822 """Build revlist.
823
824 Returns:
825 list of rev string
826 """
827 logger.info('build_revlist')
828 revlist = []
829
830 # step 1, find all float and fixed specs in the given range.
831 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800832 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800833 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800834 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800835 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
836 float_specs.pop()
837 assert float_specs
838 for spec in float_specs + fixed_specs:
839 self.spec_manager.parse_spec(spec)
840
841 # step 2, synthesize all fixed specs in the range from float specs.
842 specs = float_specs + [fixed_specs[-1]]
843 actions = []
844 logger.debug('len(specs)=%d', len(specs))
845 for i in range(len(specs) - 1):
846 prev_float = specs[i]
847 next_float = specs[i + 1]
848 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
849 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wu1ad2c0e2019-02-26 00:41:10 +0800850 actions += self.generate_actions_between_specs(prev_float, next_float)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800851 action_groups = self.reorder_actions(actions)
852
853 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
854 synthesized = [spec.copy()]
855 for action_group in action_groups:
856 spec.apply(action_group)
857 synthesized.append(spec.copy())
858
859 # step 3, associate fixed specs with synthesized specs.
860 associated_pairs = self.associate_fixed_and_synthesized_specs(
861 fixed_specs, synthesized)
862
863 # step 4, group actions and cache them
864 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
865 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
866 revlist.append(fixed_specs[fixed_index].name)
867 this_action_groups = []
868
869 # handle glitch
870 if fixed_specs[fixed_index].similar_score(
871 synthesized[synthesized_index]) != 0:
872 assert synthesized[synthesized_index].is_subset(
873 fixed_specs[fixed_index])
874 skipped = set(fixed_specs[fixed_index].entries) - set(
875 synthesized[synthesized_index].entries)
876 if skipped:
877 logger.warning(
878 'between %s and %s, '
879 'bisect-kit cannot analyze commit history of following paths:',
880 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
881 for path in sorted(skipped):
882 logger.warning(' %s', path)
883
884 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
885 synthesized[synthesized_index])
886 if make_up:
887 this_action_groups.append(make_up)
888
889 this_action_groups.extend(
890 action_groups[synthesized_index:next_synthesized_index])
891 for idx, ag in enumerate(this_action_groups, 1):
892 rev = make_intra_rev(fixed_specs[fixed_index].name,
893 fixed_specs[next_fixed_index].name, idx)
894 ag.name = rev
895 revlist.append(rev)
896
897 self.save_action_groups_between_releases(
898 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
899 this_action_groups)
900 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
901
902 return revlist
903
904 def save_action_groups_between_releases(self, old, new, action_groups):
905 data = [ag.serialize() for ag in action_groups]
906
907 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
908 if not os.path.exists(cache_dir):
909 os.makedirs(cache_dir)
910 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
911 with file(cache_filename, 'w') as fp:
912 json.dump(data, fp, indent=4, sort_keys=True)
913
914 def load_action_groups_between_releases(self, old, new):
915 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
916 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
917 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800918 raise errors.InternalError(
919 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800920
921 result = []
922 for data in json.load(file(cache_filename)):
923 result.append(ActionGroup.unserialize(data))
924
925 return result
926
Kuang-che Wue80bb872018-11-15 19:45:25 +0800927 def get_rev_detail(self, rev):
928 rev_old, rev_new, index = parse_intra_rev(rev)
929 if rev_old == rev_new:
930 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800931
Kuang-che Wue80bb872018-11-15 19:45:25 +0800932 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
933 # Indexes inside intra_rev are 1 based.
934 action_group = action_groups[index - 1]
935 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800936
937 def switch(self, rev):
938 # easy case
939 if not re.match(_re_intra_rev, rev):
940 self.spec_manager.sync_disk_state(rev)
941 return
942
943 rev_old, rev_new, idx = parse_intra_rev(rev)
944 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
945 assert 0 <= idx <= len(action_groups)
946 action_groups = action_groups[:idx]
947
948 self.spec_manager.sync_disk_state(rev_old)
949
950 apply_actions(self.code_storage, action_groups, self.root_dir)