blob: 172544e75f321198cc8db54be1e0d870785137a4 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800231 common_count = 0
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800238 common_count += 1
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800245 logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
288 This models atomic commits (for example, gerrit topic, or circular
289 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
290 object.
291 """
292
293 def __init__(self, timestamp, comment=None):
294 self.timestamp = timestamp
295 self.name = None
296 self.actions = []
297 self.comment = comment
298
299 def add(self, action):
300 self.actions.append(action)
301
302 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800303 return dict(
304 timestamp=self.timestamp,
305 name=self.name,
306 comment=self.comment,
307 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800308
309 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800310 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800312 result['comment'] = self.comment
313 result['actions'] = [
314 action.summary(code_storage) for action in self.actions
315 ]
316 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317
318 @staticmethod
319 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800320 ag = ActionGroup(data['timestamp'])
321 ag.name = data['name']
322 ag.comment = data['comment']
323 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324 ag.add(unserialize_action(x))
325 return ag
326
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800327 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800328 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800329 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
332class GitCheckoutCommit(Action):
333 """Describes a git commit action.
334
335 Attributes:
336 repo_url: the corresponding url of git repo
337 rev: git commit to checkout
338 """
339
340 def __init__(self, timestamp, path, repo_url, rev):
341 super(GitCheckoutCommit, self).__init__(timestamp, path)
342 self.repo_url = repo_url
343 self.rev = rev
344
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800345 def apply(self, code_storage, root_dir):
346 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800347 git_repo = os.path.join(root_dir, self.path)
348 assert git_util.is_git_root(git_repo)
349 git_util.checkout_version(git_repo, self.rev)
350
351 def summary(self, code_storage):
352 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800353 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800354 commit_summary = git_util.get_commit_log(git_root,
355 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 except subprocess.CalledProcessError:
357 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
358 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800359 commit_summary = '(unknown)'
360 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
361 return dict(
362 timestamp=self.timestamp,
363 action_type='commit',
364 path=self.path,
365 commit_summary=commit_summary,
366 repo_url=self.repo_url,
367 rev=self.rev,
368 text=text,
369 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
372class GitAddRepo(Action):
373 """Describes a git repo add action.
374
375 Attributes:
376 repo_url: the corresponding url of git repo to add
377 rev: git commit to checkout
378 """
379
380 def __init__(self, timestamp, path, repo_url, rev):
381 super(GitAddRepo, self).__init__(timestamp, path)
382 self.repo_url = repo_url
383 self.rev = rev
384
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800385 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800386 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wudf11c8a2019-03-18 13:21:24 +0800387 if os.path.exists(git_repo):
388 if os.path.isdir(git_repo) and not os.listdir(git_repo):
389 # mimic gclient's behavior; don't panic
390 logger.warning(
391 'adding repo %s; there is already an empty directory; '
392 'assume it is okay', git_repo)
393 else:
394 assert not os.path.exists(git_repo), '%s already exists' % git_repo
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800395
396 reference = code_storage.cached_git_root(self.repo_url)
397 git_util.clone(git_repo, self.repo_url, reference=reference)
398 git_util.checkout_version(git_repo, self.rev)
399
400 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800401
402 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800403 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
404 return dict(
405 timestamp=self.timestamp,
406 action_type='add_repo',
407 path=self.path,
408 text=text,
409 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410
411
412class GitRemoveRepo(Action):
413 """Describes a git repo remove action."""
414
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800415 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800416 assert self.path
417 git_repo = os.path.join(root_dir, self.path)
418 assert git_util.is_git_root(git_repo)
Kuang-che Wu067ff292019-02-14 18:16:23 +0800419 # TODO(kcwu): other projects may be sub-tree of `git_repo`.
420 # They should not be deleted. (crbug/930047)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800421 shutil.rmtree(git_repo)
422
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800423 code_storage.remove_from_project_list(root_dir, self.path)
424
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800425 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800426 return dict(
427 timestamp=self.timestamp,
428 action_type='remove_repo',
429 path=self.path,
430 text='remove repo %s' % self.path,
431 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800432
433
434def apply_actions(code_storage, action_groups, root_dir):
435 # Speed optimization: only apply the last one of consecutive commits per
436 # repo. It is possible to optimize further, but need to take care git repo
437 # add/remove within another repo.
438 commits = {}
439
440 def batch_apply(commits):
441 for i, commit_action in sorted(commits.values()):
442 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800443 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800444
445 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800446 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800447 if not isinstance(action, GitCheckoutCommit):
448 break
449 else:
450 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800451 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800452 commits[action.path] = (i, action)
453 continue
454
455 batch_apply(commits)
456 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800457 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800458
459 batch_apply(commits)
460
461
462class SpecManager(object):
463 """Spec related abstract operations.
464
465 This class enumerates Spec instances and switch disk state to Spec.
466
467 In other words, this class abstracts:
468 - discovery of gclient's DEPS and repo's manifest
469 - gclient sync and repo sync
470 """
471
472 def collect_float_spec(self, old, new):
473 """Collects float Spec between two versions.
474
475 This method may fetch spec from network. However, it should not switch tree
476 version state.
477 """
478 raise NotImplementedError
479
480 def collect_fixed_spec(self, old, new):
481 """Collects fixed Spec between two versions.
482
483 This method may fetch spec from network. However, it should not switch tree
484 version state.
485 """
486 raise NotImplementedError
487
488 def parse_spec(self, spec):
489 """Parses information for Spec object.
490
491 Args:
492 spec: Spec object. It specifies what to parse and the parsed information
493 is stored inside.
494 """
495 raise NotImplementedError
496
497 def sync_disk_state(self, rev):
498 """Switch source tree state to given version."""
499 raise NotImplementedError
500
501
502class CodeStorage(object):
503 """Query code history and commit relationship without checkout.
504
505 Because paths inside source tree may be deleted or map to different remote
506 repo in different versions, we cannot query git information of one version
507 but the tree state is at another version. In order to query information
508 without changing tree state and fast, we need out of tree source code
509 storage.
510
511 This class assumes all git repos are mirrored somewhere on local disk.
512 Subclasses just need to implement cached_git_root() which returns the
513 location.
514
515 In other words, this class abstracts operations upon gclient's cache-dir
516 repo's mirror.
517 """
518
519 def cached_git_root(self, repo_url):
520 """The cached path of given remote git repo.
521
522 Args:
523 repo_url: URL of git remote repo
524
525 Returns:
526 path of cache folder
527 """
528 raise NotImplementedError
529
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800530 def add_to_project_list(self, project_root, path, repo_url):
531 raise NotImplementedError
532
533 def remove_from_project_list(self, project_root, path):
534 raise NotImplementedError
535
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800536 def is_ancestor_commit(self, spec, path, old, new):
537 """Determine one commit is ancestor of another.
538
539 Args:
540 spec: Spec object
541 path: local path relative to project root
542 old: commit id
543 new: commit id
544
545 Returns:
546 True if `old` is ancestor of `new`
547 """
548 git_root = self.cached_git_root(spec[path].repo_url)
549 return git_util.is_ancestor_commit(git_root, old, new)
550
551 def get_rev_by_time(self, spec, path, timestamp):
552 """Get commit hash of given spec by time.
553
554 Args:
555 spec: Spec object
556 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800557 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800558
559 Returns:
560 The commit hash of given time. If there are commits with the given
561 timestamp, returns the last commit.
562 """
563 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800564 # spec[path].at is remote reference name. Since git_root is a mirror (not
565 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800566 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800567
568 def get_actions_between_two_commit(self, spec, path, old, new):
569 git_root = self.cached_git_root(spec[path].repo_url)
570 result = []
571 for timestamp, git_rev in git_util.list_commits_between_commits(
572 git_root, old, new):
573 result.append(
574 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
575 return result
576
577 def is_containing_commit(self, spec, path, rev):
578 git_root = self.cached_git_root(spec[path].repo_url)
579 return git_util.is_containing_commit(git_root, rev)
580
581 def are_spec_commits_available(self, spec):
582 for path, path_spec in spec.entries.items():
583 if not path_spec.is_static():
584 continue
585 if not self.is_containing_commit(spec, path, path_spec.at):
586 return False
587 return True
588
589
590class CodeManager(object):
591 """Class to reconstruct historical source tree state.
592
593 This class can reconstruct all moments of source tree state and diffs between
594 them.
595
596 Attributes:
597 root_dir: root path of project source tree
598 spec_manager: SpecManager object
599 code_storage: CodeStorage object
600 """
601
602 def __init__(self, root_dir, spec_manager, code_storage):
603 self.root_dir = root_dir
604 self.spec_manager = spec_manager
605 self.code_storage = code_storage
606
607 def generate_actions_between_specs(self, prev_float, next_float):
608 """Generates actions between two float specs.
609
610 Args:
611 prev_float: start of spec object (exclusive)
612 next_float: end of spec object (inclusive)
613
614 Returns:
615 list of Action object (unordered)
616 """
617 actions = []
618 for path in set(prev_float.entries) | set(next_float.entries):
619
620 # Add repo
621 if path not in prev_float:
622 if next_float[path].is_static():
623 next_at = next_float[path].at
624 else:
625 next_at = self.code_storage.get_rev_by_time(next_float, path,
626 next_float.timestamp)
627 actions.append(
628 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
629 next_at))
630 continue
631
632 # Existing path is floating, enumerates commits until next spec.
633 #
634 # prev_at till_at
635 # prev branch ---> o --------> o --------> o --------> o --------> ...
636 # ^ ^
637 # prev_float.timestamp next_float.timestamp
638 if not prev_float[path].is_static():
639 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
640 prev_float.timestamp)
641 till_at = self.code_storage.get_rev_by_time(prev_float, path,
642 next_float.timestamp)
643
644 actions.extend(
645 self.code_storage.get_actions_between_two_commit(
646 prev_float, path, prev_at, till_at))
647 else:
648 prev_at = till_at = prev_float[path].at
649
650 # At next_float.timestamp.
651 if path not in next_float:
652 # remove repo
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800653 next_at = None
Kuang-che Wucbe12432019-03-18 19:35:03 +0800654 sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')]
655 group = ActionGroup(next_float.timestamp, comment='remove %s' % path)
656 # Remove deeper repo first
657 for path2 in sorted(sub_repos, reverse=True):
658 group.add(GitRemoveRepo(next_float.timestamp, path2))
659 group.add(GitRemoveRepo(next_float.timestamp, path))
660 for path2 in sorted(set(sub_repos) & set(next_float.entries)):
661 group.add(
662 GitAddRepo(next_float.timestamp, path2,
663 next_float[path2].repo_url, prev_float[path2].at))
664 actions.append(group)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800665
666 elif next_float[path].is_static():
667 # pinned to certain commit on different branch
668 next_at = next_float[path].at
669
670 elif next_float[path].at == prev_float[path].at:
671 # keep floating on the same branch
672 next_at = till_at
673
674 else:
675 # switch to another branch
676 # prev_at till_at
677 # prev branch ---> o --------> o --------> o --------> o --------> ...
678 #
679 # next_at
680 # next branch ...... o ------> o --------> o -----> ...
681 # ^ ^
682 # prev_float.timestamp next_float.timestamp
683 next_at = self.code_storage.get_rev_by_time(next_float, path,
684 next_float.timestamp)
685
686 if next_at and next_at != till_at:
687 actions.append(
688 GitCheckoutCommit(next_float.timestamp, path,
689 next_float[path].repo_url, next_at))
690
691 return actions
692
693 def synthesize_fixed_spec(self, float_spec, timestamp):
694 """Synthesizes fixed spec from float spec of given time.
695
696 Args:
697 float_spec: the float spec
698 timestamp: snapshot time
699
700 Returns:
701 Spec object
702 """
703 result = {}
704 for path, path_spec in float_spec.entries.items():
705 if not path_spec.is_static():
706 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
707 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
708
709 result[path] = copy.deepcopy(path_spec)
710
711 name = '%s@%s' % (float_spec.path, timestamp)
712 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
713
714 def reorder_actions(self, actions):
715 """Reorder and cluster actions.
716
717 Args:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800718 actions: list of Action or ActionGroup objects
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800719
720 Returns:
721 list of ActionGroup objects
722 """
723 # TODO(kcwu): support atomic commits across repos
724 actions.sort(key=lambda x: x.timestamp)
725 result = []
726 for action in actions:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800727 if isinstance(action, ActionGroup):
728 group = action
729 else:
730 group = ActionGroup(action.timestamp)
731 group.add(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800732 result.append(group)
733 return result
734
735 def match_spec(self, target, specs, start_index=0):
736 threshold = 3600
737 # ideal_index is the index of last spec before target
738 # begin and end are the range of indexes within threshold (inclusive)
739 ideal_index = None
740 begin, end = None, None
741 for i, spec in enumerate(specs[start_index:], start_index):
742 if spec.timestamp <= target.timestamp:
743 ideal_index = i
744 if abs(spec.timestamp - target.timestamp) < threshold:
745 if begin is None:
746 begin = i
747 end = i
748
749 candidates = []
750 if ideal_index is not None:
751 candidates.append(ideal_index)
752 if begin is not None:
753 candidates.extend(range(begin, end + 1))
754 if not candidates:
755 logger.error('unable to match %s: all specs are after it', target.name)
756 return None
757
758 compatible_candidates = [
759 i for i in candidates if specs[i].is_subset(target)
760 ]
761 if not compatible_candidates:
762 logger.error('unable to match %s: no compatible specs', target.name)
763 spec = specs[candidates[0]]
764 target.diff(spec)
765 return None
766
767 scores = []
768 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800769 # Tie-break: prefer earlier timestamp and smaller difference.
770 if specs[i].timestamp <= target.timestamp:
771 timediff = 0, target.timestamp - specs[i].timestamp
772 else:
773 timediff = 1, specs[i].timestamp - target.timestamp
774 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800775 scores.sort()
776
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800777 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800778 if score != 0:
779 logger.warning('not exactly match (score=%s): %s', score, target.name)
780 target.diff(specs[index])
781
782 if index < ideal_index:
783 logger.warning(
784 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
785 target.name, target.timestamp, specs[index].timestamp,
786 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800787 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800788 if index > ideal_index:
789 logger.warning(
790 'spec committed at %d matched later commit at %d. bad server clock?',
791 target.timestamp, specs[index].timestamp)
792
793 return index
794
795 def associate_fixed_and_synthesized_specs(self, fixed_specs,
796 synthesized_specs):
797 # All fixed specs are snapshot of float specs. Theoretically, they
798 # should be identical to one of the synthesized specs.
799 # However, it's not always true for some reasons --- maybe due to race
800 # condition, maybe due to bugs of this bisect-kit.
801 # To overcome this glitch, we try to match them by similarity instead of
802 # exact match.
803 result = []
804 last_index = 0
805 for i, fixed_spec in enumerate(fixed_specs):
806 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
807 if matched_index is None:
808 if i in (0, len(fixed_specs) - 1):
809 logger.error('essential spec mismatch, unable to continue')
810 assert 0
811 else:
812 logger.warning('%s do not match, skip', fixed_spec.name)
813 continue
814 result.append((i, matched_index))
815 last_index = matched_index
816
817 return result
818
819 def _create_make_up_actions(self, fixed_spec, synthesized):
820 timestamp = synthesized.timestamp
821 make_up = ActionGroup(
822 timestamp, comment='make up glitch for %s' % fixed_spec.name)
823 for path in set(fixed_spec.entries) & set(synthesized.entries):
824 if fixed_spec[path].at == synthesized[path].at:
825 continue
826 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
827 synthesized[path].at)
828 make_up.add(action)
829
830 if not make_up.actions:
831 return None
832 return make_up
833
834 def build_revlist(self, old, new):
835 """Build revlist.
836
837 Returns:
838 list of rev string
839 """
840 logger.info('build_revlist')
841 revlist = []
842
843 # step 1, find all float and fixed specs in the given range.
844 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800845 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800846 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800847 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800848 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
849 float_specs.pop()
850 assert float_specs
851 for spec in float_specs + fixed_specs:
852 self.spec_manager.parse_spec(spec)
853
854 # step 2, synthesize all fixed specs in the range from float specs.
855 specs = float_specs + [fixed_specs[-1]]
856 actions = []
857 logger.debug('len(specs)=%d', len(specs))
858 for i in range(len(specs) - 1):
859 prev_float = specs[i]
860 next_float = specs[i + 1]
861 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
862 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wu1ad2c0e2019-02-26 00:41:10 +0800863 actions += self.generate_actions_between_specs(prev_float, next_float)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800864 action_groups = self.reorder_actions(actions)
865
866 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
867 synthesized = [spec.copy()]
868 for action_group in action_groups:
869 spec.apply(action_group)
870 synthesized.append(spec.copy())
871
872 # step 3, associate fixed specs with synthesized specs.
873 associated_pairs = self.associate_fixed_and_synthesized_specs(
874 fixed_specs, synthesized)
875
876 # step 4, group actions and cache them
877 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
878 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
879 revlist.append(fixed_specs[fixed_index].name)
880 this_action_groups = []
881
882 # handle glitch
883 if fixed_specs[fixed_index].similar_score(
884 synthesized[synthesized_index]) != 0:
885 assert synthesized[synthesized_index].is_subset(
886 fixed_specs[fixed_index])
887 skipped = set(fixed_specs[fixed_index].entries) - set(
888 synthesized[synthesized_index].entries)
889 if skipped:
890 logger.warning(
891 'between %s and %s, '
892 'bisect-kit cannot analyze commit history of following paths:',
893 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
894 for path in sorted(skipped):
895 logger.warning(' %s', path)
896
897 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
898 synthesized[synthesized_index])
899 if make_up:
900 this_action_groups.append(make_up)
901
902 this_action_groups.extend(
903 action_groups[synthesized_index:next_synthesized_index])
904 for idx, ag in enumerate(this_action_groups, 1):
905 rev = make_intra_rev(fixed_specs[fixed_index].name,
906 fixed_specs[next_fixed_index].name, idx)
907 ag.name = rev
908 revlist.append(rev)
909
910 self.save_action_groups_between_releases(
911 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
912 this_action_groups)
913 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
914
915 return revlist
916
917 def save_action_groups_between_releases(self, old, new, action_groups):
918 data = [ag.serialize() for ag in action_groups]
919
920 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
921 if not os.path.exists(cache_dir):
922 os.makedirs(cache_dir)
923 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
924 with file(cache_filename, 'w') as fp:
925 json.dump(data, fp, indent=4, sort_keys=True)
926
927 def load_action_groups_between_releases(self, old, new):
928 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
929 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
930 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800931 raise errors.InternalError(
932 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800933
934 result = []
935 for data in json.load(file(cache_filename)):
936 result.append(ActionGroup.unserialize(data))
937
938 return result
939
Kuang-che Wue80bb872018-11-15 19:45:25 +0800940 def get_rev_detail(self, rev):
941 rev_old, rev_new, index = parse_intra_rev(rev)
942 if rev_old == rev_new:
943 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800944
Kuang-che Wue80bb872018-11-15 19:45:25 +0800945 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
946 # Indexes inside intra_rev are 1 based.
947 action_group = action_groups[index - 1]
948 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800949
950 def switch(self, rev):
951 # easy case
952 if not re.match(_re_intra_rev, rev):
953 self.spec_manager.sync_disk_state(rev)
954 return
955
956 rev_old, rev_new, idx = parse_intra_rev(rev)
957 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
958 assert 0 <= idx <= len(action_groups)
959 action_groups = action_groups[:idx]
960
961 self.spec_manager.sync_disk_state(rev_old)
962
963 apply_actions(self.code_storage, action_groups, self.root_dir)