blob: c9a339105910ec1ab4889c882f582cd5b21c6e62 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800231 common_count = 0
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800238 common_count += 1
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800245 logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
288 This models atomic commits (for example, gerrit topic, or circular
289 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
290 object.
291 """
292
293 def __init__(self, timestamp, comment=None):
294 self.timestamp = timestamp
295 self.name = None
296 self.actions = []
297 self.comment = comment
298
299 def add(self, action):
300 self.actions.append(action)
301
302 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800303 return dict(
304 timestamp=self.timestamp,
305 name=self.name,
306 comment=self.comment,
307 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800308
309 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800310 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800312 result['comment'] = self.comment
313 result['actions'] = [
314 action.summary(code_storage) for action in self.actions
315 ]
316 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317
318 @staticmethod
319 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800320 ag = ActionGroup(data['timestamp'])
321 ag.name = data['name']
322 ag.comment = data['comment']
323 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324 ag.add(unserialize_action(x))
325 return ag
326
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800327 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800328 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800329 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
332class GitCheckoutCommit(Action):
333 """Describes a git commit action.
334
335 Attributes:
336 repo_url: the corresponding url of git repo
337 rev: git commit to checkout
338 """
339
340 def __init__(self, timestamp, path, repo_url, rev):
341 super(GitCheckoutCommit, self).__init__(timestamp, path)
342 self.repo_url = repo_url
343 self.rev = rev
344
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800345 def apply(self, code_storage, root_dir):
346 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800347 git_repo = os.path.join(root_dir, self.path)
348 assert git_util.is_git_root(git_repo)
349 git_util.checkout_version(git_repo, self.rev)
350
351 def summary(self, code_storage):
352 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800353 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800354 commit_summary = git_util.get_commit_log(git_root,
355 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 except subprocess.CalledProcessError:
357 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
358 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800359 commit_summary = '(unknown)'
360 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
361 return dict(
362 timestamp=self.timestamp,
363 action_type='commit',
364 path=self.path,
365 commit_summary=commit_summary,
366 repo_url=self.repo_url,
367 rev=self.rev,
368 text=text,
369 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
372class GitAddRepo(Action):
373 """Describes a git repo add action.
374
375 Attributes:
376 repo_url: the corresponding url of git repo to add
377 rev: git commit to checkout
378 """
379
380 def __init__(self, timestamp, path, repo_url, rev):
381 super(GitAddRepo, self).__init__(timestamp, path)
382 self.repo_url = repo_url
383 self.rev = rev
384
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800385 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800386 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wudf11c8a2019-03-18 13:21:24 +0800387 if os.path.exists(git_repo):
388 if os.path.isdir(git_repo) and not os.listdir(git_repo):
389 # mimic gclient's behavior; don't panic
390 logger.warning(
391 'adding repo %s; there is already an empty directory; '
392 'assume it is okay', git_repo)
393 else:
394 assert not os.path.exists(git_repo), '%s already exists' % git_repo
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800395
396 reference = code_storage.cached_git_root(self.repo_url)
397 git_util.clone(git_repo, self.repo_url, reference=reference)
398 git_util.checkout_version(git_repo, self.rev)
399
400 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800401
402 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800403 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
404 return dict(
405 timestamp=self.timestamp,
406 action_type='add_repo',
407 path=self.path,
Kuang-che Wu356ecb92019-04-02 16:30:25 +0800408 repo_url=self.repo_url,
409 rev=self.rev,
Kuang-che Wue80bb872018-11-15 19:45:25 +0800410 text=text,
411 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800412
413
414class GitRemoveRepo(Action):
415 """Describes a git repo remove action."""
416
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800417 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800418 assert self.path
419 git_repo = os.path.join(root_dir, self.path)
420 assert git_util.is_git_root(git_repo)
Kuang-che Wu067ff292019-02-14 18:16:23 +0800421 # TODO(kcwu): other projects may be sub-tree of `git_repo`.
422 # They should not be deleted. (crbug/930047)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800423 shutil.rmtree(git_repo)
424
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800425 code_storage.remove_from_project_list(root_dir, self.path)
426
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800427 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800428 return dict(
429 timestamp=self.timestamp,
430 action_type='remove_repo',
431 path=self.path,
432 text='remove repo %s' % self.path,
433 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800434
435
436def apply_actions(code_storage, action_groups, root_dir):
437 # Speed optimization: only apply the last one of consecutive commits per
438 # repo. It is possible to optimize further, but need to take care git repo
439 # add/remove within another repo.
440 commits = {}
441
442 def batch_apply(commits):
Kuang-che Wu261174e2020-01-09 17:51:31 +0800443 for i, _, commit_action in sorted(commits.values(), key=lambda x: x[:2]):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800444 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800445 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800446
447 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800448 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800449 if not isinstance(action, GitCheckoutCommit):
450 break
451 else:
452 # If all actions are commits, defer them for batch processing.
Kuang-che Wu261174e2020-01-09 17:51:31 +0800453 for j, action in enumerate(action_group.actions):
454 commits[action.path] = (i, j, action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800455 continue
456
457 batch_apply(commits)
458 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800459 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800460
461 batch_apply(commits)
462
463
464class SpecManager(object):
465 """Spec related abstract operations.
466
467 This class enumerates Spec instances and switch disk state to Spec.
468
469 In other words, this class abstracts:
470 - discovery of gclient's DEPS and repo's manifest
471 - gclient sync and repo sync
472 """
473
474 def collect_float_spec(self, old, new):
475 """Collects float Spec between two versions.
476
477 This method may fetch spec from network. However, it should not switch tree
478 version state.
479 """
480 raise NotImplementedError
481
482 def collect_fixed_spec(self, old, new):
483 """Collects fixed Spec between two versions.
484
485 This method may fetch spec from network. However, it should not switch tree
486 version state.
487 """
488 raise NotImplementedError
489
490 def parse_spec(self, spec):
491 """Parses information for Spec object.
492
493 Args:
494 spec: Spec object. It specifies what to parse and the parsed information
495 is stored inside.
496 """
497 raise NotImplementedError
498
499 def sync_disk_state(self, rev):
500 """Switch source tree state to given version."""
501 raise NotImplementedError
502
503
504class CodeStorage(object):
505 """Query code history and commit relationship without checkout.
506
507 Because paths inside source tree may be deleted or map to different remote
508 repo in different versions, we cannot query git information of one version
509 but the tree state is at another version. In order to query information
510 without changing tree state and fast, we need out of tree source code
511 storage.
512
513 This class assumes all git repos are mirrored somewhere on local disk.
514 Subclasses just need to implement cached_git_root() which returns the
515 location.
516
517 In other words, this class abstracts operations upon gclient's cache-dir
518 repo's mirror.
519 """
520
521 def cached_git_root(self, repo_url):
522 """The cached path of given remote git repo.
523
524 Args:
525 repo_url: URL of git remote repo
526
527 Returns:
528 path of cache folder
529 """
530 raise NotImplementedError
531
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800532 def add_to_project_list(self, project_root, path, repo_url):
533 raise NotImplementedError
534
535 def remove_from_project_list(self, project_root, path):
536 raise NotImplementedError
537
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800538 def is_ancestor_commit(self, spec, path, old, new):
539 """Determine one commit is ancestor of another.
540
541 Args:
542 spec: Spec object
543 path: local path relative to project root
544 old: commit id
545 new: commit id
546
547 Returns:
548 True if `old` is ancestor of `new`
549 """
550 git_root = self.cached_git_root(spec[path].repo_url)
551 return git_util.is_ancestor_commit(git_root, old, new)
552
553 def get_rev_by_time(self, spec, path, timestamp):
554 """Get commit hash of given spec by time.
555
556 Args:
557 spec: Spec object
558 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800559 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800560
561 Returns:
562 The commit hash of given time. If there are commits with the given
563 timestamp, returns the last commit.
564 """
565 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800566 # spec[path].at is remote reference name. Since git_root is a mirror (not
567 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800568 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800569
570 def get_actions_between_two_commit(self, spec, path, old, new):
571 git_root = self.cached_git_root(spec[path].repo_url)
572 result = []
573 for timestamp, git_rev in git_util.list_commits_between_commits(
574 git_root, old, new):
575 result.append(
576 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
577 return result
578
579 def is_containing_commit(self, spec, path, rev):
580 git_root = self.cached_git_root(spec[path].repo_url)
581 return git_util.is_containing_commit(git_root, rev)
582
583 def are_spec_commits_available(self, spec):
584 for path, path_spec in spec.entries.items():
585 if not path_spec.is_static():
586 continue
587 if not self.is_containing_commit(spec, path, path_spec.at):
588 return False
589 return True
590
591
592class CodeManager(object):
593 """Class to reconstruct historical source tree state.
594
595 This class can reconstruct all moments of source tree state and diffs between
596 them.
597
598 Attributes:
599 root_dir: root path of project source tree
600 spec_manager: SpecManager object
601 code_storage: CodeStorage object
602 """
603
604 def __init__(self, root_dir, spec_manager, code_storage):
605 self.root_dir = root_dir
606 self.spec_manager = spec_manager
607 self.code_storage = code_storage
608
609 def generate_actions_between_specs(self, prev_float, next_float):
610 """Generates actions between two float specs.
611
612 Args:
613 prev_float: start of spec object (exclusive)
614 next_float: end of spec object (inclusive)
615
616 Returns:
617 list of Action object (unordered)
618 """
619 actions = []
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800620 is_removed = set()
621 for path in sorted(set(prev_float.entries) | set(next_float.entries)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800622
623 # Add repo
624 if path not in prev_float:
625 if next_float[path].is_static():
626 next_at = next_float[path].at
627 else:
628 next_at = self.code_storage.get_rev_by_time(next_float, path,
629 next_float.timestamp)
630 actions.append(
631 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
632 next_at))
633 continue
634
635 # Existing path is floating, enumerates commits until next spec.
636 #
637 # prev_at till_at
638 # prev branch ---> o --------> o --------> o --------> o --------> ...
639 # ^ ^
640 # prev_float.timestamp next_float.timestamp
641 if not prev_float[path].is_static():
642 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
643 prev_float.timestamp)
644 till_at = self.code_storage.get_rev_by_time(prev_float, path,
645 next_float.timestamp)
646
647 actions.extend(
648 self.code_storage.get_actions_between_two_commit(
649 prev_float, path, prev_at, till_at))
650 else:
651 prev_at = till_at = prev_float[path].at
652
653 # At next_float.timestamp.
654 if path not in next_float:
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800655 if path in is_removed:
656 continue
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800657 # remove repo
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800658 next_at = None
Kuang-che Wucbe12432019-03-18 19:35:03 +0800659 sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')]
660 group = ActionGroup(next_float.timestamp, comment='remove %s' % path)
661 # Remove deeper repo first
662 for path2 in sorted(sub_repos, reverse=True):
663 group.add(GitRemoveRepo(next_float.timestamp, path2))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800664 is_removed.add(path2)
Kuang-che Wucbe12432019-03-18 19:35:03 +0800665 group.add(GitRemoveRepo(next_float.timestamp, path))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800666 is_removed.add(path)
Kuang-che Wucbe12432019-03-18 19:35:03 +0800667 for path2 in sorted(set(sub_repos) & set(next_float.entries)):
668 group.add(
669 GitAddRepo(next_float.timestamp, path2,
670 next_float[path2].repo_url, prev_float[path2].at))
671 actions.append(group)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800672
673 elif next_float[path].is_static():
674 # pinned to certain commit on different branch
675 next_at = next_float[path].at
676
677 elif next_float[path].at == prev_float[path].at:
678 # keep floating on the same branch
679 next_at = till_at
680
681 else:
682 # switch to another branch
683 # prev_at till_at
684 # prev branch ---> o --------> o --------> o --------> o --------> ...
685 #
686 # next_at
687 # next branch ...... o ------> o --------> o -----> ...
688 # ^ ^
689 # prev_float.timestamp next_float.timestamp
690 next_at = self.code_storage.get_rev_by_time(next_float, path,
691 next_float.timestamp)
692
693 if next_at and next_at != till_at:
694 actions.append(
695 GitCheckoutCommit(next_float.timestamp, path,
696 next_float[path].repo_url, next_at))
697
698 return actions
699
700 def synthesize_fixed_spec(self, float_spec, timestamp):
701 """Synthesizes fixed spec from float spec of given time.
702
703 Args:
704 float_spec: the float spec
705 timestamp: snapshot time
706
707 Returns:
708 Spec object
709 """
710 result = {}
711 for path, path_spec in float_spec.entries.items():
712 if not path_spec.is_static():
713 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
714 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
715
716 result[path] = copy.deepcopy(path_spec)
717
718 name = '%s@%s' % (float_spec.path, timestamp)
719 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
720
721 def reorder_actions(self, actions):
722 """Reorder and cluster actions.
723
724 Args:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800725 actions: list of Action or ActionGroup objects
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800726
727 Returns:
728 list of ActionGroup objects
729 """
730 # TODO(kcwu): support atomic commits across repos
731 actions.sort(key=lambda x: x.timestamp)
732 result = []
733 for action in actions:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800734 if isinstance(action, ActionGroup):
735 group = action
736 else:
737 group = ActionGroup(action.timestamp)
738 group.add(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800739 result.append(group)
740 return result
741
742 def match_spec(self, target, specs, start_index=0):
743 threshold = 3600
744 # ideal_index is the index of last spec before target
745 # begin and end are the range of indexes within threshold (inclusive)
746 ideal_index = None
747 begin, end = None, None
748 for i, spec in enumerate(specs[start_index:], start_index):
749 if spec.timestamp <= target.timestamp:
750 ideal_index = i
751 if abs(spec.timestamp - target.timestamp) < threshold:
752 if begin is None:
753 begin = i
754 end = i
755
756 candidates = []
757 if ideal_index is not None:
758 candidates.append(ideal_index)
759 if begin is not None:
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800760 candidates.extend(list(range(begin, end + 1)))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800761 if not candidates:
762 logger.error('unable to match %s: all specs are after it', target.name)
763 return None
764
765 compatible_candidates = [
766 i for i in candidates if specs[i].is_subset(target)
767 ]
768 if not compatible_candidates:
769 logger.error('unable to match %s: no compatible specs', target.name)
770 spec = specs[candidates[0]]
771 target.diff(spec)
772 return None
773
774 scores = []
775 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800776 # Tie-break: prefer earlier timestamp and smaller difference.
777 if specs[i].timestamp <= target.timestamp:
778 timediff = 0, target.timestamp - specs[i].timestamp
779 else:
780 timediff = 1, specs[i].timestamp - target.timestamp
781 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800782 scores.sort()
783
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800784 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800785 if score != 0:
786 logger.warning('not exactly match (score=%s): %s', score, target.name)
787 target.diff(specs[index])
788
789 if index < ideal_index:
790 logger.warning(
791 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
792 target.name, target.timestamp, specs[index].timestamp,
793 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800794 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800795 if index > ideal_index:
796 logger.warning(
797 'spec committed at %d matched later commit at %d. bad server clock?',
798 target.timestamp, specs[index].timestamp)
799
800 return index
801
802 def associate_fixed_and_synthesized_specs(self, fixed_specs,
803 synthesized_specs):
804 # All fixed specs are snapshot of float specs. Theoretically, they
805 # should be identical to one of the synthesized specs.
806 # However, it's not always true for some reasons --- maybe due to race
807 # condition, maybe due to bugs of this bisect-kit.
808 # To overcome this glitch, we try to match them by similarity instead of
809 # exact match.
810 result = []
811 last_index = 0
812 for i, fixed_spec in enumerate(fixed_specs):
813 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
814 if matched_index is None:
815 if i in (0, len(fixed_specs) - 1):
816 logger.error('essential spec mismatch, unable to continue')
Kuang-che Wufe1e88a2019-09-10 21:52:25 +0800817 raise ValueError('Commit history analyze failed. '
818 'Bisector cannot deal with this version range.')
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800819 else:
820 logger.warning('%s do not match, skip', fixed_spec.name)
821 continue
822 result.append((i, matched_index))
823 last_index = matched_index
824
825 return result
826
827 def _create_make_up_actions(self, fixed_spec, synthesized):
828 timestamp = synthesized.timestamp
829 make_up = ActionGroup(
830 timestamp, comment='make up glitch for %s' % fixed_spec.name)
831 for path in set(fixed_spec.entries) & set(synthesized.entries):
832 if fixed_spec[path].at == synthesized[path].at:
833 continue
834 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
835 synthesized[path].at)
836 make_up.add(action)
837
838 if not make_up.actions:
839 return None
840 return make_up
841
842 def build_revlist(self, old, new):
843 """Build revlist.
844
845 Returns:
846 list of rev string
847 """
Zheng-Jie Chang127c3302019-09-10 17:17:04 +0800848 logger.info('build_revlist: old = %s, new = %s', old, new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800849 revlist = []
850
851 # step 1, find all float and fixed specs in the given range.
852 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800853 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800854 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800855 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800856 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
857 float_specs.pop()
858 assert float_specs
859 for spec in float_specs + fixed_specs:
860 self.spec_manager.parse_spec(spec)
861
862 # step 2, synthesize all fixed specs in the range from float specs.
863 specs = float_specs + [fixed_specs[-1]]
864 actions = []
865 logger.debug('len(specs)=%d', len(specs))
866 for i in range(len(specs) - 1):
867 prev_float = specs[i]
868 next_float = specs[i + 1]
869 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
870 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wu1ad2c0e2019-02-26 00:41:10 +0800871 actions += self.generate_actions_between_specs(prev_float, next_float)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800872 action_groups = self.reorder_actions(actions)
873
874 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
875 synthesized = [spec.copy()]
876 for action_group in action_groups:
877 spec.apply(action_group)
878 synthesized.append(spec.copy())
879
880 # step 3, associate fixed specs with synthesized specs.
881 associated_pairs = self.associate_fixed_and_synthesized_specs(
882 fixed_specs, synthesized)
883
884 # step 4, group actions and cache them
885 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
886 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
887 revlist.append(fixed_specs[fixed_index].name)
888 this_action_groups = []
889
890 # handle glitch
891 if fixed_specs[fixed_index].similar_score(
892 synthesized[synthesized_index]) != 0:
893 assert synthesized[synthesized_index].is_subset(
894 fixed_specs[fixed_index])
895 skipped = set(fixed_specs[fixed_index].entries) - set(
896 synthesized[synthesized_index].entries)
897 if skipped:
898 logger.warning(
899 'between %s and %s, '
900 'bisect-kit cannot analyze commit history of following paths:',
901 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
902 for path in sorted(skipped):
903 logger.warning(' %s', path)
904
905 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
906 synthesized[synthesized_index])
907 if make_up:
908 this_action_groups.append(make_up)
909
910 this_action_groups.extend(
911 action_groups[synthesized_index:next_synthesized_index])
912 for idx, ag in enumerate(this_action_groups, 1):
913 rev = make_intra_rev(fixed_specs[fixed_index].name,
914 fixed_specs[next_fixed_index].name, idx)
915 ag.name = rev
916 revlist.append(rev)
917
918 self.save_action_groups_between_releases(
919 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
920 this_action_groups)
921 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
922
923 return revlist
924
925 def save_action_groups_between_releases(self, old, new, action_groups):
926 data = [ag.serialize() for ag in action_groups]
927
928 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
929 if not os.path.exists(cache_dir):
930 os.makedirs(cache_dir)
931 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800932 with open(cache_filename, 'w') as fp:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800933 json.dump(data, fp, indent=4, sort_keys=True)
934
935 def load_action_groups_between_releases(self, old, new):
936 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
937 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
938 if not os.path.exists(cache_filename):
Kuang-che Wuce2f3be2019-10-28 19:44:54 +0800939 raise errors.InternalError(
940 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800941
942 result = []
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800943 for data in json.load(open(cache_filename)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800944 result.append(ActionGroup.unserialize(data))
945
946 return result
947
Kuang-che Wue80bb872018-11-15 19:45:25 +0800948 def get_rev_detail(self, rev):
949 rev_old, rev_new, index = parse_intra_rev(rev)
950 if rev_old == rev_new:
951 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800952
Kuang-che Wue80bb872018-11-15 19:45:25 +0800953 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
954 # Indexes inside intra_rev are 1 based.
955 action_group = action_groups[index - 1]
956 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800957
958 def switch(self, rev):
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800959 rev_old, action_groups = self.get_intra_and_diff(rev)
960 self.spec_manager.sync_disk_state(rev_old)
961 apply_actions(self.code_storage, action_groups, self.root_dir)
962
963 def get_intra_and_diff(self, rev):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800964 # easy case
965 if not re.match(_re_intra_rev, rev):
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800966 return rev, []
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800967
968 rev_old, rev_new, idx = parse_intra_rev(rev)
969 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
970 assert 0 <= idx <= len(action_groups)
971 action_groups = action_groups[:idx]
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800972 return rev_old, action_groups