blob: 0e5bc68fa3764a9545066d263035b16e7d1786f4 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800231 common_count = 0
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800238 common_count += 1
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800245 logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
288 This models atomic commits (for example, gerrit topic, or circular
289 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
290 object.
291 """
292
293 def __init__(self, timestamp, comment=None):
294 self.timestamp = timestamp
295 self.name = None
296 self.actions = []
297 self.comment = comment
298
299 def add(self, action):
300 self.actions.append(action)
301
302 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800303 return dict(
304 timestamp=self.timestamp,
305 name=self.name,
306 comment=self.comment,
307 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800308
309 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800310 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800312 result['comment'] = self.comment
313 result['actions'] = [
314 action.summary(code_storage) for action in self.actions
315 ]
316 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317
318 @staticmethod
319 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800320 ag = ActionGroup(data['timestamp'])
321 ag.name = data['name']
322 ag.comment = data['comment']
323 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324 ag.add(unserialize_action(x))
325 return ag
326
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800327 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800328 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800329 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
332class GitCheckoutCommit(Action):
333 """Describes a git commit action.
334
335 Attributes:
336 repo_url: the corresponding url of git repo
337 rev: git commit to checkout
338 """
339
340 def __init__(self, timestamp, path, repo_url, rev):
341 super(GitCheckoutCommit, self).__init__(timestamp, path)
342 self.repo_url = repo_url
343 self.rev = rev
344
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800345 def apply(self, code_storage, root_dir):
346 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800347 git_repo = os.path.join(root_dir, self.path)
348 assert git_util.is_git_root(git_repo)
349 git_util.checkout_version(git_repo, self.rev)
350
351 def summary(self, code_storage):
352 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800353 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800354 commit_summary = git_util.get_commit_log(git_root,
355 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 except subprocess.CalledProcessError:
357 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
358 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800359 commit_summary = '(unknown)'
360 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
361 return dict(
362 timestamp=self.timestamp,
363 action_type='commit',
364 path=self.path,
365 commit_summary=commit_summary,
366 repo_url=self.repo_url,
367 rev=self.rev,
368 text=text,
369 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
372class GitAddRepo(Action):
373 """Describes a git repo add action.
374
375 Attributes:
376 repo_url: the corresponding url of git repo to add
377 rev: git commit to checkout
378 """
379
380 def __init__(self, timestamp, path, repo_url, rev):
381 super(GitAddRepo, self).__init__(timestamp, path)
382 self.repo_url = repo_url
383 self.rev = rev
384
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800385 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800386 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wudf11c8a2019-03-18 13:21:24 +0800387 if os.path.exists(git_repo):
388 if os.path.isdir(git_repo) and not os.listdir(git_repo):
389 # mimic gclient's behavior; don't panic
390 logger.warning(
391 'adding repo %s; there is already an empty directory; '
392 'assume it is okay', git_repo)
393 else:
394 assert not os.path.exists(git_repo), '%s already exists' % git_repo
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800395
396 reference = code_storage.cached_git_root(self.repo_url)
397 git_util.clone(git_repo, self.repo_url, reference=reference)
398 git_util.checkout_version(git_repo, self.rev)
399
400 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800401
402 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800403 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
404 return dict(
405 timestamp=self.timestamp,
406 action_type='add_repo',
407 path=self.path,
Kuang-che Wu356ecb92019-04-02 16:30:25 +0800408 repo_url=self.repo_url,
409 rev=self.rev,
Kuang-che Wue80bb872018-11-15 19:45:25 +0800410 text=text,
411 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800412
413
414class GitRemoveRepo(Action):
415 """Describes a git repo remove action."""
416
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800417 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800418 assert self.path
419 git_repo = os.path.join(root_dir, self.path)
420 assert git_util.is_git_root(git_repo)
Kuang-che Wu067ff292019-02-14 18:16:23 +0800421 # TODO(kcwu): other projects may be sub-tree of `git_repo`.
422 # They should not be deleted. (crbug/930047)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800423 shutil.rmtree(git_repo)
424
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800425 code_storage.remove_from_project_list(root_dir, self.path)
426
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800427 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800428 return dict(
429 timestamp=self.timestamp,
430 action_type='remove_repo',
431 path=self.path,
432 text='remove repo %s' % self.path,
433 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800434
435
436def apply_actions(code_storage, action_groups, root_dir):
437 # Speed optimization: only apply the last one of consecutive commits per
438 # repo. It is possible to optimize further, but need to take care git repo
439 # add/remove within another repo.
440 commits = {}
441
442 def batch_apply(commits):
443 for i, commit_action in sorted(commits.values()):
444 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800445 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800446
447 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800448 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800449 if not isinstance(action, GitCheckoutCommit):
450 break
451 else:
452 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800453 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800454 commits[action.path] = (i, action)
455 continue
456
457 batch_apply(commits)
458 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800459 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800460
461 batch_apply(commits)
462
463
464class SpecManager(object):
465 """Spec related abstract operations.
466
467 This class enumerates Spec instances and switch disk state to Spec.
468
469 In other words, this class abstracts:
470 - discovery of gclient's DEPS and repo's manifest
471 - gclient sync and repo sync
472 """
473
474 def collect_float_spec(self, old, new):
475 """Collects float Spec between two versions.
476
477 This method may fetch spec from network. However, it should not switch tree
478 version state.
479 """
480 raise NotImplementedError
481
482 def collect_fixed_spec(self, old, new):
483 """Collects fixed Spec between two versions.
484
485 This method may fetch spec from network. However, it should not switch tree
486 version state.
487 """
488 raise NotImplementedError
489
490 def parse_spec(self, spec):
491 """Parses information for Spec object.
492
493 Args:
494 spec: Spec object. It specifies what to parse and the parsed information
495 is stored inside.
496 """
497 raise NotImplementedError
498
499 def sync_disk_state(self, rev):
500 """Switch source tree state to given version."""
501 raise NotImplementedError
502
503
504class CodeStorage(object):
505 """Query code history and commit relationship without checkout.
506
507 Because paths inside source tree may be deleted or map to different remote
508 repo in different versions, we cannot query git information of one version
509 but the tree state is at another version. In order to query information
510 without changing tree state and fast, we need out of tree source code
511 storage.
512
513 This class assumes all git repos are mirrored somewhere on local disk.
514 Subclasses just need to implement cached_git_root() which returns the
515 location.
516
517 In other words, this class abstracts operations upon gclient's cache-dir
518 repo's mirror.
519 """
520
521 def cached_git_root(self, repo_url):
522 """The cached path of given remote git repo.
523
524 Args:
525 repo_url: URL of git remote repo
526
527 Returns:
528 path of cache folder
529 """
530 raise NotImplementedError
531
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800532 def add_to_project_list(self, project_root, path, repo_url):
533 raise NotImplementedError
534
535 def remove_from_project_list(self, project_root, path):
536 raise NotImplementedError
537
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800538 def is_ancestor_commit(self, spec, path, old, new):
539 """Determine one commit is ancestor of another.
540
541 Args:
542 spec: Spec object
543 path: local path relative to project root
544 old: commit id
545 new: commit id
546
547 Returns:
548 True if `old` is ancestor of `new`
549 """
550 git_root = self.cached_git_root(spec[path].repo_url)
551 return git_util.is_ancestor_commit(git_root, old, new)
552
553 def get_rev_by_time(self, spec, path, timestamp):
554 """Get commit hash of given spec by time.
555
556 Args:
557 spec: Spec object
558 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800559 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800560
561 Returns:
562 The commit hash of given time. If there are commits with the given
563 timestamp, returns the last commit.
564 """
565 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800566 # spec[path].at is remote reference name. Since git_root is a mirror (not
567 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800568 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800569
570 def get_actions_between_two_commit(self, spec, path, old, new):
571 git_root = self.cached_git_root(spec[path].repo_url)
572 result = []
573 for timestamp, git_rev in git_util.list_commits_between_commits(
574 git_root, old, new):
575 result.append(
576 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
577 return result
578
579 def is_containing_commit(self, spec, path, rev):
580 git_root = self.cached_git_root(spec[path].repo_url)
581 return git_util.is_containing_commit(git_root, rev)
582
583 def are_spec_commits_available(self, spec):
584 for path, path_spec in spec.entries.items():
585 if not path_spec.is_static():
586 continue
587 if not self.is_containing_commit(spec, path, path_spec.at):
588 return False
589 return True
590
591
592class CodeManager(object):
593 """Class to reconstruct historical source tree state.
594
595 This class can reconstruct all moments of source tree state and diffs between
596 them.
597
598 Attributes:
599 root_dir: root path of project source tree
600 spec_manager: SpecManager object
601 code_storage: CodeStorage object
602 """
603
604 def __init__(self, root_dir, spec_manager, code_storage):
605 self.root_dir = root_dir
606 self.spec_manager = spec_manager
607 self.code_storage = code_storage
608
609 def generate_actions_between_specs(self, prev_float, next_float):
610 """Generates actions between two float specs.
611
612 Args:
613 prev_float: start of spec object (exclusive)
614 next_float: end of spec object (inclusive)
615
616 Returns:
617 list of Action object (unordered)
618 """
619 actions = []
620 for path in set(prev_float.entries) | set(next_float.entries):
621
622 # Add repo
623 if path not in prev_float:
624 if next_float[path].is_static():
625 next_at = next_float[path].at
626 else:
627 next_at = self.code_storage.get_rev_by_time(next_float, path,
628 next_float.timestamp)
629 actions.append(
630 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
631 next_at))
632 continue
633
634 # Existing path is floating, enumerates commits until next spec.
635 #
636 # prev_at till_at
637 # prev branch ---> o --------> o --------> o --------> o --------> ...
638 # ^ ^
639 # prev_float.timestamp next_float.timestamp
640 if not prev_float[path].is_static():
641 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
642 prev_float.timestamp)
643 till_at = self.code_storage.get_rev_by_time(prev_float, path,
644 next_float.timestamp)
645
646 actions.extend(
647 self.code_storage.get_actions_between_two_commit(
648 prev_float, path, prev_at, till_at))
649 else:
650 prev_at = till_at = prev_float[path].at
651
652 # At next_float.timestamp.
653 if path not in next_float:
654 # remove repo
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800655 next_at = None
Kuang-che Wucbe12432019-03-18 19:35:03 +0800656 sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')]
657 group = ActionGroup(next_float.timestamp, comment='remove %s' % path)
658 # Remove deeper repo first
659 for path2 in sorted(sub_repos, reverse=True):
660 group.add(GitRemoveRepo(next_float.timestamp, path2))
661 group.add(GitRemoveRepo(next_float.timestamp, path))
662 for path2 in sorted(set(sub_repos) & set(next_float.entries)):
663 group.add(
664 GitAddRepo(next_float.timestamp, path2,
665 next_float[path2].repo_url, prev_float[path2].at))
666 actions.append(group)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800667
668 elif next_float[path].is_static():
669 # pinned to certain commit on different branch
670 next_at = next_float[path].at
671
672 elif next_float[path].at == prev_float[path].at:
673 # keep floating on the same branch
674 next_at = till_at
675
676 else:
677 # switch to another branch
678 # prev_at till_at
679 # prev branch ---> o --------> o --------> o --------> o --------> ...
680 #
681 # next_at
682 # next branch ...... o ------> o --------> o -----> ...
683 # ^ ^
684 # prev_float.timestamp next_float.timestamp
685 next_at = self.code_storage.get_rev_by_time(next_float, path,
686 next_float.timestamp)
687
688 if next_at and next_at != till_at:
689 actions.append(
690 GitCheckoutCommit(next_float.timestamp, path,
691 next_float[path].repo_url, next_at))
692
693 return actions
694
695 def synthesize_fixed_spec(self, float_spec, timestamp):
696 """Synthesizes fixed spec from float spec of given time.
697
698 Args:
699 float_spec: the float spec
700 timestamp: snapshot time
701
702 Returns:
703 Spec object
704 """
705 result = {}
706 for path, path_spec in float_spec.entries.items():
707 if not path_spec.is_static():
708 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
709 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
710
711 result[path] = copy.deepcopy(path_spec)
712
713 name = '%s@%s' % (float_spec.path, timestamp)
714 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
715
716 def reorder_actions(self, actions):
717 """Reorder and cluster actions.
718
719 Args:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800720 actions: list of Action or ActionGroup objects
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800721
722 Returns:
723 list of ActionGroup objects
724 """
725 # TODO(kcwu): support atomic commits across repos
726 actions.sort(key=lambda x: x.timestamp)
727 result = []
728 for action in actions:
Kuang-che Wucbe12432019-03-18 19:35:03 +0800729 if isinstance(action, ActionGroup):
730 group = action
731 else:
732 group = ActionGroup(action.timestamp)
733 group.add(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800734 result.append(group)
735 return result
736
737 def match_spec(self, target, specs, start_index=0):
738 threshold = 3600
739 # ideal_index is the index of last spec before target
740 # begin and end are the range of indexes within threshold (inclusive)
741 ideal_index = None
742 begin, end = None, None
743 for i, spec in enumerate(specs[start_index:], start_index):
744 if spec.timestamp <= target.timestamp:
745 ideal_index = i
746 if abs(spec.timestamp - target.timestamp) < threshold:
747 if begin is None:
748 begin = i
749 end = i
750
751 candidates = []
752 if ideal_index is not None:
753 candidates.append(ideal_index)
754 if begin is not None:
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800755 candidates.extend(list(range(begin, end + 1)))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800756 if not candidates:
757 logger.error('unable to match %s: all specs are after it', target.name)
758 return None
759
760 compatible_candidates = [
761 i for i in candidates if specs[i].is_subset(target)
762 ]
763 if not compatible_candidates:
764 logger.error('unable to match %s: no compatible specs', target.name)
765 spec = specs[candidates[0]]
766 target.diff(spec)
767 return None
768
769 scores = []
770 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800771 # Tie-break: prefer earlier timestamp and smaller difference.
772 if specs[i].timestamp <= target.timestamp:
773 timediff = 0, target.timestamp - specs[i].timestamp
774 else:
775 timediff = 1, specs[i].timestamp - target.timestamp
776 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800777 scores.sort()
778
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800779 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800780 if score != 0:
781 logger.warning('not exactly match (score=%s): %s', score, target.name)
782 target.diff(specs[index])
783
784 if index < ideal_index:
785 logger.warning(
786 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
787 target.name, target.timestamp, specs[index].timestamp,
788 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800789 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800790 if index > ideal_index:
791 logger.warning(
792 'spec committed at %d matched later commit at %d. bad server clock?',
793 target.timestamp, specs[index].timestamp)
794
795 return index
796
797 def associate_fixed_and_synthesized_specs(self, fixed_specs,
798 synthesized_specs):
799 # All fixed specs are snapshot of float specs. Theoretically, they
800 # should be identical to one of the synthesized specs.
801 # However, it's not always true for some reasons --- maybe due to race
802 # condition, maybe due to bugs of this bisect-kit.
803 # To overcome this glitch, we try to match them by similarity instead of
804 # exact match.
805 result = []
806 last_index = 0
807 for i, fixed_spec in enumerate(fixed_specs):
808 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
809 if matched_index is None:
810 if i in (0, len(fixed_specs) - 1):
811 logger.error('essential spec mismatch, unable to continue')
Kuang-che Wufe1e88a2019-09-10 21:52:25 +0800812 raise ValueError('Commit history analyze failed. '
813 'Bisector cannot deal with this version range.')
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800814 else:
815 logger.warning('%s do not match, skip', fixed_spec.name)
816 continue
817 result.append((i, matched_index))
818 last_index = matched_index
819
820 return result
821
822 def _create_make_up_actions(self, fixed_spec, synthesized):
823 timestamp = synthesized.timestamp
824 make_up = ActionGroup(
825 timestamp, comment='make up glitch for %s' % fixed_spec.name)
826 for path in set(fixed_spec.entries) & set(synthesized.entries):
827 if fixed_spec[path].at == synthesized[path].at:
828 continue
829 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
830 synthesized[path].at)
831 make_up.add(action)
832
833 if not make_up.actions:
834 return None
835 return make_up
836
837 def build_revlist(self, old, new):
838 """Build revlist.
839
840 Returns:
841 list of rev string
842 """
Zheng-Jie Chang127c3302019-09-10 17:17:04 +0800843 logger.info('build_revlist: old = %s, new = %s', old, new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800844 revlist = []
845
846 # step 1, find all float and fixed specs in the given range.
847 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800848 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800849 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800850 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800851 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
852 float_specs.pop()
853 assert float_specs
854 for spec in float_specs + fixed_specs:
855 self.spec_manager.parse_spec(spec)
856
857 # step 2, synthesize all fixed specs in the range from float specs.
858 specs = float_specs + [fixed_specs[-1]]
859 actions = []
860 logger.debug('len(specs)=%d', len(specs))
861 for i in range(len(specs) - 1):
862 prev_float = specs[i]
863 next_float = specs[i + 1]
864 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
865 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wu1ad2c0e2019-02-26 00:41:10 +0800866 actions += self.generate_actions_between_specs(prev_float, next_float)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800867 action_groups = self.reorder_actions(actions)
868
869 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
870 synthesized = [spec.copy()]
871 for action_group in action_groups:
872 spec.apply(action_group)
873 synthesized.append(spec.copy())
874
875 # step 3, associate fixed specs with synthesized specs.
876 associated_pairs = self.associate_fixed_and_synthesized_specs(
877 fixed_specs, synthesized)
878
879 # step 4, group actions and cache them
880 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
881 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
882 revlist.append(fixed_specs[fixed_index].name)
883 this_action_groups = []
884
885 # handle glitch
886 if fixed_specs[fixed_index].similar_score(
887 synthesized[synthesized_index]) != 0:
888 assert synthesized[synthesized_index].is_subset(
889 fixed_specs[fixed_index])
890 skipped = set(fixed_specs[fixed_index].entries) - set(
891 synthesized[synthesized_index].entries)
892 if skipped:
893 logger.warning(
894 'between %s and %s, '
895 'bisect-kit cannot analyze commit history of following paths:',
896 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
897 for path in sorted(skipped):
898 logger.warning(' %s', path)
899
900 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
901 synthesized[synthesized_index])
902 if make_up:
903 this_action_groups.append(make_up)
904
905 this_action_groups.extend(
906 action_groups[synthesized_index:next_synthesized_index])
907 for idx, ag in enumerate(this_action_groups, 1):
908 rev = make_intra_rev(fixed_specs[fixed_index].name,
909 fixed_specs[next_fixed_index].name, idx)
910 ag.name = rev
911 revlist.append(rev)
912
913 self.save_action_groups_between_releases(
914 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
915 this_action_groups)
916 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
917
918 return revlist
919
920 def save_action_groups_between_releases(self, old, new, action_groups):
921 data = [ag.serialize() for ag in action_groups]
922
923 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
924 if not os.path.exists(cache_dir):
925 os.makedirs(cache_dir)
926 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800927 with open(cache_filename, 'w') as fp:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800928 json.dump(data, fp, indent=4, sort_keys=True)
929
930 def load_action_groups_between_releases(self, old, new):
931 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
932 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
933 if not os.path.exists(cache_filename):
Kuang-che Wuce2f3be2019-10-28 19:44:54 +0800934 raise errors.InternalError(
935 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800936
937 result = []
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800938 for data in json.load(open(cache_filename)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800939 result.append(ActionGroup.unserialize(data))
940
941 return result
942
Kuang-che Wue80bb872018-11-15 19:45:25 +0800943 def get_rev_detail(self, rev):
944 rev_old, rev_new, index = parse_intra_rev(rev)
945 if rev_old == rev_new:
946 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800947
Kuang-che Wue80bb872018-11-15 19:45:25 +0800948 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
949 # Indexes inside intra_rev are 1 based.
950 action_group = action_groups[index - 1]
951 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800952
953 def switch(self, rev):
954 # easy case
955 if not re.match(_re_intra_rev, rev):
956 self.spec_manager.sync_disk_state(rev)
957 return
958
959 rev_old, rev_new, idx = parse_intra_rev(rev)
960 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
961 assert 0 <= idx <= len(action_groups)
962 action_groups = action_groups[:idx]
963
964 self.spec_manager.sync_disk_state(rev_old)
965
966 apply_actions(self.code_storage, action_groups, self.root_dir)