blob: 8af4b22bc3b89df777816cbce617301e3dbf553d [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800231 common_count = 0
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800238 common_count += 1
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
Kuang-che Wu4997bfd2019-03-18 13:09:26 +0800245 logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800288 This models atomic actions, ex:
289 - repo added/removed in the same manifest commit
290 - commits appears at the same time due to repo add
291 - gerrit topic
292 - circular CQ-DEPEND (Cq-Depend)
293 Otherwise, one ActionGroup usually consists only one Action object.
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800294 """
295
296 def __init__(self, timestamp, comment=None):
297 self.timestamp = timestamp
298 self.name = None
299 self.actions = []
300 self.comment = comment
301
302 def add(self, action):
303 self.actions.append(action)
304
305 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800306 return dict(
307 timestamp=self.timestamp,
308 name=self.name,
309 comment=self.comment,
310 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311
312 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800313 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800314 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800315 result['comment'] = self.comment
316 result['actions'] = [
317 action.summary(code_storage) for action in self.actions
318 ]
319 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800320
321 @staticmethod
322 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800323 ag = ActionGroup(data['timestamp'])
324 ag.name = data['name']
325 ag.comment = data['comment']
326 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800327 ag.add(unserialize_action(x))
328 return ag
329
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800330 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800331 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800332 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800333
334
335class GitCheckoutCommit(Action):
336 """Describes a git commit action.
337
338 Attributes:
339 repo_url: the corresponding url of git repo
340 rev: git commit to checkout
341 """
342
343 def __init__(self, timestamp, path, repo_url, rev):
344 super(GitCheckoutCommit, self).__init__(timestamp, path)
345 self.repo_url = repo_url
346 self.rev = rev
347
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800348 def apply(self, code_storage, root_dir):
349 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800350 git_repo = os.path.join(root_dir, self.path)
351 assert git_util.is_git_root(git_repo)
352 git_util.checkout_version(git_repo, self.rev)
353
354 def summary(self, code_storage):
355 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800357 commit_summary = git_util.get_commit_log(git_root,
358 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800359 except subprocess.CalledProcessError:
360 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
361 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800362 commit_summary = '(unknown)'
363 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
364 return dict(
365 timestamp=self.timestamp,
366 action_type='commit',
367 path=self.path,
368 commit_summary=commit_summary,
369 repo_url=self.repo_url,
370 rev=self.rev,
371 text=text,
372 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800373
374
375class GitAddRepo(Action):
376 """Describes a git repo add action.
377
378 Attributes:
379 repo_url: the corresponding url of git repo to add
380 rev: git commit to checkout
381 """
382
383 def __init__(self, timestamp, path, repo_url, rev):
384 super(GitAddRepo, self).__init__(timestamp, path)
385 self.repo_url = repo_url
386 self.rev = rev
387
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800388 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800389 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wudf11c8a2019-03-18 13:21:24 +0800390 if os.path.exists(git_repo):
391 if os.path.isdir(git_repo) and not os.listdir(git_repo):
392 # mimic gclient's behavior; don't panic
393 logger.warning(
394 'adding repo %s; there is already an empty directory; '
395 'assume it is okay', git_repo)
396 else:
397 assert not os.path.exists(git_repo), '%s already exists' % git_repo
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800398
399 reference = code_storage.cached_git_root(self.repo_url)
400 git_util.clone(git_repo, self.repo_url, reference=reference)
401 git_util.checkout_version(git_repo, self.rev)
402
403 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800404
405 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800406 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
407 return dict(
408 timestamp=self.timestamp,
409 action_type='add_repo',
410 path=self.path,
Kuang-che Wu356ecb92019-04-02 16:30:25 +0800411 repo_url=self.repo_url,
412 rev=self.rev,
Kuang-che Wue80bb872018-11-15 19:45:25 +0800413 text=text,
414 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800415
416
417class GitRemoveRepo(Action):
418 """Describes a git repo remove action."""
419
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800420 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800421 assert self.path
422 git_repo = os.path.join(root_dir, self.path)
423 assert git_util.is_git_root(git_repo)
Kuang-che Wu067ff292019-02-14 18:16:23 +0800424 # TODO(kcwu): other projects may be sub-tree of `git_repo`.
425 # They should not be deleted. (crbug/930047)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800426 shutil.rmtree(git_repo)
427
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800428 code_storage.remove_from_project_list(root_dir, self.path)
429
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800430 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800431 return dict(
432 timestamp=self.timestamp,
433 action_type='remove_repo',
434 path=self.path,
435 text='remove repo %s' % self.path,
436 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800437
438
439def apply_actions(code_storage, action_groups, root_dir):
440 # Speed optimization: only apply the last one of consecutive commits per
441 # repo. It is possible to optimize further, but need to take care git repo
442 # add/remove within another repo.
443 commits = {}
444
445 def batch_apply(commits):
Kuang-che Wu261174e2020-01-09 17:51:31 +0800446 for i, _, commit_action in sorted(commits.values(), key=lambda x: x[:2]):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800447 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800448 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800449
450 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800451 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800452 if not isinstance(action, GitCheckoutCommit):
453 break
454 else:
455 # If all actions are commits, defer them for batch processing.
Kuang-che Wu261174e2020-01-09 17:51:31 +0800456 for j, action in enumerate(action_group.actions):
457 commits[action.path] = (i, j, action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800458 continue
459
460 batch_apply(commits)
461 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800462 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800463
464 batch_apply(commits)
465
466
467class SpecManager(object):
468 """Spec related abstract operations.
469
470 This class enumerates Spec instances and switch disk state to Spec.
471
472 In other words, this class abstracts:
473 - discovery of gclient's DEPS and repo's manifest
474 - gclient sync and repo sync
475 """
476
477 def collect_float_spec(self, old, new):
478 """Collects float Spec between two versions.
479
480 This method may fetch spec from network. However, it should not switch tree
481 version state.
482 """
483 raise NotImplementedError
484
485 def collect_fixed_spec(self, old, new):
486 """Collects fixed Spec between two versions.
487
488 This method may fetch spec from network. However, it should not switch tree
489 version state.
490 """
491 raise NotImplementedError
492
493 def parse_spec(self, spec):
494 """Parses information for Spec object.
495
496 Args:
497 spec: Spec object. It specifies what to parse and the parsed information
498 is stored inside.
499 """
500 raise NotImplementedError
501
502 def sync_disk_state(self, rev):
503 """Switch source tree state to given version."""
504 raise NotImplementedError
505
506
507class CodeStorage(object):
508 """Query code history and commit relationship without checkout.
509
510 Because paths inside source tree may be deleted or map to different remote
511 repo in different versions, we cannot query git information of one version
512 but the tree state is at another version. In order to query information
513 without changing tree state and fast, we need out of tree source code
514 storage.
515
516 This class assumes all git repos are mirrored somewhere on local disk.
517 Subclasses just need to implement cached_git_root() which returns the
518 location.
519
520 In other words, this class abstracts operations upon gclient's cache-dir
521 repo's mirror.
522 """
523
524 def cached_git_root(self, repo_url):
525 """The cached path of given remote git repo.
526
527 Args:
528 repo_url: URL of git remote repo
529
530 Returns:
531 path of cache folder
532 """
533 raise NotImplementedError
534
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800535 def add_to_project_list(self, project_root, path, repo_url):
536 raise NotImplementedError
537
538 def remove_from_project_list(self, project_root, path):
539 raise NotImplementedError
540
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800541 def is_ancestor_commit(self, spec, path, old, new):
542 """Determine one commit is ancestor of another.
543
544 Args:
545 spec: Spec object
546 path: local path relative to project root
547 old: commit id
548 new: commit id
549
550 Returns:
551 True if `old` is ancestor of `new`
552 """
553 git_root = self.cached_git_root(spec[path].repo_url)
554 return git_util.is_ancestor_commit(git_root, old, new)
555
556 def get_rev_by_time(self, spec, path, timestamp):
557 """Get commit hash of given spec by time.
558
559 Args:
560 spec: Spec object
561 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800562 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800563
564 Returns:
565 The commit hash of given time. If there are commits with the given
566 timestamp, returns the last commit.
567 """
568 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800569 # spec[path].at is remote reference name. Since git_root is a mirror (not
570 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800571 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800572
573 def get_actions_between_two_commit(self, spec, path, old, new):
574 git_root = self.cached_git_root(spec[path].repo_url)
575 result = []
576 for timestamp, git_rev in git_util.list_commits_between_commits(
577 git_root, old, new):
578 result.append(
579 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
580 return result
581
582 def is_containing_commit(self, spec, path, rev):
583 git_root = self.cached_git_root(spec[path].repo_url)
584 return git_util.is_containing_commit(git_root, rev)
585
586 def are_spec_commits_available(self, spec):
587 for path, path_spec in spec.entries.items():
588 if not path_spec.is_static():
589 continue
590 if not self.is_containing_commit(spec, path, path_spec.at):
591 return False
592 return True
593
594
595class CodeManager(object):
596 """Class to reconstruct historical source tree state.
597
598 This class can reconstruct all moments of source tree state and diffs between
599 them.
600
601 Attributes:
602 root_dir: root path of project source tree
603 spec_manager: SpecManager object
604 code_storage: CodeStorage object
605 """
606
607 def __init__(self, root_dir, spec_manager, code_storage):
608 self.root_dir = root_dir
609 self.spec_manager = spec_manager
610 self.code_storage = code_storage
611
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800612 def generate_action_groups_between_specs(self, prev_float, next_float):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800613 """Generates actions between two float specs.
614
615 Args:
616 prev_float: start of spec object (exclusive)
617 next_float: end of spec object (inclusive)
618
619 Returns:
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800620 list of ActionGroup object (ordered)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800621 """
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800622 groups = []
623 last_group = ActionGroup(next_float.timestamp)
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800624 is_removed = set()
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800625 # Sort alphabetically, so parent directories are handled before children
626 # directories.
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800627 for path in sorted(set(prev_float.entries) | set(next_float.entries)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800628
629 # Add repo
630 if path not in prev_float:
631 if next_float[path].is_static():
632 next_at = next_float[path].at
633 else:
634 next_at = self.code_storage.get_rev_by_time(next_float, path,
635 next_float.timestamp)
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800636 last_group.add(
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800637 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
638 next_at))
639 continue
640
641 # Existing path is floating, enumerates commits until next spec.
642 #
643 # prev_at till_at
644 # prev branch ---> o --------> o --------> o --------> o --------> ...
645 # ^ ^
646 # prev_float.timestamp next_float.timestamp
647 if not prev_float[path].is_static():
648 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
649 prev_float.timestamp)
650 till_at = self.code_storage.get_rev_by_time(prev_float, path,
651 next_float.timestamp)
652
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800653 actions = self.code_storage.get_actions_between_two_commit(
654 prev_float, path, prev_at, till_at)
655
656 # Assume commits with the same timestamp as manifest/DEPS change are
657 # atomic.
658 if actions and actions[-1].timestamp == next_float.timestamp:
659 last_group.add(actions.pop())
660
661 for action in actions:
662 group = ActionGroup(action.timestamp)
663 group.add(action)
664 groups.append(group)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800665 else:
666 prev_at = till_at = prev_float[path].at
667
668 # At next_float.timestamp.
669 if path not in next_float:
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800670 if path in is_removed:
671 continue
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800672 # remove repo
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800673 next_at = None
Kuang-che Wucbe12432019-03-18 19:35:03 +0800674 sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')]
Kuang-che Wucbe12432019-03-18 19:35:03 +0800675 # Remove deeper repo first
676 for path2 in sorted(sub_repos, reverse=True):
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800677 last_group.add(GitRemoveRepo(next_float.timestamp, path2))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800678 is_removed.add(path2)
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800679 last_group.add(GitRemoveRepo(next_float.timestamp, path))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800680 is_removed.add(path)
Kuang-che Wucbe12432019-03-18 19:35:03 +0800681 for path2 in sorted(set(sub_repos) & set(next_float.entries)):
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800682 last_group.add(
Kuang-che Wucbe12432019-03-18 19:35:03 +0800683 GitAddRepo(next_float.timestamp, path2,
684 next_float[path2].repo_url, prev_float[path2].at))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800685
686 elif next_float[path].is_static():
687 # pinned to certain commit on different branch
688 next_at = next_float[path].at
689
690 elif next_float[path].at == prev_float[path].at:
691 # keep floating on the same branch
692 next_at = till_at
693
694 else:
695 # switch to another branch
696 # prev_at till_at
697 # prev branch ---> o --------> o --------> o --------> o --------> ...
698 #
699 # next_at
700 # next branch ...... o ------> o --------> o -----> ...
701 # ^ ^
702 # prev_float.timestamp next_float.timestamp
703 next_at = self.code_storage.get_rev_by_time(next_float, path,
704 next_float.timestamp)
705
706 if next_at and next_at != till_at:
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800707 last_group.add(
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800708 GitCheckoutCommit(next_float.timestamp, path,
709 next_float[path].repo_url, next_at))
710
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800711 groups.sort(key=lambda x: x.timestamp)
712 if last_group.actions:
713 groups.append(last_group)
714 return groups
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800715
716 def synthesize_fixed_spec(self, float_spec, timestamp):
717 """Synthesizes fixed spec from float spec of given time.
718
719 Args:
720 float_spec: the float spec
721 timestamp: snapshot time
722
723 Returns:
724 Spec object
725 """
726 result = {}
727 for path, path_spec in float_spec.entries.items():
728 if not path_spec.is_static():
729 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
730 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
731
732 result[path] = copy.deepcopy(path_spec)
733
734 name = '%s@%s' % (float_spec.path, timestamp)
735 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
736
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800737 def match_spec(self, target, specs, start_index=0):
738 threshold = 3600
739 # ideal_index is the index of last spec before target
740 # begin and end are the range of indexes within threshold (inclusive)
741 ideal_index = None
742 begin, end = None, None
743 for i, spec in enumerate(specs[start_index:], start_index):
744 if spec.timestamp <= target.timestamp:
745 ideal_index = i
746 if abs(spec.timestamp - target.timestamp) < threshold:
747 if begin is None:
748 begin = i
749 end = i
750
751 candidates = []
752 if ideal_index is not None:
753 candidates.append(ideal_index)
754 if begin is not None:
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800755 candidates.extend(list(range(begin, end + 1)))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800756 if not candidates:
757 logger.error('unable to match %s: all specs are after it', target.name)
758 return None
759
760 compatible_candidates = [
761 i for i in candidates if specs[i].is_subset(target)
762 ]
763 if not compatible_candidates:
764 logger.error('unable to match %s: no compatible specs', target.name)
765 spec = specs[candidates[0]]
766 target.diff(spec)
767 return None
768
769 scores = []
770 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800771 # Tie-break: prefer earlier timestamp and smaller difference.
772 if specs[i].timestamp <= target.timestamp:
773 timediff = 0, target.timestamp - specs[i].timestamp
774 else:
775 timediff = 1, specs[i].timestamp - target.timestamp
776 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800777 scores.sort()
778
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800779 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800780 if score != 0:
781 logger.warning('not exactly match (score=%s): %s', score, target.name)
782 target.diff(specs[index])
783
784 if index < ideal_index:
785 logger.warning(
786 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
787 target.name, target.timestamp, specs[index].timestamp,
788 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800789 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800790 if index > ideal_index:
791 logger.warning(
792 'spec committed at %d matched later commit at %d. bad server clock?',
793 target.timestamp, specs[index].timestamp)
794
795 return index
796
797 def associate_fixed_and_synthesized_specs(self, fixed_specs,
798 synthesized_specs):
799 # All fixed specs are snapshot of float specs. Theoretically, they
800 # should be identical to one of the synthesized specs.
801 # However, it's not always true for some reasons --- maybe due to race
802 # condition, maybe due to bugs of this bisect-kit.
803 # To overcome this glitch, we try to match them by similarity instead of
804 # exact match.
805 result = []
806 last_index = 0
807 for i, fixed_spec in enumerate(fixed_specs):
808 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
809 if matched_index is None:
810 if i in (0, len(fixed_specs) - 1):
811 logger.error('essential spec mismatch, unable to continue')
Kuang-che Wufe1e88a2019-09-10 21:52:25 +0800812 raise ValueError('Commit history analyze failed. '
813 'Bisector cannot deal with this version range.')
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800814 else:
815 logger.warning('%s do not match, skip', fixed_spec.name)
816 continue
817 result.append((i, matched_index))
818 last_index = matched_index
819
820 return result
821
822 def _create_make_up_actions(self, fixed_spec, synthesized):
823 timestamp = synthesized.timestamp
824 make_up = ActionGroup(
825 timestamp, comment='make up glitch for %s' % fixed_spec.name)
826 for path in set(fixed_spec.entries) & set(synthesized.entries):
827 if fixed_spec[path].at == synthesized[path].at:
828 continue
829 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
830 synthesized[path].at)
831 make_up.add(action)
832
833 if not make_up.actions:
834 return None
835 return make_up
836
837 def build_revlist(self, old, new):
838 """Build revlist.
839
840 Returns:
841 list of rev string
842 """
Zheng-Jie Chang127c3302019-09-10 17:17:04 +0800843 logger.info('build_revlist: old = %s, new = %s', old, new)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800844 revlist = []
845
846 # step 1, find all float and fixed specs in the given range.
847 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800848 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800849 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800850 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800851 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
852 float_specs.pop()
853 assert float_specs
854 for spec in float_specs + fixed_specs:
855 self.spec_manager.parse_spec(spec)
856
857 # step 2, synthesize all fixed specs in the range from float specs.
858 specs = float_specs + [fixed_specs[-1]]
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800859 action_groups = []
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800860 logger.debug('len(specs)=%d', len(specs))
861 for i in range(len(specs) - 1):
862 prev_float = specs[i]
863 next_float = specs[i + 1]
864 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
865 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800866 action_groups += self.generate_action_groups_between_specs(
867 prev_float, next_float)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800868
869 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
870 synthesized = [spec.copy()]
871 for action_group in action_groups:
872 spec.apply(action_group)
873 synthesized.append(spec.copy())
874
875 # step 3, associate fixed specs with synthesized specs.
876 associated_pairs = self.associate_fixed_and_synthesized_specs(
877 fixed_specs, synthesized)
878
879 # step 4, group actions and cache them
880 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
881 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
882 revlist.append(fixed_specs[fixed_index].name)
883 this_action_groups = []
884
885 # handle glitch
886 if fixed_specs[fixed_index].similar_score(
887 synthesized[synthesized_index]) != 0:
888 assert synthesized[synthesized_index].is_subset(
889 fixed_specs[fixed_index])
890 skipped = set(fixed_specs[fixed_index].entries) - set(
891 synthesized[synthesized_index].entries)
892 if skipped:
893 logger.warning(
894 'between %s and %s, '
895 'bisect-kit cannot analyze commit history of following paths:',
896 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
897 for path in sorted(skipped):
898 logger.warning(' %s', path)
899
900 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
901 synthesized[synthesized_index])
902 if make_up:
903 this_action_groups.append(make_up)
904
905 this_action_groups.extend(
906 action_groups[synthesized_index:next_synthesized_index])
907 for idx, ag in enumerate(this_action_groups, 1):
908 rev = make_intra_rev(fixed_specs[fixed_index].name,
909 fixed_specs[next_fixed_index].name, idx)
910 ag.name = rev
911 revlist.append(rev)
912
913 self.save_action_groups_between_releases(
914 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
915 this_action_groups)
916 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
917
918 return revlist
919
920 def save_action_groups_between_releases(self, old, new, action_groups):
921 data = [ag.serialize() for ag in action_groups]
922
923 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
924 if not os.path.exists(cache_dir):
925 os.makedirs(cache_dir)
926 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800927 with open(cache_filename, 'w') as fp:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800928 json.dump(data, fp, indent=4, sort_keys=True)
929
930 def load_action_groups_between_releases(self, old, new):
931 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
932 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
933 if not os.path.exists(cache_filename):
Kuang-che Wuce2f3be2019-10-28 19:44:54 +0800934 raise errors.InternalError(
935 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800936
937 result = []
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800938 for data in json.load(open(cache_filename)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800939 result.append(ActionGroup.unserialize(data))
940
941 return result
942
Kuang-che Wue80bb872018-11-15 19:45:25 +0800943 def get_rev_detail(self, rev):
944 rev_old, rev_new, index = parse_intra_rev(rev)
945 if rev_old == rev_new:
946 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800947
Kuang-che Wue80bb872018-11-15 19:45:25 +0800948 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
949 # Indexes inside intra_rev are 1 based.
950 action_group = action_groups[index - 1]
951 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800952
953 def switch(self, rev):
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800954 rev_old, action_groups = self.get_intra_and_diff(rev)
955 self.spec_manager.sync_disk_state(rev_old)
956 apply_actions(self.code_storage, action_groups, self.root_dir)
957
958 def get_intra_and_diff(self, rev):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800959 # easy case
960 if not re.match(_re_intra_rev, rev):
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800961 return rev, []
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800962
963 rev_old, rev_new, idx = parse_intra_rev(rev)
964 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
965 assert 0 <= idx <= len(action_groups)
966 action_groups = action_groups[:idx]
Zheng-Jie Chang0fc704b2019-12-09 18:43:38 +0800967 return rev_old, action_groups