blob: 3e4f3a10bb8c6fa5bba26c3acc63143f04e70ced [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
This module models complex source code organization, i.e. nested git repos,
and their version relationships, i.e. pinned or floating git repos. In other
words, it is an abstraction of chrome's gclient DEPS, and of chromeos and
Android's repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Pattern of an intra-rev string, "<start>~<end>/<index>". Neither <start> nor
# <end> may contain a comma and <index> is a decimal number.
_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

# Possible values of Spec.spec_type.
SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'
_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
def make_intra_rev(a, b, index):
  """Builds the "intra-rev" name of one change between two named versions.

  bisect-kit enumerates every small change (commit) lying between two major
  "named" versions a and b and bisects over them. Each of those intermediate
  states gets an "intra-rev" name, i.e. a minor version number within two
  major versions, spelled "<a>~<b>/<index>".

  Both end points are part of the name because a+index alone is ambiguous once
  branches are involved. Take chromeos as an example: 9900.1.0 and 9901.0.0 are
  both derived from 9900.0.0, so "9900.0.0 plus 100 changes" could refer to a
  state in either of them.

  Args:
    a: the start version
    b: the end version
    index: the index number of the change between a and b

  Returns:
    the intra-rev version string
  """
  fields = (a, b, index)
  return '%s~%s/%d' % fields
57
58
def parse_intra_rev(rev):
  """Splits an intra-rev string into its components.

  See make_intra_rev for the definition of intra-rev.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). A string that does not look like an intra-rev is
    treated as a normal version number and yields (rev, rev, 0).
  """
  matched = re.match(_re_intra_rev, rev)
  if matched:
    start, end, index = matched.groups()
    return start, end, int(index)
  return rev, rev, 0
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
def argtype_intra_rev(argtype):
  """Wraps a major-version validator so that it also accepts intra-revs.

  Args:
    argtype: argtype function which validates major version number

  Returns:
    A new argtype function. It returns whatever `argtype` returns for a plain
    version, returns the string unchanged for an intra-rev whose two end
    points both pass `argtype`, and raises cli.ArgTypeError otherwise.
  """

  def argtype_function(s):
    hints = []
    # A plain major version is accepted as is.
    try:
      return argtype(s)
    except cli.ArgTypeError as plain_error:
      hints.extend(plain_error.example)

    parts = re.match(_re_intra_rev, s)
    if not parts:
      hints.append(make_intra_rev('<rev1>', '<rev2>', 10))
      raise cli.ArgTypeError('Invalid rev', hints)

    # It has the intra-rev shape; both end points must be valid versions.
    try:
      for endpoint in parts.group(1, 2):
        argtype(endpoint)
    except cli.ArgTypeError as endpoint_error:
      hints.extend(
          make_intra_rev(example, example, 10)
          for example in endpoint_error.example)
      raise cli.ArgTypeError('Invalid intra rev', hints)
    return s

  return argtype_function
110
111
def _normalize_repo_url(repo_url):
  """Returns a canonical form of a repo url, used for comparing urls.

  Two rewrites are applied:
    - the "/a/" path prefix of chrome-internal.googlesource.com is dropped
    - a trailing ".git" is dropped

  Args:
    repo_url: URL of git remote repo

  Returns:
    the normalized url string
  """
  # The dots of the host name are escaped so only the literal host matches;
  # an unescaped '.' would also rewrite look-alike hosts.
  repo_url = re.sub(r'https://chrome-internal\.googlesource\.com/a/',
                    'https://chrome-internal.googlesource.com/', repo_url)
  return re.sub(r'\.git$', '', repo_url)
117
118
class PathSpec(object):
  """Specified code version of one path.

  Two PathSpec objects are equal when their path and `at` are the same and
  their repo urls are the same after _normalize_repo_url().

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    """Returns whether `at` is a git revision per git_util.is_git_rev()."""
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    # Let python fall back to its default comparison for foreign objects
    # instead of failing with AttributeError on the attribute reads below.
    if not isinstance(rhs, PathSpec):
      return NotImplemented
    if self.path != rhs.path:
      return False
    if self.at != rhs.at:
      return False
    if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
      return False
    return True

  def __ne__(self, rhs):
    return not self == rhs
147
148
class Spec(object):
  """A set of PathSpec objects keyed by path.

  A Spec plays the same role as gclient's DEPS or repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
        version is pinned and fixed. On the other hand, SPEC_FLOAT is not
        pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purpose. usually version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: paths to PathSpec dict
    revision: a commit id of manifest-internal indicates the manifest revision,
        this argument is not used in DEPS.
  """

  def __init__(self,
               spec_type,
               name,
               timestamp,
               path,
               entries=None,
               revision=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries
    self.revision = revision

  def copy(self):
    """Returns a deep copy of this spec."""
    duplicate = copy.deepcopy(self)
    return duplicate

  def similar_score(self, rhs):
    """Measures how far this spec is from another one.

    Every path present in only one of the two specs costs 1. A shared path
    costs 1 if its `at` differs, 0.1 if only something else differs, and
    nothing if both entries are equal.

    Returns:
      score of similarity. Smaller value is more similar.
    """
    score = 0
    for path in set(self.entries) & set(rhs.entries):
      theirs, mine = rhs[path], self[path]
      if theirs == mine:
        continue
      # Same `at` but unequal entries: it's often that remote repo moved
      # around but should be treated as the same one, so penalize lightly.
      score += 0.1 if theirs.at == mine.at else 1
    score += len(set(self.entries) ^ set(rhs.entries))
    return score

  def is_static(self):
    """Returns True if every entry of this spec is static."""
    for path_spec in self.entries.values():
      if not path_spec.is_static():
        return False
    return True

  def is_subset(self, rhs):
    """Returns True if every path of this spec is also a path of `rhs`."""
    return set(self.entries.keys()).issubset(rhs.entries.keys())

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    """Updates this spec in place as if `action_group` had been applied.

    The spec takes over the timestamp of the group and is renamed to
    "(<timestamp>)".
    """
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp
    entries = self.entries
    for action in action_group.actions:
      target = action.path
      if isinstance(action, GitAddRepo):
        entries[target] = PathSpec(target, action.repo_url, action.rev)
      elif isinstance(action, GitCheckoutCommit):
        entries[target].at = action.rev
      elif isinstance(action, GitRemoveRepo):
        del entries[target]
      else:
        assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    """Prints this spec to stdout, for debugging."""
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path, path_spec in sorted(self.entries.items()):
      print(path, path_spec.at)

  def diff(self, rhs):
    """Logs the differences between this spec and `rhs` at info level."""
    logger.info('diff between %s and %s', self.name, rhs.name)
    expect = set(self.entries)
    actual = set(rhs.entries)

    for path in sorted(expect - actual):
      logger.info('-%s', path)
    for path in sorted(actual - expect):
      logger.info('+%s', path)

    common_count = 0
    for path in sorted(expect & actual):
      mine, theirs = self[path], rhs[path]
      if mine == theirs:
        common_count += 1
        continue
      if mine.at != theirs.at:
        logger.info(' %s: at %s vs %s', path, mine.at, theirs.at)
      if mine.repo_url != theirs.repo_url:
        logger.info(' %s: repo_url %s vs %s', path, mine.repo_url,
                    theirs.repo_url)
    logger.info('and common=%s', common_count)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800255
256
class Action(object):
  """Actions describe changes from one Spec to another.

  Two actions are equal only if they are of the same class and all their
  attributes are equal.

  Attributes:
    timestamp: action time
    path: action path, which is relative to project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _code_storage, _root_dir):
    """Performs this action on disk; subclasses must override."""
    raise NotImplementedError

  def summary(self, _code_storage):
    """Describes this action; subclasses must override."""
    raise NotImplementedError

  def __eq__(self, rhs):
    # Foreign objects have no comparable state; let python decide.
    if not isinstance(rhs, Action):
      return NotImplemented
    # Different action classes can carry identical attributes (for example a
    # checkout and an add of the same repo and rev), so the class itself has
    # to take part in the comparison.
    return type(self) is type(rhs) and self.__dict__ == rhs.__dict__

  def __ne__(self, rhs):
    # Explicit for python 2, which does not derive != from ==.
    equal = self.__eq__(rhs)
    if equal is NotImplemented:
      return NotImplemented
    return not equal

  def serialize(self):
    """Returns (class name, attribute dict), see unserialize_action()."""
    return self.__class__.__name__, self.__dict__
280
281
def unserialize_action(data):
  """Rebuilds an action from the output of Action.serialize().

  Args:
    data: (class name, attribute dict) pair

  Returns:
    an instance of the named action class, constructed with the attribute
    dict as keyword arguments
  """
  known = (GitCheckoutCommit, GitAddRepo, GitRemoveRepo)
  class_name, values = data
  names = [cls.__name__ for cls in known]
  assert class_name in names, 'unknown action class: %s' % class_name
  return known[names.index(class_name)](**values)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800292
293
class ActionGroup(object):
  """A batch of Action objects which belong together atomically.

  This models atomic actions, ex:
    - repo added/removed in the same manifest commit
    - commits appears at the same time due to repo add
    - gerrit topic
    - circular CQ-DEPEND (Cq-Depend)
  Otherwise, one ActionGroup usually consists only one Action object.
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    """Appends one action to this group."""
    self.actions.append(action)

  def serialize(self):
    """Returns a dict form of this group, the inverse of unserialize()."""
    serialized_actions = [action.serialize() for action in self.actions]
    return {
        'timestamp': self.timestamp,
        'name': self.name,
        'comment': self.comment,
        'actions': serialized_actions,
    }

  def summary(self, code_storage):
    """Returns a dict of the summaries of all actions.

    The dict has an 'actions' list and, only when this group has a non-empty
    comment, a 'comment' item.
    """
    result = {'comment': self.comment} if self.comment else {}
    result['actions'] = [
        action.summary(code_storage) for action in self.actions
    ]
    return result

  @staticmethod
  def unserialize(data):
    """Rebuilds an ActionGroup from the output of serialize()."""
    group = ActionGroup(data['timestamp'])
    group.name = data['name']
    group.comment = data['comment']
    for serialized in data['actions']:
      group.add(unserialize_action(serialized))
    return group

  def apply(self, code_storage, root_dir):
    """Applies every action of this group, in order."""
    for action in self.actions:
      action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800342
343
class GitCheckoutCommit(Action):
  """Action that checks out one git commit in an existing repo.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Checks out `rev` in the git repo at root_dir/path."""
    del code_storage  # unused
    repo_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(repo_dir)
    git_util.checkout_version(repo_dir, self.rev)

  def summary(self, code_storage):
    """Returns a dict describing this commit.

    The first line of the commit log is looked up in the cached mirror of the
    repo; if git fails, '(unknown)' is reported instead.
    """
    git_root = code_storage.cached_git_root(self.repo_url)
    try:
      commit_log = git_util.get_commit_log(git_root, self.rev)
      commit_summary = commit_log.splitlines()[0]
    except subprocess.CalledProcessError:
      logger.warning('failed to get commit log of %s at %s', self.rev[:10],
                     git_root)
      commit_summary = '(unknown)'
    return {
        'timestamp': self.timestamp,
        'action_type': 'commit',
        'path': self.path,
        'commit_summary': commit_summary,
        'repo_url': self.repo_url,
        'rev': self.rev,
        'text': 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800382
383
class GitAddRepo(Action):
  """Action that adds (clones) a git repo into the source tree.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Clones the repo to root_dir/path, checks out `rev` and registers it.

    The destination must not exist yet, except that an empty directory is
    tolerated.
    """
    repo_dir = os.path.join(root_dir, self.path)
    if os.path.exists(repo_dir):
      is_empty_dir = os.path.isdir(repo_dir) and not os.listdir(repo_dir)
      if is_empty_dir:
        # mimic gclient's behavior; don't panic
        logger.warning(
            'adding repo %s; there is already an empty directory; '
            'assume it is okay', repo_dir)
      else:
        assert not os.path.exists(repo_dir), '%s already exists' % repo_dir

    # Clone with the local mirror as reference.
    mirror = code_storage.cached_git_root(self.repo_url)
    git_util.clone(repo_dir, self.repo_url, reference=mirror)
    git_util.checkout_version(repo_dir, self.rev)

    code_storage.add_to_project_list(root_dir, self.path, self.repo_url)

  def summary(self, _code_storage):
    """Returns a dict describing this repo addition."""
    return {
        'timestamp': self.timestamp,
        'action_type': 'add_repo',
        'path': self.path,
        'repo_url': self.repo_url,
        'rev': self.rev,
        'text': 'add repo %s from %s@%s' % (self.path, self.repo_url,
                                            self.rev[:10]),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800424
425
class GitRemoveRepo(Action):
  """Action that removes a git repo from the source tree."""

  def apply(self, code_storage, root_dir):
    """Deletes the directory root_dir/path and unregisters the repo."""
    assert self.path
    repo_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(repo_dir)
    # TODO(kcwu): other projects may be sub-tree of `repo_dir`.
    #             They should not be deleted. (crbug/930047)
    shutil.rmtree(repo_dir)

    code_storage.remove_from_project_list(root_dir, self.path)

  def summary(self, _code_storage):
    """Returns a dict describing this repo removal."""
    return {
        'timestamp': self.timestamp,
        'action_type': 'remove_repo',
        'path': self.path,
        'text': 'remove repo %s' % self.path,
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800446
447
def apply_actions(code_storage, action_groups, root_dir):
  """Applies action groups, in order, to the source tree at `root_dir`.

  Groups which consist only of GitCheckoutCommit actions are deferred and
  batched, so that within a run of such groups only the last checkout of each
  repo is performed. Any other group flushes the pending checkouts and is then
  applied as a whole.

  Args:
    code_storage: CodeStorage object, passed on to the actions
    action_groups: list of ActionGroup objects (ordered)
    root_dir: root path of project source tree
  """
  # Speed optimization: only apply the last one of consecutive commits per
  # repo. It is possible to optimize further, but need to take care git repo
  # add/remove within another repo.
  commits = {}

  def batch_apply(commits):
    # Keep the order in which the surviving checkouts were originally listed:
    # by group index first, then by position inside the group.
    for i, _, commit_action in sorted(commits.values(), key=lambda x: x[:2]):
      logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
      commit_action.apply(code_storage, root_dir)

  for i, action_group in enumerate(action_groups, 1):
    if all(
        isinstance(action, GitCheckoutCommit)
        for action in action_group.actions):
      # If all actions are commits, defer them for batch processing.
      for j, action in enumerate(action_group.actions):
        commits[action.path] = (i, j, action)
      continue

    batch_apply(commits)
    commits = {}
    # Apply the whole group. Applying only the first non-commit action would
    # drop the remaining actions of this atomic group.
    action_group.apply(code_storage, root_dir)

  batch_apply(commits)
474
475
class SpecManager(object):
  """Abstract interface of Spec related operations.

  Implementations enumerate Spec instances and switch the disk state to a
  Spec.

  In other words, this class abstracts:
    - discovery of gclient's DEPS and repo's manifest
    - gclient sync and repo sync
  """

  def collect_float_spec(self, old, new, fixed_specs=None):
    """Collects the float Specs between two versions.

    Implementations may fetch specs from network, but they should not switch
    the version state of the tree.

    Args:
      old: old version
      new: new version
      fixed_specs: fixed specs from collect_fixed_spec(old, new) for Chrome OS
          or None for others
    """
    raise NotImplementedError

  def collect_fixed_spec(self, old, new):
    """Collects the fixed Specs between two versions.

    Implementations may fetch specs from network, but they should not switch
    the version state of the tree.
    """
    raise NotImplementedError

  def parse_spec(self, spec):
    """Parses information for a Spec object.

    Args:
      spec: Spec object. It specifies what to parse and the parsed information
          is stored inside.
    """
    raise NotImplementedError

  def sync_disk_state(self, rev):
    """Switches the source tree state to the given version."""
    raise NotImplementedError
520
521
class CodeStorage(object):
  """Queries code history and commit relationship without checkout.

  Paths inside the source tree may be deleted or map to different remote
  repos in different versions, so git information of one version cannot be
  queried while the tree is at another version. To query quickly and without
  changing the tree state, an out of tree source code storage is needed.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses just need to implement cached_git_root() which returns the
  location.

  In other words, this class abstracts operations upon gclient's cache-dir
  and repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """Returns the cached path of the given remote git repo.

    Args:
      repo_url: URL of git remote repo

    Returns:
      path of cache folder
    """
    raise NotImplementedError

  def add_to_project_list(self, project_root, path, repo_url):
    """Registers a repo in the project list; subclasses must override."""
    raise NotImplementedError

  def remove_from_project_list(self, project_root, path):
    """Unregisters a repo from the project list; subclasses must override."""
    raise NotImplementedError

  def is_ancestor_commit(self, spec, path, old, new):
    """Determines whether one commit is an ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is ancestor of `new`
    """
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_ancestor_commit(mirror, old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Gets the commit hash of a path of the given spec by time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp

    Returns:
      The commit hash of given time. If there are commits with the given
      timestamp, returns the last commit.
    """
    entry = spec[path]
    mirror = self.cached_git_root(entry.repo_url)
    # entry.at is remote reference name. Since the mirror is not a local
    # checkout, there is no need to convert the name.
    return git_util.get_rev_by_time(mirror, timestamp, entry.at)

  def get_actions_between_two_commit(self, spec, path, old, new):
    """Returns one GitCheckoutCommit per commit listed between old and new.

    The commits are those yielded by git_util.list_commits_between_commits(),
    in the same order.
    """
    repo_url = spec[path].repo_url
    mirror = self.cached_git_root(repo_url)
    return [
        GitCheckoutCommit(timestamp, path, repo_url, git_rev)
        for timestamp, git_rev in git_util.list_commits_between_commits(
            mirror, old, new)
    ]

  def is_containing_commit(self, spec, path, rev):
    """Returns whether the mirror of the repo at `path` contains `rev`."""
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_containing_commit(mirror, rev)

  def are_spec_commits_available(self, spec):
    """Returns True if all static entries of spec are found in the mirrors.

    Entries which are not static are not checked.
    """
    return all(
        self.is_containing_commit(spec, path, path_spec.at)
        for path, path_spec in spec.entries.items()
        if path_spec.is_static())
607 return True
608
609
610class CodeManager(object):
611 """Class to reconstruct historical source tree state.
612
613 This class can reconstruct all moments of source tree state and diffs between
614 them.
615
616 Attributes:
617 root_dir: root path of project source tree
618 spec_manager: SpecManager object
619 code_storage: CodeStorage object
620 """
621
622 def __init__(self, root_dir, spec_manager, code_storage):
623 self.root_dir = root_dir
624 self.spec_manager = spec_manager
625 self.code_storage = code_storage
626
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800627 def generate_action_groups_between_specs(self, prev_float, next_float):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800628 """Generates actions between two float specs.
629
630 Args:
631 prev_float: start of spec object (exclusive)
632 next_float: end of spec object (inclusive)
633
634 Returns:
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800635 list of ActionGroup object (ordered)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800636 """
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800637 groups = []
638 last_group = ActionGroup(next_float.timestamp)
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800639 is_removed = set()
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800640 # Sort alphabetically, so parent directories are handled before children
641 # directories.
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800642 for path in sorted(set(prev_float.entries) | set(next_float.entries)):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800643
644 # Add repo
645 if path not in prev_float:
646 if next_float[path].is_static():
647 next_at = next_float[path].at
648 else:
649 next_at = self.code_storage.get_rev_by_time(next_float, path,
650 next_float.timestamp)
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800651 last_group.add(
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800652 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
653 next_at))
654 continue
655
656 # Existing path is floating, enumerates commits until next spec.
657 #
658 # prev_at till_at
659 # prev branch ---> o --------> o --------> o --------> o --------> ...
660 # ^ ^
661 # prev_float.timestamp next_float.timestamp
662 if not prev_float[path].is_static():
663 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
664 prev_float.timestamp)
665 till_at = self.code_storage.get_rev_by_time(prev_float, path,
666 next_float.timestamp)
667
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800668 actions = self.code_storage.get_actions_between_two_commit(
669 prev_float, path, prev_at, till_at)
670
671 # Assume commits with the same timestamp as manifest/DEPS change are
672 # atomic.
673 if actions and actions[-1].timestamp == next_float.timestamp:
674 last_group.add(actions.pop())
675
676 for action in actions:
677 group = ActionGroup(action.timestamp)
678 group.add(action)
679 groups.append(group)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800680 else:
681 prev_at = till_at = prev_float[path].at
682
683 # At next_float.timestamp.
684 if path not in next_float:
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800685 if path in is_removed:
686 continue
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800687 # remove repo
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800688 next_at = None
Kuang-che Wucbe12432019-03-18 19:35:03 +0800689 sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')]
Kuang-che Wucbe12432019-03-18 19:35:03 +0800690 # Remove deeper repo first
691 for path2 in sorted(sub_repos, reverse=True):
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800692 last_group.add(GitRemoveRepo(next_float.timestamp, path2))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800693 is_removed.add(path2)
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800694 last_group.add(GitRemoveRepo(next_float.timestamp, path))
Zheng-Jie Changeb5aaf32020-01-10 16:36:58 +0800695 is_removed.add(path)
Kuang-che Wucbe12432019-03-18 19:35:03 +0800696 for path2 in sorted(set(sub_repos) & set(next_float.entries)):
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800697 last_group.add(
Kuang-che Wucbe12432019-03-18 19:35:03 +0800698 GitAddRepo(next_float.timestamp, path2,
699 next_float[path2].repo_url, prev_float[path2].at))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800700
701 elif next_float[path].is_static():
702 # pinned to certain commit on different branch
703 next_at = next_float[path].at
704
705 elif next_float[path].at == prev_float[path].at:
706 # keep floating on the same branch
707 next_at = till_at
708
709 else:
710 # switch to another branch
711 # prev_at till_at
712 # prev branch ---> o --------> o --------> o --------> o --------> ...
713 #
714 # next_at
715 # next branch ...... o ------> o --------> o -----> ...
716 # ^ ^
717 # prev_float.timestamp next_float.timestamp
718 next_at = self.code_storage.get_rev_by_time(next_float, path,
719 next_float.timestamp)
720
721 if next_at and next_at != till_at:
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800722 last_group.add(
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800723 GitCheckoutCommit(next_float.timestamp, path,
724 next_float[path].repo_url, next_at))
725
Kuang-che Wuae6847c2020-01-13 16:06:08 +0800726 groups.sort(key=lambda x: x.timestamp)
727 if last_group.actions:
728 groups.append(last_group)
729 return groups
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800730
731 def synthesize_fixed_spec(self, float_spec, timestamp):
732 """Synthesizes fixed spec from float spec of given time.
733
734 Args:
735 float_spec: the float spec
736 timestamp: snapshot time
737
738 Returns:
739 Spec object
740 """
741 result = {}
742 for path, path_spec in float_spec.entries.items():
743 if not path_spec.is_static():
744 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
745 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
746
747 result[path] = copy.deepcopy(path_spec)
748
749 name = '%s@%s' % (float_spec.path, timestamp)
750 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
751
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800752 def match_spec(self, target, specs, start_index=0):
753 threshold = 3600
754 # ideal_index is the index of last spec before target
755 # begin and end are the range of indexes within threshold (inclusive)
756 ideal_index = None
757 begin, end = None, None
758 for i, spec in enumerate(specs[start_index:], start_index):
759 if spec.timestamp <= target.timestamp:
760 ideal_index = i
761 if abs(spec.timestamp - target.timestamp) < threshold:
762 if begin is None:
763 begin = i
764 end = i
765
766 candidates = []
767 if ideal_index is not None:
768 candidates.append(ideal_index)
769 if begin is not None:
Kuang-che Wuae6824b2019-08-27 22:20:01 +0800770 candidates.extend(list(range(begin, end + 1)))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800771 if not candidates:
772 logger.error('unable to match %s: all specs are after it', target.name)
773 return None
774
775 compatible_candidates = [
776 i for i in candidates if specs[i].is_subset(target)
777 ]
778 if not compatible_candidates:
779 logger.error('unable to match %s: no compatible specs', target.name)
780 spec = specs[candidates[0]]
781 target.diff(spec)
782 return None
783
784 scores = []
785 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800786 # Tie-break: prefer earlier timestamp and smaller difference.
787 if specs[i].timestamp <= target.timestamp:
788 timediff = 0, target.timestamp - specs[i].timestamp
789 else:
790 timediff = 1, specs[i].timestamp - target.timestamp
791 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800792 scores.sort()
793
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800794 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800795 if score != 0:
796 logger.warning('not exactly match (score=%s): %s', score, target.name)
797 target.diff(specs[index])
798
799 if index < ideal_index:
800 logger.warning(
801 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
802 target.name, target.timestamp, specs[index].timestamp,
803 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800804 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800805 if index > ideal_index:
806 logger.warning(
807 'spec committed at %d matched later commit at %d. bad server clock?',
808 target.timestamp, specs[index].timestamp)
809
810 return index
811
812 def associate_fixed_and_synthesized_specs(self, fixed_specs,
813 synthesized_specs):
814 # All fixed specs are snapshot of float specs. Theoretically, they
815 # should be identical to one of the synthesized specs.
816 # However, it's not always true for some reasons --- maybe due to race
817 # condition, maybe due to bugs of this bisect-kit.
818 # To overcome this glitch, we try to match them by similarity instead of
819 # exact match.
820 result = []
821 last_index = 0
822 for i, fixed_spec in enumerate(fixed_specs):
823 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
824 if matched_index is None:
825 if i in (0, len(fixed_specs) - 1):
826 logger.error('essential spec mismatch, unable to continue')
Kuang-che Wufe1e88a2019-09-10 21:52:25 +0800827 raise ValueError('Commit history analyze failed. '
828 'Bisector cannot deal with this version range.')
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800829 else:
830 logger.warning('%s do not match, skip', fixed_spec.name)
831 continue
832 result.append((i, matched_index))
833 last_index = matched_index
834
835 return result
836
837 def _create_make_up_actions(self, fixed_spec, synthesized):
838 timestamp = synthesized.timestamp
839 make_up = ActionGroup(
840 timestamp, comment='make up glitch for %s' % fixed_spec.name)
841 for path in set(fixed_spec.entries) & set(synthesized.entries):
842 if fixed_spec[path].at == synthesized[path].at:
843 continue
844 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
845 synthesized[path].at)
846 make_up.add(action)
847
848 if not make_up.actions:
849 return None
850 return make_up
851
def build_revlist(self, old, new):
  """Builds the list of revs between two versions.

  Convenience wrapper of get_specs_and_revlist which discards the specs.

  Args:
    old: the start version
    new: the end version

  Returns:
    list of rev string
  """
  result = self.get_specs_and_revlist(old, new)
  _fixed_specs, _float_specs, revs = result
  return revs
860
def get_specs_and_revlist(self, old, new):
  """Collects specs between two versions and builds the revlist.

  Besides computing the result, the action groups between each pair of
  adjacent associated fixed specs are cached through
  save_action_groups_between_releases.

  Args:
    old: the start version
    new: the end version

  Returns:
    (parsed fixed_specs, parsed float_specs, list of rev string)

  Raises:
    ValueError: raised by associate_fixed_and_synthesized_specs if the first
        or the last fixed spec cannot be matched to a synthesized spec.
  """
  logger.info('get_specs_and_revlist: old = %s, new = %s', old, new)
  revlist = []

  # step 1, find all float and fixed specs in the given range.
  fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
  assert fixed_specs
  for spec in fixed_specs:
    self.spec_manager.parse_spec(spec)

  float_specs = self.spec_manager.collect_float_spec(old, new, fixed_specs)
  assert float_specs
  # Drop float specs which are newer than the last fixed spec. The emptiness
  # check keeps the loop from indexing an exhausted list, so that case is
  # reported by the assert below instead of a bare IndexError.
  while (float_specs and
         float_specs[-1].timestamp > fixed_specs[-1].timestamp):
    float_specs.pop()
  assert float_specs
  for spec in float_specs:
    self.spec_manager.parse_spec(spec)

  # step 2, synthesize all fixed specs in the range from float specs.
  # The last fixed spec is appended so the changes after the last float spec
  # are covered as well.
  specs = float_specs + [fixed_specs[-1]]
  action_groups = []
  logger.debug('len(specs)=%d', len(specs))
  for i, (prev_float, next_float) in enumerate(zip(specs, specs[1:])):
    logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                 prev_float.timestamp, next_float.name, next_float.timestamp)
    action_groups += self.generate_action_groups_between_specs(
        prev_float, next_float)

  # synthesized[k] is the state after applying the first k action groups,
  # hence action_groups[a:b] leads from synthesized[a] to synthesized[b].
  spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
  synthesized = [spec.copy()]
  for action_group in action_groups:
    spec.apply(action_group)
    synthesized.append(spec.copy())

  # step 3, associate fixed specs with synthesized specs.
  associated_pairs = self.associate_fixed_and_synthesized_specs(
      fixed_specs, synthesized)

  # step 4, group actions and cache them
  for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
    next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
    revlist.append(fixed_specs[fixed_index].name)
    this_action_groups = []

    # handle glitch
    if fixed_specs[fixed_index].similar_score(
        synthesized[synthesized_index]) != 0:
      assert synthesized[synthesized_index].is_subset(
          fixed_specs[fixed_index])
      skipped = set(fixed_specs[fixed_index].entries) - set(
          synthesized[synthesized_index].entries)
      if skipped:
        logger.warning(
            'between %s and %s, '
            'bisect-kit cannot analyze commit history of following paths:',
            fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
        for path in sorted(skipped):
          logger.warning(' %s', path)

      make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                             synthesized[synthesized_index])
      if make_up:
        this_action_groups.append(make_up)

    this_action_groups.extend(
        action_groups[synthesized_index:next_synthesized_index])
    # Indexes inside intra-rev are 1 based.
    for idx, ag in enumerate(this_action_groups, 1):
      rev = make_intra_rev(fixed_specs[fixed_index].name,
                           fixed_specs[next_fixed_index].name, idx)
      ag.name = rev
      revlist.append(rev)

    self.save_action_groups_between_releases(
        fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
        this_action_groups)
  # The loop above only emits the left end of each pair; close the list with
  # the last associated fixed spec.
  revlist.append(fixed_specs[associated_pairs[-1][0]].name)

  return fixed_specs, float_specs, revlist
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800946
def save_action_groups_between_releases(self, old, new, action_groups):
  """Caches the action groups between two releases as a JSON file.

  The file is named '<old>,<new>.json' and is put in the diff cache
  directory under self.root_dir; the directory is created if missing.

  Args:
    old: name of the older release
    new: name of the newer release
    action_groups: list of ActionGroup to be serialized
  """
  serialized = []
  for group in action_groups:
    serialized.append(group.serialize())

  cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
  if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

  basename = '%s,%s.json' % (old, new)
  cache_filename = os.path.join(cache_dir, basename)
  with open(cache_filename, 'w') as fp:
    json.dump(serialized, fp, indent=4, sort_keys=True)
956
def load_action_groups_between_releases(self, old, new):
  """Loads the cached action groups between two releases.

  Reads the JSON file '<old>,<new>.json' from the diff cache directory under
  self.root_dir and rebuilds one ActionGroup per stored entry.

  Args:
    old: name of the older release
    new: name of the newer release

  Returns:
    list of ActionGroup, in the order stored in the cache file

  Raises:
    errors.InternalError: the cache file does not exist
  """
  cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
  cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
  if not os.path.exists(cache_filename):
    raise errors.InternalError(
        'cached revlist not found: %s' % cache_filename)

  # Close the file explicitly instead of leaving the handle to the garbage
  # collector.
  with open(cache_filename) as fp:
    serialized = json.load(fp)

  return [ActionGroup.unserialize(data) for data in serialized]
969
def get_rev_detail(self, rev):
  """Gets the summary of the action group a rev refers to.

  Args:
    rev: rev string, parsed by parse_intra_rev

  Returns:
    Empty dict if both ends parsed from rev are the same. Otherwise, the
    result of summary() of the corresponding cached action group.
  """
  start, end, position = parse_intra_rev(rev)
  if start != end:
    groups = self.load_action_groups_between_releases(start, end)
    # Indexes inside intra_rev are 1 based while list indexes are 0 based.
    return groups[position - 1].summary(self.code_storage)
  return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800979
def switch(self, rev):
  """Switches the source tree to the given rev.

  The disk state is first synced to the base version of rev, then the action
  groups leading from the base to rev are applied on self.root_dir.

  Args:
    rev: rev string, either a plain version or an intra-rev
  """
  base_rev, pending_groups = self.get_intra_and_diff(rev)
  self.spec_manager.sync_disk_state(base_rev)
  apply_actions(self.code_storage, pending_groups, self.root_dir)
984
def get_intra_and_diff(self, rev):
  """Splits a rev into its base version and the action groups on top of it.

  Args:
    rev: rev string, either a plain version or an intra-rev

  Returns:
    (base version, list of action groups). The list is empty if rev does not
    match the intra-rev pattern; otherwise it holds the first `idx` cached
    action groups between the two versions encoded in rev.
  """
  if re.match(_re_intra_rev, rev):
    base, target, count = parse_intra_rev(rev)
    groups = self.load_action_groups_between_releases(base, target)
    assert 0 <= count <= len(groups)
    return base, groups[:count]

  # easy case, rev itself is the base and nothing needs to be applied.
  return rev, []