blob: 900a5a011fda952f5be0921bde8f3209ed1312e3 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
This module models complex source code organization, i.e. nested git repos,
and their version relationship, i.e. pinned or floating git repos. In other
words, it is an abstraction of chrome's gclient DEPS, and of chromeos and
Android's repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
# Pattern of an intra-rev string: "<start>~<end>/<index>", where neither
# <start> nor <end> may contain a comma and <index> is a decimal number.
# See make_intra_rev() and parse_intra_rev().
_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'

# Possible values of Spec.spec_type.
SPEC_FIXED = 'fixed'
SPEC_FLOAT = 'float'
# NOTE(review): not referenced in the visible part of this file; presumably
# the name of a cache directory used further down -- confirm.
_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
def make_intra_rev(a, b, index):
  """Builds the "intra-rev" name of a state between two named versions.

  Many small changes (commits) lie between two major "named" versions `a`
  and `b`. bisect-kit enumerates those intermediate states and bisects them;
  the name given to each state is called an "intra-rev", i.e. a minor version
  number inside a pair of major versions.

  Both endpoints are part of the name because `a` plus an index alone is
  ambiguous when branches exist. Taking chromeos as an example, 9900.1.0 and
  9901.0.0 are both derived from 9900.0.0, so "9900.0.0 plus 100 changes"
  could mean a state on the way to either of them.

  Args:
    a: the start version
    b: the end version
    index: the index number of the change between a and b

  Returns:
    the intra-rev version string, formatted as "<a>~<b>/<index>"
  """
  fields = (a, b, index)
  return '%s~%s/%d' % fields
57
58
def parse_intra_rev(rev):
  """Splits an intra-rev string into its components.

  See make_intra_rev() for what an intra-rev is.

  Args:
    rev: intra-rev string or normal version number

  Returns:
    (start, end, index). A string that does not look like an intra-rev is
    treated as a normal version number and yields (rev, rev, 0).
  """
  matched = re.match(_re_intra_rev, rev)
  if matched:
    start, end, index = matched.groups()
    return start, end, int(index)
  return rev, rev, 0
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
def argtype_intra_rev(argtype):
  """Wraps a version validator so that it also accepts intra-rev strings.

  Args:
    argtype: argtype function which validates a major version number. It is
        expected to raise cli.ArgTypeError (carrying example values in its
        `example` attribute) for invalid input.

  Returns:
    A new argtype function which accepts whatever `argtype` accepts and, in
    addition, intra-rev strings whose two endpoints are accepted by `argtype`.
  """

  def argtype_function(s):
    examples = []
    # A plain major version is accepted as is.
    try:
      return argtype(s)
    except cli.ArgTypeError as e:
      examples.extend(e.example)

    matched = re.match(_re_intra_rev, s)
    if not matched:
      examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
      raise cli.ArgTypeError('Invalid rev', examples)

    # It has the intra-rev shape; both endpoints must be valid versions.
    try:
      for endpoint in matched.group(1, 2):
        argtype(endpoint)
    except cli.ArgTypeError as e:
      examples.extend(
          make_intra_rev(example, example, 10) for example in e.example)
      raise cli.ArgTypeError('Invalid intra rev', examples)
    return s

  return argtype_function
110
111
def _normalize_repo_url(repo_url):
  """Returns a canonical form of a repo URL, used when comparing URLs.

  The "/a/" variant of the chrome-internal host URL is rewritten to the
  variant without "/a/", and a trailing ".git" suffix is dropped.

  Args:
    repo_url: URL of a git repo

  Returns:
    the normalized URL string
  """
  without_a_prefix = re.sub(
      r'https://chrome-internal.googlesource.com/a/',
      r'https://chrome-internal.googlesource.com/', repo_url)
  return re.sub(r'\.git$', '', without_a_prefix)
117
118
class PathSpec(object):
  """Code version of a single path.

  Attributes:
    path: local path, relative to project base dir
    repo_url: code repository location
    at: code version, could be git hash or branch name
  """

  def __init__(self, path, repo_url, at):
    self.path = path
    self.repo_url = repo_url
    self.at = at

  def is_static(self):
    """Returns whether `at` is a git revision, as judged by git_util."""
    return git_util.is_git_rev(self.at)

  def __eq__(self, rhs):
    # Cheap comparisons first; repo urls are compared in normalized form so
    # that equivalent spellings of the same remote are treated as equal.
    return (self.path == rhs.path and self.at == rhs.at and
            _normalize_repo_url(self.repo_url) == _normalize_repo_url(
                rhs.repo_url))

  def __ne__(self, rhs):
    # Spelled out explicitly so Python 2 gets a consistent `!=` as well.
    return not self == rhs
147
148
class Spec(object):
  """Collection of PathSpec.

  A Spec is the analogue of gclient's DEPS and of repo's manifest.

  Attributes:
    spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means the
        code version is pinned and fixed. SPEC_FLOAT, on the other hand, is
        not pinned and the actual version (git commit) may change over time.
    name: name of this spec, for debugging purposes; usually a version number
        or git hash
    timestamp: timestamp of this spec
    path: path of spec
    entries: dict mapping paths to PathSpec
    revision: a commit id of manifest-internal indicating the manifest
        revision; this argument is not used in DEPS.
  """

  def __init__(self,
               spec_type,
               name,
               timestamp,
               path,
               entries=None,
               revision=None):
    self.spec_type = spec_type
    self.name = name
    self.timestamp = timestamp
    self.path = path
    self.entries = entries
    self.revision = revision

  def copy(self):
    """Returns a deep copy of this spec."""
    return copy.deepcopy(self)

  def similar_score(self, rhs):
    """Calculates how different this spec is from another Spec.

    Returns:
      score of similarity. A smaller value means more similar.
    """
    mine = set(self.entries)
    theirs = set(rhs.entries)
    score = 0
    for path in mine & theirs:
      if rhs[path] == self[path]:
        continue
      # Same version but otherwise different: it is common for a remote repo
      # to move around, which should count as (almost) the same entry.
      score += 0.1 if rhs[path].at == self[path].at else 1
    # Every path present on only one side costs one point.
    return score + len(mine ^ theirs)

  def is_static(self):
    """Returns whether every entry is static."""
    return all(entry.is_static() for entry in self.entries.values())

  def is_subset(self, rhs):
    """Returns whether all paths of this spec also exist in `rhs`."""
    return set(self.entries.keys()).issubset(set(rhs.entries.keys()))

  def __getitem__(self, path):
    return self.entries[path]

  def __contains__(self, path):
    return path in self.entries

  def apply(self, action_group):
    """Updates this spec in place as if `action_group` had been applied.

    The spec takes over the timestamp of the group and is renamed after it.
    """
    self.timestamp = action_group.timestamp
    self.name = '(%s)' % self.timestamp
    entries = self.entries
    for action in action_group.actions:
      if isinstance(action, GitRemoveRepo):
        del entries[action.path]
      elif isinstance(action, GitCheckoutCommit):
        entries[action.path].at = action.rev
      elif isinstance(action, GitAddRepo):
        entries[action.path] = PathSpec(action.path, action.repo_url,
                                        action.rev)
      else:
        assert 0, 'unknown action: %s' % action.__class__.__name__

  def dump(self):
    """Prints the content of this spec to stdout; for debugging."""
    print(self.name, self.path, self.timestamp)
    print('size', len(self.entries))
    for path in sorted(self.entries):
      print(path, self.entries[path].at)

  def diff(self, rhs):
    """Logs the differences between this spec and `rhs` at info level."""
    logger.info('diff between %s and %s', self.name, rhs.name)
    mine = set(self.entries)
    theirs = set(rhs.entries)
    for path in sorted(mine - theirs):
      logger.info('-%s', path)
    for path in sorted(theirs - mine):
      logger.info('+%s', path)

    identical = 0
    for path in sorted(mine & theirs):
      ours, other = self[path], rhs[path]
      if ours == other:
        identical += 1
        continue
      if ours.at != other.at:
        logger.info(' %s: at %s vs %s', path, ours.at, other.at)
      if ours.repo_url != other.repo_url:
        logger.info(' %s: repo_url %s vs %s', path, ours.repo_url,
                    other.repo_url)
    logger.info('and common=%s', identical)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800255
256
class Action(object):
  """Base class of actions; actions describe changes between two Specs.

  Attributes:
    timestamp: action time
    path: action path, which is relative to project root
  """

  def __init__(self, timestamp, path):
    self.timestamp = timestamp
    self.path = path

  def apply(self, _code_storage, _root_dir):
    """Performs the action; to be implemented by subclasses."""
    raise NotImplementedError

  def summary(self, _code_storage):
    """Describes the action; to be implemented by subclasses."""
    raise NotImplementedError

  def __eq__(self, rhs):
    # Two actions are equal when all their instance attributes are equal.
    return vars(self) == vars(rhs)

  def serialize(self):
    """Returns (class name, attribute dict); see unserialize_action()."""
    return type(self).__name__, vars(self)
280
281
def unserialize_action(data):
  """Rebuilds an action from the output of Action.serialize().

  Args:
    data: (class name, dict of constructor keyword arguments)

  Returns:
    an instance of the named action class
  """
  class_name, values = data
  classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
  known_names = [cls.__name__ for cls in classes]
  assert class_name in known_names, 'unknown action class: %s' % class_name
  return classes[known_names.index(class_name)](**values)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800292
293
class ActionGroup(object):
  """Atomic group of Action objects.

  This models actions which have to be taken together, e.g.:
    - repos added/removed in the same manifest commit
    - commits which appear at the same time due to a repo add
    - gerrit topic
    - circular CQ-DEPEND (Cq-Depend)
  Otherwise, an ActionGroup usually consists of only one Action object.

  Attributes:
    timestamp: time of the group
    name: name of the group; None until assigned by the owner
    actions: list of Action objects, in the order they should be applied
    comment: optional free text which is included in summary()
  """

  def __init__(self, timestamp, comment=None):
    self.timestamp = timestamp
    self.name = None
    self.actions = []
    self.comment = comment

  def add(self, action):
    """Appends an action to the group."""
    self.actions.append(action)

  def serialize(self):
    """Returns a dict form of the group; the inverse of unserialize()."""
    return {
        'timestamp': self.timestamp,
        'name': self.name,
        'comment': self.comment,
        'actions': [action.serialize() for action in self.actions],
    }

  def summary(self, code_storage):
    """Returns a dict with the summaries of all actions.

    The 'comment' key is only present when the group has a comment.
    """
    result = {}
    if self.comment:
      result['comment'] = self.comment
    summaries = []
    for action in self.actions:
      summaries.append(action.summary(code_storage))
    result['actions'] = summaries
    return result

  @staticmethod
  def unserialize(data):
    """Rebuilds an ActionGroup from the output of serialize()."""
    group = ActionGroup(data['timestamp'])
    group.name = data['name']
    group.comment = data['comment']
    for serialized_action in data['actions']:
      group.add(unserialize_action(serialized_action))
    return group

  def apply(self, code_storage, root_dir):
    """Applies all actions of the group, in order."""
    for action in self.actions:
      action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800342
343
class GitCheckoutCommit(Action):
  """Describes a git commit action.

  Attributes:
    repo_url: the corresponding url of git repo
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitCheckoutCommit, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Checks out `rev` in the existing git repo at root_dir/path."""
    del code_storage  # unused
    checkout_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(checkout_dir)
    git_util.checkout_version(checkout_dir, self.rev)

  def summary(self, code_storage):
    """Returns a dict describing the commit.

    The first line of the commit log is looked up in the cached git repo;
    '(unknown)' is used instead if that lookup fails.
    """
    git_root = code_storage.cached_git_root(self.repo_url)
    try:
      log_lines = git_util.get_commit_log(git_root, self.rev).splitlines()
      commit_summary = log_lines[0]
    except subprocess.CalledProcessError:
      logger.warning('failed to get commit log of %s at %s', self.rev[:10],
                     git_root)
      commit_summary = '(unknown)'
    return {
        'timestamp': self.timestamp,
        'action_type': 'commit',
        'path': self.path,
        'commit_summary': commit_summary,
        'repo_url': self.repo_url,
        'rev': self.rev,
        'text': 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800382
383
class GitAddRepo(Action):
  """Describes a git repo add action.

  Attributes:
    repo_url: the corresponding url of git repo to add
    rev: git commit to checkout
  """

  def __init__(self, timestamp, path, repo_url, rev):
    super(GitAddRepo, self).__init__(timestamp, path)
    self.repo_url = repo_url
    self.rev = rev

  def apply(self, code_storage, root_dir):
    """Clones the repo to root_dir/path, checks out `rev` and registers it.

    An already existing empty directory at the destination is tolerated;
    anything else at the destination fails an assertion.
    """
    destination = os.path.join(root_dir, self.path)
    if os.path.exists(destination):
      if not os.path.isdir(destination) or os.listdir(destination):
        # Something real is in the way. The assertion below always fails at
        # this point and reports the offending path.
        assert not os.path.exists(destination), (
            '%s already exists' % destination)
      else:
        # mimic gclient's behavior; don't panic
        logger.warning(
            'adding repo %s; there is already an empty directory; '
            'assume it is okay', destination)

    # Clone with the local cache as reference.
    cache_dir = code_storage.cached_git_root(self.repo_url)
    git_util.clone(destination, self.repo_url, reference=cache_dir)
    git_util.checkout_version(destination, self.rev)

    code_storage.add_to_project_list(root_dir, self.path, self.repo_url)

  def summary(self, _code_storage):
    """Returns a dict describing the repo addition."""
    return {
        'timestamp': self.timestamp,
        'action_type': 'add_repo',
        'path': self.path,
        'repo_url': self.repo_url,
        'rev': self.rev,
        'text': 'add repo %s from %s@%s' % (self.path, self.repo_url,
                                            self.rev[:10]),
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800424
425
class GitRemoveRepo(Action):
  """Describes a git repo remove action."""

  def apply(self, code_storage, root_dir):
    """Deletes the git repo at root_dir/path and unregisters it."""
    # An empty path would resolve to the project root itself.
    assert self.path
    doomed_dir = os.path.join(root_dir, self.path)
    assert git_util.is_git_root(doomed_dir)
    # TODO(kcwu): other projects may be sub-tree of `doomed_dir`.
    #             They should not be deleted. (crbug/930047)
    shutil.rmtree(doomed_dir)

    code_storage.remove_from_project_list(root_dir, self.path)

  def summary(self, _code_storage):
    """Returns a dict describing the repo removal."""
    return {
        'timestamp': self.timestamp,
        'action_type': 'remove_repo',
        'path': self.path,
        'text': 'remove repo %s' % self.path,
    }
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800446
447
def apply_actions(code_storage, action_groups, root_dir):
  """Applies action groups to the source tree, in order.

  Speed optimization: only the last one of consecutive commits per repo is
  applied. It is possible to optimize further, but that needs to take care
  of git repos added/removed within another repo.

  Args:
    code_storage: CodeStorage object, passed through to the actions
    action_groups: iterable of ActionGroup objects, in the order to apply
    root_dir: root path of the project source tree
  """
  # path -> (group number, index within group, action) of the latest
  # deferred commit for that path.
  pending_commits = {}

  def batch_apply(commits):
    # Replay the surviving commits in their original order.
    for i, _, commit_action in sorted(commits.values(), key=lambda x: x[:2]):
      logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
      commit_action.apply(code_storage, root_dir)

  for i, action_group in enumerate(action_groups, 1):
    if all(
        isinstance(action, GitCheckoutCommit)
        for action in action_group.actions):
      # If all actions are commits, defer them for batch processing. A later
      # commit on the same path supersedes an earlier one.
      for j, action in enumerate(action_group.actions):
        pending_commits[action.path] = (i, j, action)
      continue

    # The group adds or removes repos, so it cannot be reordered: flush what
    # has been deferred so far, then apply every action of the group. (Earlier
    # code applied only the first non-commit action here and dropped the
    # rest of the group.)
    batch_apply(pending_commits)
    pending_commits = {}
    action_group.apply(code_storage, root_dir)

  batch_apply(pending_commits)
474
475
class SpecManager(object):
  """Spec related abstract operations.

  Implementations enumerate Spec instances and switch the disk state to a
  given version.

  In other words, this class abstracts:
    - discovery of gclient's DEPS and repo's manifest
    - gclient sync and repo sync

  Every method here raises NotImplementedError and is meant to be overridden.
  """

  def collect_float_spec(self, old, new, fixed_specs=None):
    """Collects float Specs between two versions.

    This method may fetch specs from network. However, it should not switch
    the tree version state.

    Args:
      old: old version
      new: new version
      fixed_specs: fixed specs from collect_fixed_spec(old, new) for Chrome OS
          or None for others
    """
    raise NotImplementedError

  def collect_fixed_spec(self, old, new):
    """Collects fixed Specs between two versions.

    This method may fetch specs from network. However, it should not switch
    the tree version state.

    Args:
      old: old version
      new: new version
    """
    raise NotImplementedError

  def parse_spec(self, spec):
    """Parses information for a Spec object.

    Args:
      spec: Spec object. It specifies what to parse, and the parsed
          information is stored inside it.
    """
    raise NotImplementedError

  def sync_disk_state(self, rev):
    """Switches the source tree state to the given version."""
    raise NotImplementedError
520
521
class CodeStorage(object):
  """Query code history and commit relationship without checkout.

  Paths inside the source tree may be deleted, or may map to different remote
  repos in different versions, so git information of one version cannot be
  queried while the tree state is at another version. To query information
  quickly and without changing the tree state, an out-of-tree source code
  storage is needed.

  This class assumes all git repos are mirrored somewhere on local disk.
  Subclasses need to implement cached_git_root(), which returns that
  location, plus the two project list hooks.

  In other words, this class abstracts operations upon gclient's cache-dir
  and repo's mirror.
  """

  def cached_git_root(self, repo_url):
    """The cached path of the given remote git repo.

    Args:
      repo_url: URL of git remote repo

    Returns:
      path of cache folder
    """
    raise NotImplementedError

  def add_to_project_list(self, project_root, path, repo_url):
    """Hook called after a repo was added at `path`; to be overridden."""
    raise NotImplementedError

  def remove_from_project_list(self, project_root, path):
    """Hook called after the repo at `path` was removed; to be overridden."""
    raise NotImplementedError

  def is_ancestor_commit(self, spec, path, old, new):
    """Determines whether one commit is an ancestor of another.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id

    Returns:
      True if `old` is ancestor of `new`
    """
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_ancestor_commit(mirror, old, new)

  def get_rev_by_time(self, spec, path, timestamp):
    """Gets the commit hash of the given spec entry by time.

    Args:
      spec: Spec object
      path: local path relative to project root
      timestamp: timestamp

    Returns:
      The commit hash at the given time. If there are several commits with
      the given timestamp, returns the last commit.
    """
    entry = spec[path]
    mirror = self.cached_git_root(entry.repo_url)
    # entry.at is a remote reference name. Since the mirror is not a local
    # checkout, there is no need to convert the name.
    return git_util.get_rev_by_time(mirror, timestamp, entry.at)

  def get_actions_between_two_commit(self,
                                     spec,
                                     path,
                                     old,
                                     new,
                                     ignore_not_ancestor=False):
    """Lists the checkout actions leading from commit `old` to `new`.

    Args:
      spec: Spec object
      path: local path relative to project root
      old: commit id
      new: commit id
      ignore_not_ancestor: if True and `old` is not an ancestor of `new`, the
          whole transition is collapsed into a single checkout of `new`

    Returns:
      list of GitCheckoutCommit objects
    """
    repo_url = spec[path].repo_url
    mirror = self.cached_git_root(repo_url)

    # Not in the same branch: regard the transition as an atomic operation.
    # This situation happens when
    #   1. new is branched from old and
    #   2. commit timestamp is not reliable (i.e. commit time != merged time)
    # so old and new might not have an ancestor relation.
    if (ignore_not_ancestor and old != new and
        not git_util.is_ancestor_commit(mirror, old, new)):
      timestamp = git_util.get_commit_time(mirror, new)
      return [GitCheckoutCommit(timestamp, path, repo_url, new)]

    return [
        GitCheckoutCommit(timestamp, path, repo_url, git_rev)
        for timestamp, git_rev in git_util.list_commits_between_commits(
            mirror, old, new)
    ]

  def is_containing_commit(self, spec, path, rev):
    """Returns whether the cached repo of `path` contains commit `rev`."""
    mirror = self.cached_git_root(spec[path].repo_url)
    return git_util.is_containing_commit(mirror, rev)

  def are_spec_commits_available(self, spec):
    """Returns whether all static entries of `spec` exist in the cache.

    Entries which are not static are not checked.
    """
    return all(
        self.is_containing_commit(spec, path, path_spec.at)
        for path, path_spec in spec.entries.items()
        if path_spec.is_static())
625
626
627class CodeManager(object):
628 """Class to reconstruct historical source tree state.
629
630 This class can reconstruct all moments of source tree state and diffs between
631 them.
632
633 Attributes:
634 root_dir: root path of project source tree
635 spec_manager: SpecManager object
636 code_storage: CodeStorage object
637 """
638
def __init__(self, root_dir, spec_manager, code_storage):
  """Stores the collaborators needed to reconstruct source tree states.

  Args:
    root_dir: root path of project source tree
    spec_manager: SpecManager object
    code_storage: CodeStorage object
  """
  self.root_dir, self.spec_manager, self.code_storage = (
      root_dir, spec_manager, code_storage)
643
def generate_action_groups_between_specs(self, prev_float, next_float):
  """Generates actions between two float specs.

  Commits found on floating paths each become their own ActionGroup. All
  changes which happen at next_float.timestamp (repo add/remove, version
  switches, and commits carrying exactly that timestamp) are collected into
  one final group.

  Args:
    prev_float: start of spec object (exclusive)
    next_float: end of spec object (inclusive)

  Returns:
    list of ActionGroup object (ordered)
  """
  commit_groups = []
  next_stamp = next_float.timestamp
  final_group = ActionGroup(next_stamp)
  removed_paths = set()

  # `branch_between_float_specs` is currently a chromeos-only logic,
  # and branch behavior is not verified for android and chrome now.
  is_chromeos_branched = bool(
      hasattr(self.spec_manager, 'branch_between_float_specs') and
      self.spec_manager.branch_between_float_specs(prev_float, next_float))

  # Sort alphabetically, so parent directories are handled before children
  # directories.
  all_paths = sorted(set(prev_float.entries) | set(next_float.entries))
  for path in all_paths:

    # Add repo
    if path not in prev_float:
      added = next_float[path]
      if added.is_static():
        next_at = added.at
      else:
        next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                    next_stamp)
      final_group.add(GitAddRepo(next_stamp, path, added.repo_url, next_at))
      continue

    if prev_float[path].is_static():
      prev_at = till_at = prev_float[path].at
    else:
      # Existing path is floating.
      # Enumerates commits until next spec. Get `prev_at` and `till_at`
      # by prev_float and next_float's timestamp.
      #
      # 1. Non-branched case:
      #
      #                   prev_at                 till_at
      #  prev branch ---> o --------> o --------> o --------> o ---> ...
      #                   ^                       ^
      #          prev_float.timestamp    next_float.timestamp
      #
      #  building an image between prev_at and till_at should follow
      #  prev_float's spec.
      #
      # 2. Branched case:
      #
      #                              till_at
      #                   /--------> o ---------->
      #                  /           ^ next_float.timestamp
      #                 /  prev_at
      #  -------------> o ---------------------->
      #                 ^ prev_float.timestamp
      #
      #  building an image between prev_at and till_at should follow
      #  next_float's spec.
      #
      # NOTE(review): in the branched case `path` is looked up in
      # next_float even when next_float does not contain it; with
      # CodeStorage.get_rev_by_time as defined in this file that lookup
      # raises KeyError. Confirm whether this combination can occur.
      prev_at = self.code_storage.get_rev_by_time(prev_float, path,
                                                  prev_float.timestamp)
      till_spec = next_float if is_chromeos_branched else prev_float
      till_at = self.code_storage.get_rev_by_time(till_spec, path,
                                                  next_stamp)
      actions = self.code_storage.get_actions_between_two_commit(
          prev_float, path, prev_at, till_at, ignore_not_ancestor=True)

      # Assume commits with the same timestamp as manifest/DEPS change are
      # atomic.
      if actions and actions[-1].timestamp == next_stamp:
        final_group.add(actions.pop())

      for action in actions:
        single = ActionGroup(action.timestamp)
        single.add(action)
        commit_groups.append(single)

    # At next_float.timestamp.
    if path not in next_float:
      if path in removed_paths:
        # Already removed together with one of its parent repos.
        continue
      # remove repo
      next_at = None
      prefix = path + '/'
      sub_repos = [p for p in prev_float.entries if p.startswith(prefix)]
      # Remove deeper repo first
      for sub_path in sorted(sub_repos, reverse=True):
        final_group.add(GitRemoveRepo(next_stamp, sub_path))
        removed_paths.add(sub_path)
      final_group.add(GitRemoveRepo(next_stamp, path))
      removed_paths.add(path)
      # Sub repos which still exist in next_float were deleted along with
      # their parent above; add them back at their previous version.
      for sub_path in sorted(set(sub_repos) & set(next_float.entries)):
        final_group.add(
            GitAddRepo(next_stamp, sub_path, next_float[sub_path].repo_url,
                       prev_float[sub_path].at))

    elif next_float[path].is_static():
      # pinned to certain commit on different branch
      next_at = next_float[path].at

    elif next_float[path].at == prev_float[path].at:
      # keep floating on the same branch
      next_at = till_at

    else:
      # switch to another branch
      #                   prev_at                 till_at
      #  prev branch ---> o --------> o --------> o --------> o ---> ...
      #
      #                                           next_at
      #  next branch ...... o ------> o --------> o -----> ...
      #                   ^                       ^
      #          prev_float.timestamp    next_float.timestamp
      next_at = self.code_storage.get_rev_by_time(next_float, path,
                                                  next_stamp)

    if next_at and next_at != till_at:
      final_group.add(
          GitCheckoutCommit(next_stamp, path, next_float[path].repo_url,
                            next_at))

  # Commits were collected path by path; put them in chronological order.
  ordered_groups = sorted(commit_groups, key=lambda group: group.timestamp)
  if final_group.actions:
    ordered_groups.append(final_group)
  return ordered_groups
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800779
def synthesize_fixed_spec(self, float_spec, timestamp):
  """Synthesizes fixed spec from float spec of given time.

  Entries which are already static are copied; every other entry is resolved
  to the revision reported by the code storage for `timestamp`.

  Args:
    float_spec: the float spec
    timestamp: snapshot time

  Returns:
    Spec object
  """
  pinned_entries = {}
  for path, entry in float_spec.entries.items():
    if entry.is_static():
      pinned_entries[path] = copy.deepcopy(entry)
      continue
    rev = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
    pinned_entries[path] = copy.deepcopy(
        PathSpec(entry.path, entry.repo_url, rev))

  spec_path = float_spec.path
  return Spec(SPEC_FIXED, '%s@%s' % (spec_path, timestamp), timestamp,
              spec_path, pinned_entries)
800
def match_spec(self, target, specs, start_index=0):
  """Finds the spec in `specs` which best matches `target`.

  Candidates are the last spec not later than `target` plus all specs whose
  timestamp is within one hour of it. Among the candidates whose paths are a
  subset of target's, the one with the smallest similar_score() wins; ties
  are broken by preferring specs not later than target, then by the smaller
  time difference.

  Args:
    target: the Spec to look for
    specs: list of Spec objects to search in
    start_index: index in `specs` to start searching from

  Returns:
    index into `specs` of the best match, or None if nothing matches
  """
  threshold = 3600
  # ideal_index is the index of last spec before target
  # begin and end are the range of indexes within threshold (inclusive)
  ideal_index = None
  begin, end = None, None
  for i, spec in enumerate(specs[start_index:], start_index):
    if spec.timestamp <= target.timestamp:
      ideal_index = i
    if abs(spec.timestamp - target.timestamp) < threshold:
      if begin is None:
        begin = i
      end = i

  candidates = []
  if ideal_index is not None:
    candidates.append(ideal_index)
  if begin is not None:
    # The ideal index is usually inside this range as well; do not list
    # (and score) it twice.
    candidates.extend(i for i in range(begin, end + 1) if i != ideal_index)
  if not candidates:
    logger.error('unable to match %s: all specs are after it', target.name)
    return None

  compatible_candidates = [
      i for i in candidates if specs[i].is_subset(target)
  ]
  if not compatible_candidates:
    logger.error('unable to match %s: no compatible specs', target.name)
    target.diff(specs[candidates[0]])
    return None

  scores = []
  for i in compatible_candidates:
    # Tie-break: prefer earlier timestamp and smaller difference.
    if specs[i].timestamp <= target.timestamp:
      timediff = 0, target.timestamp - specs[i].timestamp
    else:
      timediff = 1, specs[i].timestamp - target.timestamp
    scores.append((specs[i].similar_score(target), timediff, i))

  score, _, index = min(scores)
  if score != 0:
    logger.warning('not exactly match (score=%s): %s', score, target.name)
    target.diff(specs[index])

  # ideal_index is None when every spec is later than target (candidates then
  # come from the threshold window only). Comparing an int with None raises
  # TypeError on Python 3, so that case is handled explicitly: the match is
  # necessarily a later spec.
  if ideal_index is None or index > ideal_index:
    logger.warning(
        'spec committed at %d matched later commit at %d. bad server clock?',
        target.timestamp, specs[index].timestamp)
  elif index < ideal_index:
    logger.warning(
        '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
        target.name, target.timestamp, specs[index].timestamp,
        specs[ideal_index].timestamp,
        specs[index].timestamp - target.timestamp)

  return index
860
def associate_fixed_and_synthesized_specs(self, fixed_specs,
                                          synthesized_specs):
  """Pairs each fixed spec with its counterpart among synthesized specs.

  Fixed specs are snapshots of float specs, so in theory each of them equals
  one of the synthesized specs. In practice they may differ slightly (race
  condition, or bugs of bisect-kit), thus the pairing is delegated to
  match_spec(), which matches by similarity instead of equality.

  Args:
    fixed_specs: list of fixed specs
    synthesized_specs: list of synthesized specs

  Returns:
    list of (index of fixed_specs, index of synthesized_specs). Fixed specs
    in the middle which cannot be matched are omitted.

  Raises:
    ValueError: the first or the last fixed spec cannot be matched
  """
  pairs = []
  search_from = 0
  boundary_indexes = (0, len(fixed_specs) - 1)
  for fixed_index, fixed_spec in enumerate(fixed_specs):
    # Search resumes from the previous match.
    synthesized_index = self.match_spec(fixed_spec, synthesized_specs,
                                        search_from)
    if synthesized_index is not None:
      pairs.append((fixed_index, synthesized_index))
      search_from = synthesized_index
      continue

    # Mismatch in the middle is tolerable; at either end it is fatal.
    if fixed_index not in boundary_indexes:
      logger.warning('%s do not match, skip', fixed_spec.name)
      continue

    logger.error('essential spec mismatch, unable to continue')
    raise ValueError('Commit history analyze failed. '
                     'Bisector cannot deal with this version range.')

  return pairs
885
def _create_make_up_actions(self, fixed_spec, synthesized):
  """Creates actions to bring a fixed spec in line with a synthesized one.

  Only paths which exist in both specs are considered. For each of them
  whose `at` differs, one GitCheckoutCommit action, pointing to what the
  synthesized spec has, is generated.

  Args:
    fixed_spec: the fixed spec
    synthesized: the synthesized spec associated with fixed_spec

  Returns:
    ActionGroup of the generated actions; None if nothing needs to be done.
  """
  timestamp = synthesized.timestamp
  comment = 'make up glitch for %s' % fixed_spec.name
  make_up = ActionGroup(timestamp, comment=comment)

  common_paths = set(fixed_spec.entries) & set(synthesized.entries)
  for path in common_paths:
    fixed_entry = fixed_spec[path]
    wanted = synthesized[path]
    if fixed_entry.at != wanted.at:
      make_up.add(
          GitCheckoutCommit(timestamp, path, wanted.repo_url, wanted.at))

  return make_up if make_up.actions else None
900
def build_revlist(self, old, new):
  """Build revlist.

  Thin wrapper of get_specs_and_revlist() which discards the specs.

  Args:
    old: the old version
    new: the new version

  Returns:
    list of rev string
  """
  _fixed_specs, _float_specs, revisions = self.get_specs_and_revlist(old, new)
  return revisions
909
def get_specs_and_revlist(self, old, new):
  """Build revlist.

  Collects the fixed and float specs between `old` and `new`, derives the
  action groups between them, and names every intermediate state with an
  intra-rev string. As a side effect, the action groups between each pair
  of associated fixed specs are written to the on-disk cache via
  save_action_groups_between_releases().

  Args:
    old: the old version
    new: the new version

  Returns:
    (parsed fixed_specs, parsed float_specs, list of rev string)
    Float specs later than the last fixed spec are removed from the returned
    float_specs.

  Raises:
    ValueError: (from associate_fixed_and_synthesized_specs) the first or
        the last fixed spec cannot be associated with a synthesized spec
  """
  logger.info('get_specs_and_revlist: old = %s, new = %s', old, new)
  revlist = []

  # step 1, find all float and fixed specs in the given range.
  fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
  assert fixed_specs
  for spec in fixed_specs:
    self.spec_manager.parse_spec(spec)

  float_specs = self.spec_manager.collect_float_spec(old, new, fixed_specs)
  assert float_specs
  # Drop float specs later than the last fixed spec. The emptiness check
  # lets the assertion below fire instead of an IndexError from
  # float_specs[-1] when every float spec is dropped.
  while float_specs and float_specs[-1].timestamp > fixed_specs[-1].timestamp:
    float_specs.pop()
  assert float_specs
  for spec in float_specs:
    self.spec_manager.parse_spec(spec)

  # step 2, synthesize all fixed specs in the range from float specs.
  # The last fixed spec is appended as the end boundary, so actions after
  # the last float spec are generated as well.
  specs = float_specs + [fixed_specs[-1]]
  action_groups = []
  logger.debug('len(specs)=%d', len(specs))
  for i in range(len(specs) - 1):
    prev_float = specs[i]
    next_float = specs[i + 1]
    logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
                 prev_float.timestamp, next_float.name, next_float.timestamp)
    action_groups += self.generate_action_groups_between_specs(
        prev_float, next_float)

  # synthesized[k] is the state after the first k action groups are applied,
  # i.e. len(synthesized) == len(action_groups) + 1.
  spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
  synthesized = [spec.copy()]
  for action_group in action_groups:
    spec.apply(action_group)
    synthesized.append(spec.copy())

  # step 3, associate fixed specs with synthesized specs.
  associated_pairs = self.associate_fixed_and_synthesized_specs(
      fixed_specs, synthesized)

  # step 4, group actions and cache them
  for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
    next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
    revlist.append(fixed_specs[fixed_index].name)
    this_action_groups = []

    # handle glitch
    if fixed_specs[fixed_index].similar_score(
        synthesized[synthesized_index]) != 0:
      assert synthesized[synthesized_index].is_subset(
          fixed_specs[fixed_index])
      skipped = set(fixed_specs[fixed_index].entries) - set(
          synthesized[synthesized_index].entries)
      if skipped:
        logger.warning(
            'between %s and %s, '
            'bisect-kit cannot analyze commit history of following paths:',
            fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
        for path in sorted(skipped):
          logger.warning('  %s', path)

      # The fixed spec is not identical to the synthesized one; prepend an
      # extra action group which moves the differing paths to the
      # synthesized state before the regular action groups apply.
      make_up = self._create_make_up_actions(fixed_specs[fixed_index],
                                             synthesized[synthesized_index])
      if make_up:
        this_action_groups.append(make_up)

    # Action groups which turn synthesized[synthesized_index] into
    # synthesized[next_synthesized_index].
    this_action_groups.extend(
        action_groups[synthesized_index:next_synthesized_index])
    # Indexes inside intra-rev are 1 based.
    for idx, ag in enumerate(this_action_groups, 1):
      rev = make_intra_rev(fixed_specs[fixed_index].name,
                           fixed_specs[next_fixed_index].name, idx)
      ag.name = rev
      revlist.append(rev)

    self.save_action_groups_between_releases(
        fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
        this_action_groups)
  revlist.append(fixed_specs[associated_pairs[-1][0]].name)

  return fixed_specs, float_specs, revlist
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800995
def save_action_groups_between_releases(self, old, new, action_groups):
  """Stores action groups between two releases as a json cache file.

  The file is named '<old>,<new>.json' and located in the cache directory
  under self.root_dir. The cache directory is created if it does not exist.

  Args:
    old: name of the old release
    new: name of the new release
    action_groups: list of action groups; each of them is stored in the form
        returned by its serialize() method
  """
  serialized = []
  for action_group in action_groups:
    serialized.append(action_group.serialize())

  cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
  cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
  if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

  with open(cache_filename, 'w') as fp:
    json.dump(serialized, fp, indent=4, sort_keys=True)
1005
def load_action_groups_between_releases(self, old, new):
  """Loads action groups between two releases from the json cache file.

  This is the counterpart of save_action_groups_between_releases(); it reads
  '<old>,<new>.json' in the cache directory under self.root_dir.

  Args:
    old: name of the old release
    new: name of the new release

  Returns:
    list of ActionGroup

  Raises:
    errors.InternalError: the cache file does not exist
  """
  cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
  cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
  if not os.path.exists(cache_filename):
    raise errors.InternalError(
        'cached revlist not found: %s' % cache_filename)

  # Use context manager so the file is closed deterministically instead of
  # whenever the garbage collector gets to it.
  with open(cache_filename) as fp:
    serialized = json.load(fp)

  return [ActionGroup.unserialize(data) for data in serialized]
1018
def get_rev_detail(self, rev):
  """Gets the summary of the action group which a rev stands for.

  Args:
    rev: rev string, which is parsed by parse_intra_rev()

  Returns:
    Empty dict if the old and new parts of rev are identical. Otherwise,
    the result of summary() of the corresponding cached action group.
  """
  rev_old, rev_new, index = parse_intra_rev(rev)
  if rev_old != rev_new:
    cached_groups = self.load_action_groups_between_releases(rev_old, rev_new)
    # Indexes inside intra_rev are 1 based.
    # NOTE(review): index 0 would select the last action group due to
    # negative indexing; confirm whether parse_intra_rev() can return 0 here.
    return cached_groups[index - 1].summary(self.code_storage)
  return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08001028
def switch(self, rev):
  """Switches the source tree to the given rev.

  The disk state is synced to the base version of `rev` first, and then the
  action groups leading to `rev` are applied on top of it.

  Args:
    rev: rev string; could be an intra-rev
  """
  base_rev, pending_groups = self.get_intra_and_diff(rev)
  self.spec_manager.sync_disk_state(base_rev)
  apply_actions(self.code_storage, pending_groups, self.root_dir)
1033
def get_intra_and_diff(self, rev):
  """Splits a rev into its base version and the action groups to apply.

  Args:
    rev: rev string; could be an intra-rev

  Returns:
    (base version, list of action groups). If `rev` is not an intra-rev,
    the base version is `rev` itself and the list is empty. Otherwise, the
    list consists of the first `idx` cached action groups between the two
    versions of the intra-rev, where `idx` is the index part of `rev`.
  """
  if re.match(_re_intra_rev, rev):
    base_rev, target_rev, count = parse_intra_rev(rev)
    cached_groups = self.load_action_groups_between_releases(
        base_rev, target_rev)
    assert 0 <= count <= len(cached_groups)
    return base_rev, cached_groups[:count]

  # easy case: not intra-rev, nothing to apply
  return rev, []