blob: 96db56626720be2703535b16c747dd7400dfa2d5 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wu3eb6b502018-06-06 16:15:18 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Model of source code organization and changes.
6
7This module modeled complex source code organization, i.e. nested git repos,
8and their version relationship, i.e. pinned or floating git repo. In other
9words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's
10repo manifest.
11"""
12
13from __future__ import print_function
14import copy
15import json
16import logging
17import os
18import re
19import shutil
Kuang-che Wube5fa2a2018-11-12 17:17:35 +080020import subprocess
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080021
22from bisect_kit import cli
Kuang-che Wue121fae2018-11-09 16:18:39 +080023from bisect_kit import errors
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080024from bisect_kit import git_util
25
26logger = logging.getLogger(__name__)
27
28_re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$'
29
30SPEC_FIXED = 'fixed'
31SPEC_FLOAT = 'float'
32_DIFF_CACHE_DIR = 'bisectkit-cache'
33
34
35def make_intra_rev(a, b, index):
36 """Makes intra-rev version string.
37
38 Between two major "named" versions a and b, there are many small changes
39 (commits) in-between. bisect-kit will identify all those instances and bisect
40 them. We give names to those instances and call these names as "intra-rev"
41 which stands for minor version numbers within two major version.
42
43 Note, a+index (without b) is not enough to identify an unique change due to
44 branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived
45 from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states
46 in 9900.1.0 and 9901.0.0.
47
48 Args:
49 a: the start version
50 b: the end version
51 index: the index number of changes between a and b
52
53 Returns:
54 the intra-rev version string
55 """
56 return '%s~%s/%d' % (a, b, index)
57
58
59def parse_intra_rev(rev):
60 """Decomposes intra-rev string.
61
62 See comments of make_intra_rev for what is intra-rev.
63
64 Args:
65 rev: intra-rev string or normal version number
66
67 Returns:
68 (start, end, index). If rev is not intra-rev, it must be normal version
69 number and returns (rev, rev, 0).
70 """
71 m = re.match(_re_intra_rev, rev)
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080072 if not m:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080073 return rev, rev, 0
74
Kuang-che Wu89ac2e72018-07-25 17:39:07 +080075 return m.group(1), m.group(2), int(m.group(3))
76
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080077
78def argtype_intra_rev(argtype):
79 """Validates argument is intra-rev.
80
81 Args:
82 argtype: argtype function which validates major version number
83
84 Returns:
85 A new argtype function which matches intra-rev
86 """
87
88 def argtype_function(s):
Kuang-che Wucab92452019-01-19 18:24:29 +080089 examples = []
90 try:
91 return argtype(s)
92 except cli.ArgTypeError as e:
93 examples += e.example
94
Kuang-che Wu3eb6b502018-06-06 16:15:18 +080095 m = re.match(_re_intra_rev, s)
96 if m:
97 try:
98 argtype(m.group(1))
99 argtype(m.group(2))
100 return s
101 except cli.ArgTypeError as e:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800102 for example in e.example:
103 examples.append(make_intra_rev(example, example, 10))
104 raise cli.ArgTypeError('Invalid intra rev', examples)
Kuang-che Wucab92452019-01-19 18:24:29 +0800105
106 examples.append(make_intra_rev('<rev1>', '<rev2>', 10))
107 raise cli.ArgTypeError('Invalid rev', examples)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800108
109 return argtype_function
110
111
112def _normalize_repo_url(repo_url):
113 repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/',
114 r'https://chrome-internal.googlesource.com/', repo_url)
115 repo_url = re.sub(r'\.git$', '', repo_url)
116 return repo_url
117
118
119class PathSpec(object):
120 """Specified code version of one path.
121
122 Attributes:
123 path: local path, relative to project base dir
124 repo_url: code repository location
125 at: code version, could be git hash or branch name
126 """
127
128 def __init__(self, path, repo_url, at):
129 self.path = path
130 self.repo_url = repo_url
131 self.at = at
132
133 def is_static(self):
134 return git_util.is_git_rev(self.at)
135
136 def __eq__(self, rhs):
137 if self.path != rhs.path:
138 return False
139 if self.at != rhs.at:
140 return False
141 if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url):
142 return False
143 return True
144
145 def __ne__(self, rhs):
146 return not self == rhs
147
148
149class Spec(object):
150 """Collection of PathSpec.
151
152 Spec is analogy to gclient's DEPS and repo's manifest.
153
154 Attributes:
155 spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code
156 version is pinned and fixed. On the other hand, SPEC_FLOAT is not
157 pinned and the actual version (git commit) may change over time.
158 name: name of this spec, for debugging purpose. usually version number
159 or git hash
160 timestamp: timestamp of this spec
161 path: path of spec
162 entries: paths to PathSpec dict
163 """
164
165 def __init__(self, spec_type, name, timestamp, path, entries=None):
166 self.spec_type = spec_type
167 self.name = name
168 self.timestamp = timestamp
169 self.path = path
170 self.entries = entries
171
172 def copy(self):
173 return copy.deepcopy(self)
174
175 def similar_score(self, rhs):
176 """Calculates similar score to another Spec.
177
178 Returns:
179 score of similarity. Smaller value is more similar.
180 """
181 score = 0
182 for path in set(self.entries) & set(rhs.entries):
183 if rhs[path] == self[path]:
184 continue
185 if rhs[path].at == self[path].at:
186 # it's often that remote repo moved around but should be treated as the
187 # same one
188 score += 0.1
189 else:
190 score += 1
191 score += len(set(self.entries) ^ set(rhs.entries))
192 return score
193
194 def is_static(self):
195 return all(path_spec.is_static() for path_spec in self.entries.values())
196
197 def is_subset(self, rhs):
198 return set(self.entries.keys()) <= set(rhs.entries.keys())
199
200 def __getitem__(self, path):
201 return self.entries[path]
202
203 def __contains__(self, path):
204 return path in self.entries
205
206 def apply(self, action_group):
207 self.timestamp = action_group.timestamp
208 self.name = '(%s)' % self.timestamp
209 for action in action_group.actions:
210 if isinstance(action, GitAddRepo):
211 self.entries[action.path] = PathSpec(action.path, action.repo_url,
212 action.rev)
213 elif isinstance(action, GitCheckoutCommit):
214 self.entries[action.path].at = action.rev
215 elif isinstance(action, GitRemoveRepo):
216 del self.entries[action.path]
217 else:
218 assert 0, 'unknown action: %s' % action.__class__.__name__
219
220 def dump(self):
221 # for debugging
222 print(self.name, self.path, self.timestamp)
223 print('size', len(self.entries))
224 for path, path_spec in sorted(self.entries.items()):
225 print(path, path_spec.at)
226
227 def diff(self, rhs):
228 logger.info('diff between %s and %s', self.name, rhs.name)
229 expect = set(self.entries)
230 actual = set(rhs.entries)
231 common = 0
232 for path in sorted(expect - actual):
233 logger.info('-%s', path)
234 for path in sorted(actual - expect):
235 logger.info('+%s', path)
236 for path in sorted(expect & actual):
237 if self[path] == rhs[path]:
238 common += 1
239 continue
240 if self[path].at != rhs[path].at:
241 logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at)
242 if self[path].repo_url != rhs[path].repo_url:
243 logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url,
244 rhs[path].repo_url)
245 logger.info('and common=%s', common)
246
247
248class Action(object):
249 """Actions describe changes from one Spec to another.
250
251 Attributes:
252 timestamp: action time
253 path: action path, which is relative to project root
254 """
255
256 def __init__(self, timestamp, path):
257 self.timestamp = timestamp
258 self.path = path
259
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800260 def apply(self, _code_storage, _root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800261 raise NotImplementedError
262
263 def summary(self, _code_storage):
264 raise NotImplementedError
265
266 def __eq__(self, rhs):
267 return self.__dict__ == rhs.__dict__
268
269 def serialize(self):
270 return self.__class__.__name__, self.__dict__
271
272
273def unserialize_action(data):
274 classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo]
275 class_name, values = data
276 assert class_name in [cls.__name__ for cls in classes
277 ], 'unknown action class: %s' % class_name
278 for cls in classes:
279 if class_name == cls.__name__:
Kuang-che Wu89ac2e72018-07-25 17:39:07 +0800280 action = cls(**values)
281 break
282 return action
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800283
284
285class ActionGroup(object):
286 """Atomic group of Action objects
287
288 This models atomic commits (for example, gerrit topic, or circular
289 CQ-DEPEND). Otherwise, one ActionGroup usually consists only one Action
290 object.
291 """
292
293 def __init__(self, timestamp, comment=None):
294 self.timestamp = timestamp
295 self.name = None
296 self.actions = []
297 self.comment = comment
298
299 def add(self, action):
300 self.actions.append(action)
301
302 def serialize(self):
Kuang-che Wu22455262018-08-03 15:38:29 +0800303 return dict(
304 timestamp=self.timestamp,
305 name=self.name,
306 comment=self.comment,
307 actions=[a.serialize() for a in self.actions])
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800308
309 def summary(self, code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800310 result = {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800311 if self.comment:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800312 result['comment'] = self.comment
313 result['actions'] = [
314 action.summary(code_storage) for action in self.actions
315 ]
316 return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800317
318 @staticmethod
319 def unserialize(data):
Kuang-che Wu22455262018-08-03 15:38:29 +0800320 ag = ActionGroup(data['timestamp'])
321 ag.name = data['name']
322 ag.comment = data['comment']
323 for x in data['actions']:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800324 ag.add(unserialize_action(x))
325 return ag
326
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800327 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800328 for action in self.actions:
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800329 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800330
331
332class GitCheckoutCommit(Action):
333 """Describes a git commit action.
334
335 Attributes:
336 repo_url: the corresponding url of git repo
337 rev: git commit to checkout
338 """
339
340 def __init__(self, timestamp, path, repo_url, rev):
341 super(GitCheckoutCommit, self).__init__(timestamp, path)
342 self.repo_url = repo_url
343 self.rev = rev
344
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800345 def apply(self, code_storage, root_dir):
346 del code_storage # unused
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800347 git_repo = os.path.join(root_dir, self.path)
348 assert git_util.is_git_root(git_repo)
349 git_util.checkout_version(git_repo, self.rev)
350
351 def summary(self, code_storage):
352 git_root = code_storage.cached_git_root(self.repo_url)
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800353 try:
Kuang-che Wue80bb872018-11-15 19:45:25 +0800354 commit_summary = git_util.get_commit_log(git_root,
355 self.rev).splitlines()[0]
Kuang-che Wube5fa2a2018-11-12 17:17:35 +0800356 except subprocess.CalledProcessError:
357 logger.warning('failed to get commit log of %s at %s', self.rev[:10],
358 git_root)
Kuang-che Wue80bb872018-11-15 19:45:25 +0800359 commit_summary = '(unknown)'
360 text = 'commit %s %s %r' % (self.rev[:10], self.path, commit_summary)
361 return dict(
362 timestamp=self.timestamp,
363 action_type='commit',
364 path=self.path,
365 commit_summary=commit_summary,
366 repo_url=self.repo_url,
367 rev=self.rev,
368 text=text,
369 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800370
371
372class GitAddRepo(Action):
373 """Describes a git repo add action.
374
375 Attributes:
376 repo_url: the corresponding url of git repo to add
377 rev: git commit to checkout
378 """
379
380 def __init__(self, timestamp, path, repo_url, rev):
381 super(GitAddRepo, self).__init__(timestamp, path)
382 self.repo_url = repo_url
383 self.rev = rev
384
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800385 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800386 git_repo = os.path.join(root_dir, self.path)
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800387 assert not os.path.exists(git_repo)
388
389 reference = code_storage.cached_git_root(self.repo_url)
390 git_util.clone(git_repo, self.repo_url, reference=reference)
391 git_util.checkout_version(git_repo, self.rev)
392
393 code_storage.add_to_project_list(root_dir, self.path, self.repo_url)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800394
395 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800396 text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10])
397 return dict(
398 timestamp=self.timestamp,
399 action_type='add_repo',
400 path=self.path,
401 text=text,
402 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800403
404
405class GitRemoveRepo(Action):
406 """Describes a git repo remove action."""
407
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800408 def apply(self, code_storage, root_dir):
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800409 assert self.path
410 git_repo = os.path.join(root_dir, self.path)
411 assert git_util.is_git_root(git_repo)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800412 shutil.rmtree(git_repo)
413
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800414 code_storage.remove_from_project_list(root_dir, self.path)
415
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800416 def summary(self, _code_storage):
Kuang-che Wue80bb872018-11-15 19:45:25 +0800417 return dict(
418 timestamp=self.timestamp,
419 action_type='remove_repo',
420 path=self.path,
421 text='remove repo %s' % self.path,
422 )
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800423
424
425def apply_actions(code_storage, action_groups, root_dir):
426 # Speed optimization: only apply the last one of consecutive commits per
427 # repo. It is possible to optimize further, but need to take care git repo
428 # add/remove within another repo.
429 commits = {}
430
431 def batch_apply(commits):
432 for i, commit_action in sorted(commits.values()):
433 logger.debug('[%d] applying "%r"', i, commit_action.summary(code_storage))
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800434 commit_action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800435
436 for i, action_group in enumerate(action_groups, 1):
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800437 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800438 if not isinstance(action, GitCheckoutCommit):
439 break
440 else:
441 # If all actions are commits, defer them for batch processing.
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800442 for action in action_group.actions:
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800443 commits[action.path] = (i, action)
444 continue
445
446 batch_apply(commits)
447 commits = {}
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800448 action.apply(code_storage, root_dir)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800449
450 batch_apply(commits)
451
452
453class SpecManager(object):
454 """Spec related abstract operations.
455
456 This class enumerates Spec instances and switch disk state to Spec.
457
458 In other words, this class abstracts:
459 - discovery of gclient's DEPS and repo's manifest
460 - gclient sync and repo sync
461 """
462
463 def collect_float_spec(self, old, new):
464 """Collects float Spec between two versions.
465
466 This method may fetch spec from network. However, it should not switch tree
467 version state.
468 """
469 raise NotImplementedError
470
471 def collect_fixed_spec(self, old, new):
472 """Collects fixed Spec between two versions.
473
474 This method may fetch spec from network. However, it should not switch tree
475 version state.
476 """
477 raise NotImplementedError
478
479 def parse_spec(self, spec):
480 """Parses information for Spec object.
481
482 Args:
483 spec: Spec object. It specifies what to parse and the parsed information
484 is stored inside.
485 """
486 raise NotImplementedError
487
488 def sync_disk_state(self, rev):
489 """Switch source tree state to given version."""
490 raise NotImplementedError
491
492
493class CodeStorage(object):
494 """Query code history and commit relationship without checkout.
495
496 Because paths inside source tree may be deleted or map to different remote
497 repo in different versions, we cannot query git information of one version
498 but the tree state is at another version. In order to query information
499 without changing tree state and fast, we need out of tree source code
500 storage.
501
502 This class assumes all git repos are mirrored somewhere on local disk.
503 Subclasses just need to implement cached_git_root() which returns the
504 location.
505
506 In other words, this class abstracts operations upon gclient's cache-dir
507 repo's mirror.
508 """
509
510 def cached_git_root(self, repo_url):
511 """The cached path of given remote git repo.
512
513 Args:
514 repo_url: URL of git remote repo
515
516 Returns:
517 path of cache folder
518 """
519 raise NotImplementedError
520
Kuang-che Wu6948ecc2018-09-11 17:43:49 +0800521 def add_to_project_list(self, project_root, path, repo_url):
522 raise NotImplementedError
523
524 def remove_from_project_list(self, project_root, path):
525 raise NotImplementedError
526
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800527 def is_ancestor_commit(self, spec, path, old, new):
528 """Determine one commit is ancestor of another.
529
530 Args:
531 spec: Spec object
532 path: local path relative to project root
533 old: commit id
534 new: commit id
535
536 Returns:
537 True if `old` is ancestor of `new`
538 """
539 git_root = self.cached_git_root(spec[path].repo_url)
540 return git_util.is_ancestor_commit(git_root, old, new)
541
542 def get_rev_by_time(self, spec, path, timestamp):
543 """Get commit hash of given spec by time.
544
545 Args:
546 spec: Spec object
547 path: local path relative to project root
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800548 timestamp: timestamp
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800549
550 Returns:
551 The commit hash of given time. If there are commits with the given
552 timestamp, returns the last commit.
553 """
554 git_root = self.cached_git_root(spec[path].repo_url)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800555 # spec[path].at is remote reference name. Since git_root is a mirror (not
556 # a local checkout), there is no need to convert the name.
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800557 return git_util.get_rev_by_time(git_root, timestamp, spec[path].at)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800558
559 def get_actions_between_two_commit(self, spec, path, old, new):
560 git_root = self.cached_git_root(spec[path].repo_url)
561 result = []
562 for timestamp, git_rev in git_util.list_commits_between_commits(
563 git_root, old, new):
564 result.append(
565 GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev))
566 return result
567
568 def is_containing_commit(self, spec, path, rev):
569 git_root = self.cached_git_root(spec[path].repo_url)
570 return git_util.is_containing_commit(git_root, rev)
571
572 def are_spec_commits_available(self, spec):
573 for path, path_spec in spec.entries.items():
574 if not path_spec.is_static():
575 continue
576 if not self.is_containing_commit(spec, path, path_spec.at):
577 return False
578 return True
579
580
581class CodeManager(object):
582 """Class to reconstruct historical source tree state.
583
584 This class can reconstruct all moments of source tree state and diffs between
585 them.
586
587 Attributes:
588 root_dir: root path of project source tree
589 spec_manager: SpecManager object
590 code_storage: CodeStorage object
591 """
592
593 def __init__(self, root_dir, spec_manager, code_storage):
594 self.root_dir = root_dir
595 self.spec_manager = spec_manager
596 self.code_storage = code_storage
597
598 def generate_actions_between_specs(self, prev_float, next_float):
599 """Generates actions between two float specs.
600
601 Args:
602 prev_float: start of spec object (exclusive)
603 next_float: end of spec object (inclusive)
604
605 Returns:
606 list of Action object (unordered)
607 """
608 actions = []
609 for path in set(prev_float.entries) | set(next_float.entries):
610
611 # Add repo
612 if path not in prev_float:
613 if next_float[path].is_static():
614 next_at = next_float[path].at
615 else:
616 next_at = self.code_storage.get_rev_by_time(next_float, path,
617 next_float.timestamp)
618 actions.append(
619 GitAddRepo(next_float.timestamp, path, next_float[path].repo_url,
620 next_at))
621 continue
622
623 # Existing path is floating, enumerates commits until next spec.
624 #
625 # prev_at till_at
626 # prev branch ---> o --------> o --------> o --------> o --------> ...
627 # ^ ^
628 # prev_float.timestamp next_float.timestamp
629 if not prev_float[path].is_static():
630 prev_at = self.code_storage.get_rev_by_time(prev_float, path,
631 prev_float.timestamp)
632 till_at = self.code_storage.get_rev_by_time(prev_float, path,
633 next_float.timestamp)
634
635 actions.extend(
636 self.code_storage.get_actions_between_two_commit(
637 prev_float, path, prev_at, till_at))
638 else:
639 prev_at = till_at = prev_float[path].at
640
641 # At next_float.timestamp.
642 if path not in next_float:
643 # remove repo
644 actions.append(GitRemoveRepo(next_float.timestamp, path))
645 next_at = None
646
647 elif next_float[path].is_static():
648 # pinned to certain commit on different branch
649 next_at = next_float[path].at
650
651 elif next_float[path].at == prev_float[path].at:
652 # keep floating on the same branch
653 next_at = till_at
654
655 else:
656 # switch to another branch
657 # prev_at till_at
658 # prev branch ---> o --------> o --------> o --------> o --------> ...
659 #
660 # next_at
661 # next branch ...... o ------> o --------> o -----> ...
662 # ^ ^
663 # prev_float.timestamp next_float.timestamp
664 next_at = self.code_storage.get_rev_by_time(next_float, path,
665 next_float.timestamp)
666
667 if next_at and next_at != till_at:
668 actions.append(
669 GitCheckoutCommit(next_float.timestamp, path,
670 next_float[path].repo_url, next_at))
671
672 return actions
673
674 def synthesize_fixed_spec(self, float_spec, timestamp):
675 """Synthesizes fixed spec from float spec of given time.
676
677 Args:
678 float_spec: the float spec
679 timestamp: snapshot time
680
681 Returns:
682 Spec object
683 """
684 result = {}
685 for path, path_spec in float_spec.entries.items():
686 if not path_spec.is_static():
687 at = self.code_storage.get_rev_by_time(float_spec, path, timestamp)
688 path_spec = PathSpec(path_spec.path, path_spec.repo_url, at)
689
690 result[path] = copy.deepcopy(path_spec)
691
692 name = '%s@%s' % (float_spec.path, timestamp)
693 return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result)
694
695 def reorder_actions(self, actions):
696 """Reorder and cluster actions.
697
698 Args:
699 actions: list of Action objects
700
701 Returns:
702 list of ActionGroup objects
703 """
704 # TODO(kcwu): support atomic commits across repos
705 actions.sort(key=lambda x: x.timestamp)
706 result = []
707 for action in actions:
708 group = ActionGroup(action.timestamp)
709 group.add(action)
710 result.append(group)
711 return result
712
713 def match_spec(self, target, specs, start_index=0):
714 threshold = 3600
715 # ideal_index is the index of last spec before target
716 # begin and end are the range of indexes within threshold (inclusive)
717 ideal_index = None
718 begin, end = None, None
719 for i, spec in enumerate(specs[start_index:], start_index):
720 if spec.timestamp <= target.timestamp:
721 ideal_index = i
722 if abs(spec.timestamp - target.timestamp) < threshold:
723 if begin is None:
724 begin = i
725 end = i
726
727 candidates = []
728 if ideal_index is not None:
729 candidates.append(ideal_index)
730 if begin is not None:
731 candidates.extend(range(begin, end + 1))
732 if not candidates:
733 logger.error('unable to match %s: all specs are after it', target.name)
734 return None
735
736 compatible_candidates = [
737 i for i in candidates if specs[i].is_subset(target)
738 ]
739 if not compatible_candidates:
740 logger.error('unable to match %s: no compatible specs', target.name)
741 spec = specs[candidates[0]]
742 target.diff(spec)
743 return None
744
745 scores = []
746 for i in compatible_candidates:
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800747 # Tie-break: prefer earlier timestamp and smaller difference.
748 if specs[i].timestamp <= target.timestamp:
749 timediff = 0, target.timestamp - specs[i].timestamp
750 else:
751 timediff = 1, specs[i].timestamp - target.timestamp
752 scores.append((specs[i].similar_score(target), timediff, i))
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800753 scores.sort()
754
Kuang-che Wu8a28a9d2018-09-11 17:43:36 +0800755 score, _, index = scores[0]
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800756 if score != 0:
757 logger.warning('not exactly match (score=%s): %s', score, target.name)
758 target.diff(specs[index])
759
760 if index < ideal_index:
761 logger.warning(
762 '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d',
763 target.name, target.timestamp, specs[index].timestamp,
764 specs[ideal_index].timestamp,
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800765 specs[index].timestamp - target.timestamp)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800766 if index > ideal_index:
767 logger.warning(
768 'spec committed at %d matched later commit at %d. bad server clock?',
769 target.timestamp, specs[index].timestamp)
770
771 return index
772
773 def associate_fixed_and_synthesized_specs(self, fixed_specs,
774 synthesized_specs):
775 # All fixed specs are snapshot of float specs. Theoretically, they
776 # should be identical to one of the synthesized specs.
777 # However, it's not always true for some reasons --- maybe due to race
778 # condition, maybe due to bugs of this bisect-kit.
779 # To overcome this glitch, we try to match them by similarity instead of
780 # exact match.
781 result = []
782 last_index = 0
783 for i, fixed_spec in enumerate(fixed_specs):
784 matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index)
785 if matched_index is None:
786 if i in (0, len(fixed_specs) - 1):
787 logger.error('essential spec mismatch, unable to continue')
788 assert 0
789 else:
790 logger.warning('%s do not match, skip', fixed_spec.name)
791 continue
792 result.append((i, matched_index))
793 last_index = matched_index
794
795 return result
796
797 def _create_make_up_actions(self, fixed_spec, synthesized):
798 timestamp = synthesized.timestamp
799 make_up = ActionGroup(
800 timestamp, comment='make up glitch for %s' % fixed_spec.name)
801 for path in set(fixed_spec.entries) & set(synthesized.entries):
802 if fixed_spec[path].at == synthesized[path].at:
803 continue
804 action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url,
805 synthesized[path].at)
806 make_up.add(action)
807
808 if not make_up.actions:
809 return None
810 return make_up
811
812 def build_revlist(self, old, new):
813 """Build revlist.
814
815 Returns:
816 list of rev string
817 """
818 logger.info('build_revlist')
819 revlist = []
820
821 # step 1, find all float and fixed specs in the given range.
822 fixed_specs = self.spec_manager.collect_fixed_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800823 assert fixed_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800824 float_specs = self.spec_manager.collect_float_spec(old, new)
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800825 assert float_specs
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800826 while float_specs[-1].timestamp > fixed_specs[-1].timestamp:
827 float_specs.pop()
828 assert float_specs
829 for spec in float_specs + fixed_specs:
830 self.spec_manager.parse_spec(spec)
831
832 # step 2, synthesize all fixed specs in the range from float specs.
833 specs = float_specs + [fixed_specs[-1]]
834 actions = []
835 logger.debug('len(specs)=%d', len(specs))
836 for i in range(len(specs) - 1):
837 prev_float = specs[i]
838 next_float = specs[i + 1]
839 logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name,
840 prev_float.timestamp, next_float.name, next_float.timestamp)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800841 for action in self.generate_actions_between_specs(prev_float, next_float):
842 if action.timestamp < fixed_specs[0].timestamp:
843 continue
844 actions.append(action)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800845 action_groups = self.reorder_actions(actions)
846
847 spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp)
848 synthesized = [spec.copy()]
849 for action_group in action_groups:
850 spec.apply(action_group)
851 synthesized.append(spec.copy())
852
853 # step 3, associate fixed specs with synthesized specs.
854 associated_pairs = self.associate_fixed_and_synthesized_specs(
855 fixed_specs, synthesized)
856
857 # step 4, group actions and cache them
858 for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]):
859 next_fixed_index, next_synthesized_index = associated_pairs[i + 1]
860 revlist.append(fixed_specs[fixed_index].name)
861 this_action_groups = []
862
863 # handle glitch
864 if fixed_specs[fixed_index].similar_score(
865 synthesized[synthesized_index]) != 0:
866 assert synthesized[synthesized_index].is_subset(
867 fixed_specs[fixed_index])
868 skipped = set(fixed_specs[fixed_index].entries) - set(
869 synthesized[synthesized_index].entries)
870 if skipped:
871 logger.warning(
872 'between %s and %s, '
873 'bisect-kit cannot analyze commit history of following paths:',
874 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name)
875 for path in sorted(skipped):
876 logger.warning(' %s', path)
877
878 make_up = self._create_make_up_actions(fixed_specs[fixed_index],
879 synthesized[synthesized_index])
880 if make_up:
881 this_action_groups.append(make_up)
882
883 this_action_groups.extend(
884 action_groups[synthesized_index:next_synthesized_index])
885 for idx, ag in enumerate(this_action_groups, 1):
886 rev = make_intra_rev(fixed_specs[fixed_index].name,
887 fixed_specs[next_fixed_index].name, idx)
888 ag.name = rev
889 revlist.append(rev)
890
891 self.save_action_groups_between_releases(
892 fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name,
893 this_action_groups)
894 revlist.append(fixed_specs[associated_pairs[-1][0]].name)
895
896 return revlist
897
898 def save_action_groups_between_releases(self, old, new, action_groups):
899 data = [ag.serialize() for ag in action_groups]
900
901 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
902 if not os.path.exists(cache_dir):
903 os.makedirs(cache_dir)
904 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
905 with file(cache_filename, 'w') as fp:
906 json.dump(data, fp, indent=4, sort_keys=True)
907
908 def load_action_groups_between_releases(self, old, new):
909 cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR)
910 cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new))
911 if not os.path.exists(cache_filename):
Kuang-che Wue121fae2018-11-09 16:18:39 +0800912 raise errors.InternalError(
913 'cached revlist not found: %s' % cache_filename)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800914
915 result = []
916 for data in json.load(file(cache_filename)):
917 result.append(ActionGroup.unserialize(data))
918
919 return result
920
Kuang-che Wue80bb872018-11-15 19:45:25 +0800921 def get_rev_detail(self, rev):
922 rev_old, rev_new, index = parse_intra_rev(rev)
923 if rev_old == rev_new:
924 return {}
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800925
Kuang-che Wue80bb872018-11-15 19:45:25 +0800926 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
927 # Indexes inside intra_rev are 1 based.
928 action_group = action_groups[index - 1]
929 return action_group.summary(self.code_storage)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800930
931 def switch(self, rev):
932 # easy case
933 if not re.match(_re_intra_rev, rev):
934 self.spec_manager.sync_disk_state(rev)
935 return
936
937 rev_old, rev_new, idx = parse_intra_rev(rev)
938 action_groups = self.load_action_groups_between_releases(rev_old, rev_new)
939 assert 0 <= idx <= len(action_groups)
940 action_groups = action_groups[:idx]
941
942 self.spec_manager.sync_disk_state(rev_old)
943
944 apply_actions(self.code_storage, action_groups, self.root_dir)