Kuang-che Wu | 6e4beca | 2018-06-27 17:45:02 +0800 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 2 | # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """Model of source code organization and changes. |
| 6 | |
| 7 | This module modeled complex source code organization, i.e. nested git repos, |
| 8 | and their version relationship, i.e. pinned or floating git repo. In other |
| 9 | words, it's abstraction of chrome's gclient DEPS, and chromeos and Android's |
| 10 | repo manifest. |
| 11 | """ |
| 12 | |
| 13 | from __future__ import print_function |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 14 | import collections |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 15 | import copy |
| 16 | import json |
| 17 | import logging |
| 18 | import os |
| 19 | import re |
| 20 | import shutil |
| 21 | |
| 22 | from bisect_kit import cli |
Kuang-che Wu | e121fae | 2018-11-09 16:18:39 +0800 | [diff] [blame] | 23 | from bisect_kit import errors |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 24 | from bisect_kit import git_util |
| 25 | |
| 26 | logger = logging.getLogger(__name__) |
| 27 | |
| 28 | _re_intra_rev = r'^([^,]+)~([^,]+)/(\d+)$' |
| 29 | |
| 30 | SPEC_FIXED = 'fixed' |
| 31 | SPEC_FLOAT = 'float' |
| 32 | _DIFF_CACHE_DIR = 'bisectkit-cache' |
| 33 | |
| 34 | |
| 35 | def make_intra_rev(a, b, index): |
| 36 | """Makes intra-rev version string. |
| 37 | |
| 38 | Between two major "named" versions a and b, there are many small changes |
| 39 | (commits) in-between. bisect-kit will identify all those instances and bisect |
| 40 | them. We give names to those instances and call these names as "intra-rev" |
| 41 | which stands for minor version numbers within two major version. |
| 42 | |
| 43 | Note, a+index (without b) is not enough to identify an unique change due to |
| 44 | branches. Take chromeos as example, both 9900.1.0 and 9901.0.0 are derived |
| 45 | from 9900.0.0, so "9900.0.0 plus 100 changes" may ambiguously refer to states |
| 46 | in 9900.1.0 and 9901.0.0. |
| 47 | |
| 48 | Args: |
| 49 | a: the start version |
| 50 | b: the end version |
| 51 | index: the index number of changes between a and b |
| 52 | |
| 53 | Returns: |
| 54 | the intra-rev version string |
| 55 | """ |
| 56 | return '%s~%s/%d' % (a, b, index) |
| 57 | |
| 58 | |
| 59 | def parse_intra_rev(rev): |
| 60 | """Decomposes intra-rev string. |
| 61 | |
| 62 | See comments of make_intra_rev for what is intra-rev. |
| 63 | |
| 64 | Args: |
| 65 | rev: intra-rev string or normal version number |
| 66 | |
| 67 | Returns: |
| 68 | (start, end, index). If rev is not intra-rev, it must be normal version |
| 69 | number and returns (rev, rev, 0). |
| 70 | """ |
| 71 | m = re.match(_re_intra_rev, rev) |
Kuang-che Wu | 89ac2e7 | 2018-07-25 17:39:07 +0800 | [diff] [blame] | 72 | if not m: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 73 | return rev, rev, 0 |
| 74 | |
Kuang-che Wu | 89ac2e7 | 2018-07-25 17:39:07 +0800 | [diff] [blame] | 75 | return m.group(1), m.group(2), int(m.group(3)) |
| 76 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 77 | |
| 78 | def argtype_intra_rev(argtype): |
| 79 | """Validates argument is intra-rev. |
| 80 | |
| 81 | Args: |
| 82 | argtype: argtype function which validates major version number |
| 83 | |
| 84 | Returns: |
| 85 | A new argtype function which matches intra-rev |
| 86 | """ |
| 87 | |
| 88 | def argtype_function(s): |
Kuang-che Wu | cab9245 | 2019-01-19 18:24:29 +0800 | [diff] [blame] | 89 | examples = [] |
| 90 | try: |
| 91 | return argtype(s) |
| 92 | except cli.ArgTypeError as e: |
| 93 | examples += e.example |
| 94 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 95 | m = re.match(_re_intra_rev, s) |
| 96 | if m: |
| 97 | try: |
| 98 | argtype(m.group(1)) |
| 99 | argtype(m.group(2)) |
| 100 | return s |
| 101 | except cli.ArgTypeError as e: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 102 | for example in e.example: |
| 103 | examples.append(make_intra_rev(example, example, 10)) |
| 104 | raise cli.ArgTypeError('Invalid intra rev', examples) |
Kuang-che Wu | cab9245 | 2019-01-19 18:24:29 +0800 | [diff] [blame] | 105 | |
| 106 | examples.append(make_intra_rev('<rev1>', '<rev2>', 10)) |
| 107 | raise cli.ArgTypeError('Invalid rev', examples) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 108 | |
| 109 | return argtype_function |
| 110 | |
| 111 | |
| 112 | def _normalize_repo_url(repo_url): |
| 113 | repo_url = re.sub(r'https://chrome-internal.googlesource.com/a/', |
| 114 | r'https://chrome-internal.googlesource.com/', repo_url) |
| 115 | repo_url = re.sub(r'\.git$', '', repo_url) |
| 116 | return repo_url |
| 117 | |
| 118 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 119 | class PathSpec: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 120 | """Specified code version of one path. |
| 121 | |
| 122 | Attributes: |
| 123 | path: local path, relative to project base dir |
| 124 | repo_url: code repository location |
| 125 | at: code version, could be git hash or branch name |
| 126 | """ |
| 127 | |
| 128 | def __init__(self, path, repo_url, at): |
| 129 | self.path = path |
| 130 | self.repo_url = repo_url |
| 131 | self.at = at |
| 132 | |
| 133 | def is_static(self): |
| 134 | return git_util.is_git_rev(self.at) |
| 135 | |
| 136 | def __eq__(self, rhs): |
| 137 | if self.path != rhs.path: |
| 138 | return False |
| 139 | if self.at != rhs.at: |
| 140 | return False |
| 141 | if _normalize_repo_url(self.repo_url) != _normalize_repo_url(rhs.repo_url): |
| 142 | return False |
| 143 | return True |
| 144 | |
| 145 | def __ne__(self, rhs): |
| 146 | return not self == rhs |
| 147 | |
| 148 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 149 | class Spec: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 150 | """Collection of PathSpec. |
| 151 | |
| 152 | Spec is analogy to gclient's DEPS and repo's manifest. |
| 153 | |
| 154 | Attributes: |
| 155 | spec_type: type of spec, SPEC_FIXED or SPEC_FLOAT. SPEC_FIXED means code |
| 156 | version is pinned and fixed. On the other hand, SPEC_FLOAT is not |
| 157 | pinned and the actual version (git commit) may change over time. |
| 158 | name: name of this spec, for debugging purpose. usually version number |
| 159 | or git hash |
| 160 | timestamp: timestamp of this spec |
| 161 | path: path of spec |
| 162 | entries: paths to PathSpec dict |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 163 | revision: a commit id of manifest-internal indicates the manifest revision, |
| 164 | this argument is not used in DEPS. |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 165 | """ |
| 166 | |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 167 | def __init__(self, |
| 168 | spec_type, |
| 169 | name, |
| 170 | timestamp, |
| 171 | path, |
| 172 | entries=None, |
| 173 | revision=None): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 174 | self.spec_type = spec_type |
| 175 | self.name = name |
| 176 | self.timestamp = timestamp |
| 177 | self.path = path |
| 178 | self.entries = entries |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 179 | self.revision = revision |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 180 | |
| 181 | def copy(self): |
| 182 | return copy.deepcopy(self) |
| 183 | |
| 184 | def similar_score(self, rhs): |
| 185 | """Calculates similar score to another Spec. |
| 186 | |
| 187 | Returns: |
| 188 | score of similarity. Smaller value is more similar. |
| 189 | """ |
| 190 | score = 0 |
| 191 | for path in set(self.entries) & set(rhs.entries): |
| 192 | if rhs[path] == self[path]: |
| 193 | continue |
| 194 | if rhs[path].at == self[path].at: |
| 195 | # it's often that remote repo moved around but should be treated as the |
| 196 | # same one |
| 197 | score += 0.1 |
| 198 | else: |
| 199 | score += 1 |
| 200 | score += len(set(self.entries) ^ set(rhs.entries)) |
| 201 | return score |
| 202 | |
| 203 | def is_static(self): |
| 204 | return all(path_spec.is_static() for path_spec in self.entries.values()) |
| 205 | |
| 206 | def is_subset(self, rhs): |
| 207 | return set(self.entries.keys()) <= set(rhs.entries.keys()) |
| 208 | |
| 209 | def __getitem__(self, path): |
| 210 | return self.entries[path] |
| 211 | |
| 212 | def __contains__(self, path): |
| 213 | return path in self.entries |
| 214 | |
| 215 | def apply(self, action_group): |
| 216 | self.timestamp = action_group.timestamp |
| 217 | self.name = '(%s)' % self.timestamp |
| 218 | for action in action_group.actions: |
| 219 | if isinstance(action, GitAddRepo): |
| 220 | self.entries[action.path] = PathSpec(action.path, action.repo_url, |
| 221 | action.rev) |
| 222 | elif isinstance(action, GitCheckoutCommit): |
| 223 | self.entries[action.path].at = action.rev |
| 224 | elif isinstance(action, GitRemoveRepo): |
| 225 | del self.entries[action.path] |
| 226 | else: |
| 227 | assert 0, 'unknown action: %s' % action.__class__.__name__ |
| 228 | |
| 229 | def dump(self): |
| 230 | # for debugging |
| 231 | print(self.name, self.path, self.timestamp) |
| 232 | print('size', len(self.entries)) |
| 233 | for path, path_spec in sorted(self.entries.items()): |
| 234 | print(path, path_spec.at) |
| 235 | |
| 236 | def diff(self, rhs): |
| 237 | logger.info('diff between %s and %s', self.name, rhs.name) |
| 238 | expect = set(self.entries) |
| 239 | actual = set(rhs.entries) |
Kuang-che Wu | 4997bfd | 2019-03-18 13:09:26 +0800 | [diff] [blame] | 240 | common_count = 0 |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 241 | for path in sorted(expect - actual): |
| 242 | logger.info('-%s', path) |
| 243 | for path in sorted(actual - expect): |
| 244 | logger.info('+%s', path) |
| 245 | for path in sorted(expect & actual): |
| 246 | if self[path] == rhs[path]: |
Kuang-che Wu | 4997bfd | 2019-03-18 13:09:26 +0800 | [diff] [blame] | 247 | common_count += 1 |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 248 | continue |
| 249 | if self[path].at != rhs[path].at: |
| 250 | logger.info(' %s: at %s vs %s', path, self[path].at, rhs[path].at) |
| 251 | if self[path].repo_url != rhs[path].repo_url: |
| 252 | logger.info(' %s: repo_url %s vs %s', path, self[path].repo_url, |
| 253 | rhs[path].repo_url) |
Kuang-che Wu | 4997bfd | 2019-03-18 13:09:26 +0800 | [diff] [blame] | 254 | logger.info('and common=%s', common_count) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 255 | |
| 256 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 257 | class Action: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 258 | """Actions describe changes from one Spec to another. |
| 259 | |
| 260 | Attributes: |
| 261 | timestamp: action time |
| 262 | path: action path, which is relative to project root |
| 263 | """ |
| 264 | |
| 265 | def __init__(self, timestamp, path): |
| 266 | self.timestamp = timestamp |
| 267 | self.path = path |
| 268 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 269 | def apply(self, _code_storage, _root_dir): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 270 | raise NotImplementedError |
| 271 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 272 | def summary(self): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 273 | raise NotImplementedError |
| 274 | |
| 275 | def __eq__(self, rhs): |
| 276 | return self.__dict__ == rhs.__dict__ |
| 277 | |
| 278 | def serialize(self): |
| 279 | return self.__class__.__name__, self.__dict__ |
| 280 | |
| 281 | |
| 282 | def unserialize_action(data): |
| 283 | classes = [GitCheckoutCommit, GitAddRepo, GitRemoveRepo] |
| 284 | class_name, values = data |
| 285 | assert class_name in [cls.__name__ for cls in classes |
| 286 | ], 'unknown action class: %s' % class_name |
| 287 | for cls in classes: |
| 288 | if class_name == cls.__name__: |
Kuang-che Wu | 89ac2e7 | 2018-07-25 17:39:07 +0800 | [diff] [blame] | 289 | action = cls(**values) |
| 290 | break |
| 291 | return action |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 292 | |
| 293 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 294 | class ActionGroup: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 295 | """Atomic group of Action objects |
| 296 | |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 297 | This models atomic actions, ex: |
| 298 | - repo added/removed in the same manifest commit |
| 299 | - commits appears at the same time due to repo add |
| 300 | - gerrit topic |
| 301 | - circular CQ-DEPEND (Cq-Depend) |
| 302 | Otherwise, one ActionGroup usually consists only one Action object. |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 303 | """ |
| 304 | |
| 305 | def __init__(self, timestamp, comment=None): |
| 306 | self.timestamp = timestamp |
| 307 | self.name = None |
| 308 | self.actions = [] |
| 309 | self.comment = comment |
| 310 | |
| 311 | def add(self, action): |
| 312 | self.actions.append(action) |
| 313 | |
| 314 | def serialize(self): |
Kuang-che Wu | 2245526 | 2018-08-03 15:38:29 +0800 | [diff] [blame] | 315 | return dict( |
| 316 | timestamp=self.timestamp, |
| 317 | name=self.name, |
| 318 | comment=self.comment, |
| 319 | actions=[a.serialize() for a in self.actions]) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 320 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 321 | def summary(self): |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 322 | result = {} |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 323 | if self.comment: |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 324 | result['comment'] = self.comment |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 325 | result['actions'] = [action.summary() for action in self.actions] |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 326 | return result |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 327 | |
| 328 | @staticmethod |
| 329 | def unserialize(data): |
Kuang-che Wu | 2245526 | 2018-08-03 15:38:29 +0800 | [diff] [blame] | 330 | ag = ActionGroup(data['timestamp']) |
| 331 | ag.name = data['name'] |
| 332 | ag.comment = data['comment'] |
| 333 | for x in data['actions']: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 334 | ag.add(unserialize_action(x)) |
| 335 | return ag |
| 336 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 337 | def apply(self, code_storage, root_dir): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 338 | for action in self.actions: |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 339 | action.apply(code_storage, root_dir) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 340 | |
| 341 | |
| 342 | class GitCheckoutCommit(Action): |
| 343 | """Describes a git commit action. |
| 344 | |
| 345 | Attributes: |
| 346 | repo_url: the corresponding url of git repo |
| 347 | rev: git commit to checkout |
| 348 | """ |
| 349 | |
| 350 | def __init__(self, timestamp, path, repo_url, rev): |
| 351 | super(GitCheckoutCommit, self).__init__(timestamp, path) |
| 352 | self.repo_url = repo_url |
| 353 | self.rev = rev |
| 354 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 355 | def apply(self, code_storage, root_dir): |
| 356 | del code_storage # unused |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 357 | git_repo = os.path.join(root_dir, self.path) |
| 358 | assert git_util.is_git_root(git_repo) |
| 359 | git_util.checkout_version(git_repo, self.rev) |
| 360 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 361 | def summary(self): |
| 362 | text = 'commit %s %s' % (self.rev[:10], self.path) |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 363 | return dict( |
| 364 | timestamp=self.timestamp, |
| 365 | action_type='commit', |
| 366 | path=self.path, |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 367 | repo_url=self.repo_url, |
| 368 | rev=self.rev, |
| 369 | text=text, |
| 370 | ) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 371 | |
| 372 | |
| 373 | class GitAddRepo(Action): |
| 374 | """Describes a git repo add action. |
| 375 | |
| 376 | Attributes: |
| 377 | repo_url: the corresponding url of git repo to add |
| 378 | rev: git commit to checkout |
| 379 | """ |
| 380 | |
| 381 | def __init__(self, timestamp, path, repo_url, rev): |
| 382 | super(GitAddRepo, self).__init__(timestamp, path) |
| 383 | self.repo_url = repo_url |
| 384 | self.rev = rev |
| 385 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 386 | def apply(self, code_storage, root_dir): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 387 | git_repo = os.path.join(root_dir, self.path) |
Kuang-che Wu | df11c8a | 2019-03-18 13:21:24 +0800 | [diff] [blame] | 388 | if os.path.exists(git_repo): |
| 389 | if os.path.isdir(git_repo) and not os.listdir(git_repo): |
| 390 | # mimic gclient's behavior; don't panic |
| 391 | logger.warning( |
| 392 | 'adding repo %s; there is already an empty directory; ' |
| 393 | 'assume it is okay', git_repo) |
| 394 | else: |
| 395 | assert not os.path.exists(git_repo), '%s already exists' % git_repo |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 396 | |
| 397 | reference = code_storage.cached_git_root(self.repo_url) |
| 398 | git_util.clone(git_repo, self.repo_url, reference=reference) |
| 399 | git_util.checkout_version(git_repo, self.rev) |
| 400 | |
| 401 | code_storage.add_to_project_list(root_dir, self.path, self.repo_url) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 402 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 403 | def summary(self): |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 404 | text = 'add repo %s from %s@%s' % (self.path, self.repo_url, self.rev[:10]) |
| 405 | return dict( |
| 406 | timestamp=self.timestamp, |
| 407 | action_type='add_repo', |
| 408 | path=self.path, |
Kuang-che Wu | 356ecb9 | 2019-04-02 16:30:25 +0800 | [diff] [blame] | 409 | repo_url=self.repo_url, |
| 410 | rev=self.rev, |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 411 | text=text, |
| 412 | ) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 413 | |
| 414 | |
| 415 | class GitRemoveRepo(Action): |
| 416 | """Describes a git repo remove action.""" |
| 417 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 418 | def apply(self, code_storage, root_dir): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 419 | assert self.path |
| 420 | git_repo = os.path.join(root_dir, self.path) |
| 421 | assert git_util.is_git_root(git_repo) |
Kuang-che Wu | 067ff29 | 2019-02-14 18:16:23 +0800 | [diff] [blame] | 422 | # TODO(kcwu): other projects may be sub-tree of `git_repo`. |
| 423 | # They should not be deleted. (crbug/930047) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 424 | shutil.rmtree(git_repo) |
| 425 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 426 | code_storage.remove_from_project_list(root_dir, self.path) |
| 427 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 428 | def summary(self): |
Kuang-che Wu | e80bb87 | 2018-11-15 19:45:25 +0800 | [diff] [blame] | 429 | return dict( |
| 430 | timestamp=self.timestamp, |
| 431 | action_type='remove_repo', |
| 432 | path=self.path, |
| 433 | text='remove repo %s' % self.path, |
| 434 | ) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 435 | |
| 436 | |
| 437 | def apply_actions(code_storage, action_groups, root_dir): |
| 438 | # Speed optimization: only apply the last one of consecutive commits per |
| 439 | # repo. It is possible to optimize further, but need to take care git repo |
| 440 | # add/remove within another repo. |
| 441 | commits = {} |
| 442 | |
| 443 | def batch_apply(commits): |
Kuang-che Wu | 261174e | 2020-01-09 17:51:31 +0800 | [diff] [blame] | 444 | for i, _, commit_action in sorted(commits.values(), key=lambda x: x[:2]): |
Zheng-Jie Chang | 011bf95 | 2020-06-18 07:45:30 +0800 | [diff] [blame] | 445 | logger.debug('[%d] applying "%r"', i, commit_action.summary()) |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 446 | commit_action.apply(code_storage, root_dir) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 447 | |
| 448 | for i, action_group in enumerate(action_groups, 1): |
Kuang-che Wu | d1d45b4 | 2018-07-05 00:46:45 +0800 | [diff] [blame] | 449 | for action in action_group.actions: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 450 | if not isinstance(action, GitCheckoutCommit): |
| 451 | break |
| 452 | else: |
| 453 | # If all actions are commits, defer them for batch processing. |
Kuang-che Wu | 261174e | 2020-01-09 17:51:31 +0800 | [diff] [blame] | 454 | for j, action in enumerate(action_group.actions): |
| 455 | commits[action.path] = (i, j, action) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 456 | continue |
| 457 | |
| 458 | batch_apply(commits) |
| 459 | commits = {} |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 460 | action.apply(code_storage, root_dir) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 461 | |
| 462 | batch_apply(commits) |
| 463 | |
| 464 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 465 | class SpecManager: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 466 | """Spec related abstract operations. |
| 467 | |
| 468 | This class enumerates Spec instances and switch disk state to Spec. |
| 469 | |
| 470 | In other words, this class abstracts: |
| 471 | - discovery of gclient's DEPS and repo's manifest |
| 472 | - gclient sync and repo sync |
| 473 | """ |
| 474 | |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 475 | def collect_float_spec(self, old, new, fixed_specs=None): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 476 | """Collects float Spec between two versions. |
| 477 | |
| 478 | This method may fetch spec from network. However, it should not switch tree |
| 479 | version state. |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 480 | |
| 481 | Args: |
| 482 | old: old version |
| 483 | new: new version |
| 484 | fixed_specs: fixed specs from collect_fixed_spec(old, new) for Chrome OS |
| 485 | or None for others |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 486 | """ |
| 487 | raise NotImplementedError |
| 488 | |
| 489 | def collect_fixed_spec(self, old, new): |
| 490 | """Collects fixed Spec between two versions. |
| 491 | |
| 492 | This method may fetch spec from network. However, it should not switch tree |
| 493 | version state. |
| 494 | """ |
| 495 | raise NotImplementedError |
| 496 | |
| 497 | def parse_spec(self, spec): |
| 498 | """Parses information for Spec object. |
| 499 | |
| 500 | Args: |
| 501 | spec: Spec object. It specifies what to parse and the parsed information |
| 502 | is stored inside. |
| 503 | """ |
| 504 | raise NotImplementedError |
| 505 | |
| 506 | def sync_disk_state(self, rev): |
| 507 | """Switch source tree state to given version.""" |
| 508 | raise NotImplementedError |
| 509 | |
| 510 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 511 | class CodeStorage: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 512 | """Query code history and commit relationship without checkout. |
| 513 | |
| 514 | Because paths inside source tree may be deleted or map to different remote |
| 515 | repo in different versions, we cannot query git information of one version |
| 516 | but the tree state is at another version. In order to query information |
| 517 | without changing tree state and fast, we need out of tree source code |
| 518 | storage. |
| 519 | |
| 520 | This class assumes all git repos are mirrored somewhere on local disk. |
| 521 | Subclasses just need to implement cached_git_root() which returns the |
| 522 | location. |
| 523 | |
| 524 | In other words, this class abstracts operations upon gclient's cache-dir |
| 525 | repo's mirror. |
| 526 | """ |
| 527 | |
| 528 | def cached_git_root(self, repo_url): |
| 529 | """The cached path of given remote git repo. |
| 530 | |
| 531 | Args: |
| 532 | repo_url: URL of git remote repo |
| 533 | |
| 534 | Returns: |
| 535 | path of cache folder |
| 536 | """ |
| 537 | raise NotImplementedError |
| 538 | |
Kuang-che Wu | 6948ecc | 2018-09-11 17:43:49 +0800 | [diff] [blame] | 539 | def add_to_project_list(self, project_root, path, repo_url): |
| 540 | raise NotImplementedError |
| 541 | |
| 542 | def remove_from_project_list(self, project_root, path): |
| 543 | raise NotImplementedError |
| 544 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 545 | def is_ancestor_commit(self, spec, path, old, new): |
| 546 | """Determine one commit is ancestor of another. |
| 547 | |
| 548 | Args: |
| 549 | spec: Spec object |
| 550 | path: local path relative to project root |
| 551 | old: commit id |
| 552 | new: commit id |
| 553 | |
| 554 | Returns: |
| 555 | True if `old` is ancestor of `new` |
| 556 | """ |
| 557 | git_root = self.cached_git_root(spec[path].repo_url) |
| 558 | return git_util.is_ancestor_commit(git_root, old, new) |
| 559 | |
| 560 | def get_rev_by_time(self, spec, path, timestamp): |
| 561 | """Get commit hash of given spec by time. |
| 562 | |
| 563 | Args: |
| 564 | spec: Spec object |
| 565 | path: local path relative to project root |
Kuang-che Wu | e4bae0b | 2018-07-19 12:10:14 +0800 | [diff] [blame] | 566 | timestamp: timestamp |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 567 | |
| 568 | Returns: |
| 569 | The commit hash of given time. If there are commits with the given |
| 570 | timestamp, returns the last commit. |
| 571 | """ |
| 572 | git_root = self.cached_git_root(spec[path].repo_url) |
Kuang-che Wu | e4bae0b | 2018-07-19 12:10:14 +0800 | [diff] [blame] | 573 | # spec[path].at is remote reference name. Since git_root is a mirror (not |
| 574 | # a local checkout), there is no need to convert the name. |
Kuang-che Wu | 8a28a9d | 2018-09-11 17:43:36 +0800 | [diff] [blame] | 575 | return git_util.get_rev_by_time(git_root, timestamp, spec[path].at) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 576 | |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 577 | def get_actions_between_two_commit(self, |
| 578 | spec, |
| 579 | path, |
| 580 | old, |
| 581 | new, |
| 582 | ignore_not_ancestor=False): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 583 | git_root = self.cached_git_root(spec[path].repo_url) |
| 584 | result = [] |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 585 | # not in the same branch, regard as an atomic operation |
| 586 | # this situation happens when |
| 587 | # 1. new is branched from old and |
| 588 | # 2. commit timestamp is not reliable(i.e. commit time != merged time) |
| 589 | # old and new might not have ancestor relation |
| 590 | if ignore_not_ancestor and old != new and not git_util.is_ancestor_commit( |
| 591 | git_root, old, new): |
| 592 | timestamp = git_util.get_commit_time(git_root, new) |
| 593 | result.append( |
| 594 | GitCheckoutCommit(timestamp, path, spec[path].repo_url, new)) |
| 595 | return result |
| 596 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 597 | for timestamp, git_rev in git_util.list_commits_between_commits( |
| 598 | git_root, old, new): |
| 599 | result.append( |
| 600 | GitCheckoutCommit(timestamp, path, spec[path].repo_url, git_rev)) |
| 601 | return result |
| 602 | |
| 603 | def is_containing_commit(self, spec, path, rev): |
| 604 | git_root = self.cached_git_root(spec[path].repo_url) |
| 605 | return git_util.is_containing_commit(git_root, rev) |
| 606 | |
| 607 | def are_spec_commits_available(self, spec): |
| 608 | for path, path_spec in spec.entries.items(): |
| 609 | if not path_spec.is_static(): |
| 610 | continue |
| 611 | if not self.is_containing_commit(spec, path, path_spec.at): |
| 612 | return False |
| 613 | return True |
| 614 | |
| 615 | |
Kuang-che Wu | 23192ad | 2020-03-11 18:12:46 +0800 | [diff] [blame] | 616 | class CodeManager: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 617 | """Class to reconstruct historical source tree state. |
| 618 | |
| 619 | This class can reconstruct all moments of source tree state and diffs between |
| 620 | them. |
| 621 | |
| 622 | Attributes: |
| 623 | root_dir: root path of project source tree |
| 624 | spec_manager: SpecManager object |
| 625 | code_storage: CodeStorage object |
| 626 | """ |
| 627 | |
| 628 | def __init__(self, root_dir, spec_manager, code_storage): |
| 629 | self.root_dir = root_dir |
| 630 | self.spec_manager = spec_manager |
| 631 | self.code_storage = code_storage |
| 632 | |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 633 | def generate_action_groups_between_specs(self, prev_float, next_float): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 634 | """Generates actions between two float specs. |
| 635 | |
| 636 | Args: |
| 637 | prev_float: start of spec object (exclusive) |
| 638 | next_float: end of spec object (inclusive) |
| 639 | |
| 640 | Returns: |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 641 | list of ActionGroup object (ordered) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 642 | """ |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 643 | groups = [] |
| 644 | last_group = ActionGroup(next_float.timestamp) |
Zheng-Jie Chang | eb5aaf3 | 2020-01-10 16:36:58 +0800 | [diff] [blame] | 645 | is_removed = set() |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 646 | |
| 647 | # `branch_between_float_specs` is currently a chromeos-only logic, |
| 648 | # and branch behavior is not verified for android and chrome now. |
| 649 | is_chromeos_branched = False |
| 650 | if hasattr(self.spec_manager, 'branch_between_float_specs' |
| 651 | ) and self.spec_manager.branch_between_float_specs( |
| 652 | prev_float, next_float): |
| 653 | is_chromeos_branched = True |
| 654 | |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 655 | # Sort alphabetically, so parent directories are handled before children |
| 656 | # directories. |
Zheng-Jie Chang | eb5aaf3 | 2020-01-10 16:36:58 +0800 | [diff] [blame] | 657 | for path in sorted(set(prev_float.entries) | set(next_float.entries)): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 658 | |
| 659 | # Add repo |
| 660 | if path not in prev_float: |
| 661 | if next_float[path].is_static(): |
| 662 | next_at = next_float[path].at |
| 663 | else: |
| 664 | next_at = self.code_storage.get_rev_by_time(next_float, path, |
| 665 | next_float.timestamp) |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 666 | last_group.add( |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 667 | GitAddRepo(next_float.timestamp, path, next_float[path].repo_url, |
| 668 | next_at)) |
| 669 | continue |
| 670 | |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 671 | # Existing path is floating. |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 672 | if not prev_float[path].is_static(): |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 673 | # Enumerates commits until next spec. Get `prev_at` and `till_at` |
| 674 | # by prev_float and next_float's timestamp. |
| 675 | # |
| 676 | # 1. Non-branched case: |
| 677 | # |
| 678 | # prev_at till_at |
| 679 | # prev branch ---> o --------> o --------> o --------> o --------> ... |
| 680 | # ^ ^ |
| 681 | # prev_float.timestamp next_float.timestamp |
| 682 | # |
| 683 | # building an image between prev_at and till_at should follow |
| 684 | # prev_float's spec. |
| 685 | # |
| 686 | # 2. Branched case: |
| 687 | # |
| 688 | # till_at |
| 689 | # /------->o----------> |
| 690 | # / ^ next_float.timestamp |
| 691 | # / prev_at |
| 692 | # ---------->o----------------------> |
| 693 | # ^prev_float.timestamp |
| 694 | # |
| 695 | # building an image between prev_at and till_at should follow |
| 696 | # next_float's spec. |
| 697 | # |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 698 | prev_at = self.code_storage.get_rev_by_time(prev_float, path, |
| 699 | prev_float.timestamp) |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 700 | if is_chromeos_branched: |
| 701 | till_at = self.code_storage.get_rev_by_time(next_float, path, |
| 702 | next_float.timestamp) |
| 703 | else: |
| 704 | till_at = self.code_storage.get_rev_by_time(prev_float, path, |
| 705 | next_float.timestamp) |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 706 | actions = self.code_storage.get_actions_between_two_commit( |
Zheng-Jie Chang | 868c175 | 2020-01-21 14:42:41 +0800 | [diff] [blame] | 707 | prev_float, path, prev_at, till_at, ignore_not_ancestor=True) |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 708 | |
| 709 | # Assume commits with the same timestamp as manifest/DEPS change are |
| 710 | # atomic. |
| 711 | if actions and actions[-1].timestamp == next_float.timestamp: |
| 712 | last_group.add(actions.pop()) |
| 713 | |
| 714 | for action in actions: |
| 715 | group = ActionGroup(action.timestamp) |
| 716 | group.add(action) |
| 717 | groups.append(group) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 718 | else: |
| 719 | prev_at = till_at = prev_float[path].at |
| 720 | |
| 721 | # At next_float.timestamp. |
| 722 | if path not in next_float: |
Zheng-Jie Chang | eb5aaf3 | 2020-01-10 16:36:58 +0800 | [diff] [blame] | 723 | if path in is_removed: |
| 724 | continue |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 725 | # remove repo |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 726 | next_at = None |
Kuang-che Wu | cbe1243 | 2019-03-18 19:35:03 +0800 | [diff] [blame] | 727 | sub_repos = [p for p in prev_float.entries if p.startswith(path + '/')] |
Kuang-che Wu | cbe1243 | 2019-03-18 19:35:03 +0800 | [diff] [blame] | 728 | # Remove deeper repo first |
| 729 | for path2 in sorted(sub_repos, reverse=True): |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 730 | last_group.add(GitRemoveRepo(next_float.timestamp, path2)) |
Zheng-Jie Chang | eb5aaf3 | 2020-01-10 16:36:58 +0800 | [diff] [blame] | 731 | is_removed.add(path2) |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 732 | last_group.add(GitRemoveRepo(next_float.timestamp, path)) |
Zheng-Jie Chang | eb5aaf3 | 2020-01-10 16:36:58 +0800 | [diff] [blame] | 733 | is_removed.add(path) |
Kuang-che Wu | cbe1243 | 2019-03-18 19:35:03 +0800 | [diff] [blame] | 734 | for path2 in sorted(set(sub_repos) & set(next_float.entries)): |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 735 | last_group.add( |
Kuang-che Wu | cbe1243 | 2019-03-18 19:35:03 +0800 | [diff] [blame] | 736 | GitAddRepo(next_float.timestamp, path2, |
| 737 | next_float[path2].repo_url, prev_float[path2].at)) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 738 | |
| 739 | elif next_float[path].is_static(): |
| 740 | # pinned to certain commit on different branch |
| 741 | next_at = next_float[path].at |
| 742 | |
| 743 | elif next_float[path].at == prev_float[path].at: |
| 744 | # keep floating on the same branch |
| 745 | next_at = till_at |
| 746 | |
| 747 | else: |
| 748 | # switch to another branch |
| 749 | # prev_at till_at |
| 750 | # prev branch ---> o --------> o --------> o --------> o --------> ... |
| 751 | # |
| 752 | # next_at |
| 753 | # next branch ...... o ------> o --------> o -----> ... |
| 754 | # ^ ^ |
| 755 | # prev_float.timestamp next_float.timestamp |
| 756 | next_at = self.code_storage.get_rev_by_time(next_float, path, |
| 757 | next_float.timestamp) |
| 758 | |
| 759 | if next_at and next_at != till_at: |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 760 | last_group.add( |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 761 | GitCheckoutCommit(next_float.timestamp, path, |
| 762 | next_float[path].repo_url, next_at)) |
| 763 | |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 764 | groups.sort(key=lambda x: x.timestamp) |
| 765 | if last_group.actions: |
| 766 | groups.append(last_group) |
| 767 | return groups |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 768 | |
| 769 | def synthesize_fixed_spec(self, float_spec, timestamp): |
| 770 | """Synthesizes fixed spec from float spec of given time. |
| 771 | |
| 772 | Args: |
| 773 | float_spec: the float spec |
| 774 | timestamp: snapshot time |
| 775 | |
| 776 | Returns: |
| 777 | Spec object |
| 778 | """ |
| 779 | result = {} |
| 780 | for path, path_spec in float_spec.entries.items(): |
| 781 | if not path_spec.is_static(): |
| 782 | at = self.code_storage.get_rev_by_time(float_spec, path, timestamp) |
| 783 | path_spec = PathSpec(path_spec.path, path_spec.repo_url, at) |
| 784 | |
| 785 | result[path] = copy.deepcopy(path_spec) |
| 786 | |
| 787 | name = '%s@%s' % (float_spec.path, timestamp) |
| 788 | return Spec(SPEC_FIXED, name, timestamp, float_spec.path, result) |
| 789 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 790 | def match_spec(self, target, specs, start_index=0): |
| 791 | threshold = 3600 |
| 792 | # ideal_index is the index of last spec before target |
| 793 | # begin and end are the range of indexes within threshold (inclusive) |
| 794 | ideal_index = None |
| 795 | begin, end = None, None |
| 796 | for i, spec in enumerate(specs[start_index:], start_index): |
| 797 | if spec.timestamp <= target.timestamp: |
| 798 | ideal_index = i |
| 799 | if abs(spec.timestamp - target.timestamp) < threshold: |
| 800 | if begin is None: |
| 801 | begin = i |
| 802 | end = i |
| 803 | |
| 804 | candidates = [] |
| 805 | if ideal_index is not None: |
| 806 | candidates.append(ideal_index) |
| 807 | if begin is not None: |
Kuang-che Wu | ae6824b | 2019-08-27 22:20:01 +0800 | [diff] [blame] | 808 | candidates.extend(list(range(begin, end + 1))) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 809 | if not candidates: |
| 810 | logger.error('unable to match %s: all specs are after it', target.name) |
| 811 | return None |
| 812 | |
| 813 | compatible_candidates = [ |
| 814 | i for i in candidates if specs[i].is_subset(target) |
| 815 | ] |
| 816 | if not compatible_candidates: |
| 817 | logger.error('unable to match %s: no compatible specs', target.name) |
| 818 | spec = specs[candidates[0]] |
| 819 | target.diff(spec) |
| 820 | return None |
| 821 | |
| 822 | scores = [] |
| 823 | for i in compatible_candidates: |
Kuang-che Wu | 8a28a9d | 2018-09-11 17:43:36 +0800 | [diff] [blame] | 824 | # Tie-break: prefer earlier timestamp and smaller difference. |
| 825 | if specs[i].timestamp <= target.timestamp: |
| 826 | timediff = 0, target.timestamp - specs[i].timestamp |
| 827 | else: |
| 828 | timediff = 1, specs[i].timestamp - target.timestamp |
| 829 | scores.append((specs[i].similar_score(target), timediff, i)) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 830 | scores.sort() |
| 831 | |
Kuang-che Wu | 8a28a9d | 2018-09-11 17:43:36 +0800 | [diff] [blame] | 832 | score, _, index = scores[0] |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 833 | if score != 0: |
| 834 | logger.warning('not exactly match (score=%s): %s', score, target.name) |
| 835 | target.diff(specs[index]) |
| 836 | |
| 837 | if index < ideal_index: |
| 838 | logger.warning( |
| 839 | '%s (%s) matched earlier spec at %s instead of %s, racing? offset %d', |
| 840 | target.name, target.timestamp, specs[index].timestamp, |
| 841 | specs[ideal_index].timestamp, |
Kuang-che Wu | e4bae0b | 2018-07-19 12:10:14 +0800 | [diff] [blame] | 842 | specs[index].timestamp - target.timestamp) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 843 | if index > ideal_index: |
| 844 | logger.warning( |
| 845 | 'spec committed at %d matched later commit at %d. bad server clock?', |
| 846 | target.timestamp, specs[index].timestamp) |
| 847 | |
| 848 | return index |
| 849 | |
| 850 | def associate_fixed_and_synthesized_specs(self, fixed_specs, |
| 851 | synthesized_specs): |
| 852 | # All fixed specs are snapshot of float specs. Theoretically, they |
| 853 | # should be identical to one of the synthesized specs. |
| 854 | # However, it's not always true for some reasons --- maybe due to race |
| 855 | # condition, maybe due to bugs of this bisect-kit. |
| 856 | # To overcome this glitch, we try to match them by similarity instead of |
| 857 | # exact match. |
| 858 | result = [] |
| 859 | last_index = 0 |
| 860 | for i, fixed_spec in enumerate(fixed_specs): |
| 861 | matched_index = self.match_spec(fixed_spec, synthesized_specs, last_index) |
| 862 | if matched_index is None: |
| 863 | if i in (0, len(fixed_specs) - 1): |
| 864 | logger.error('essential spec mismatch, unable to continue') |
Kuang-che Wu | fe1e88a | 2019-09-10 21:52:25 +0800 | [diff] [blame] | 865 | raise ValueError('Commit history analyze failed. ' |
| 866 | 'Bisector cannot deal with this version range.') |
Kuang-che Wu | c0baf75 | 2020-06-29 11:32:26 +0800 | [diff] [blame] | 867 | logger.warning('%s do not match, skip', fixed_spec.name) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 868 | continue |
| 869 | result.append((i, matched_index)) |
| 870 | last_index = matched_index |
| 871 | |
| 872 | return result |
| 873 | |
| 874 | def _create_make_up_actions(self, fixed_spec, synthesized): |
| 875 | timestamp = synthesized.timestamp |
| 876 | make_up = ActionGroup( |
| 877 | timestamp, comment='make up glitch for %s' % fixed_spec.name) |
| 878 | for path in set(fixed_spec.entries) & set(synthesized.entries): |
| 879 | if fixed_spec[path].at == synthesized[path].at: |
| 880 | continue |
| 881 | action = GitCheckoutCommit(timestamp, path, synthesized[path].repo_url, |
| 882 | synthesized[path].at) |
| 883 | make_up.add(action) |
| 884 | |
| 885 | if not make_up.actions: |
| 886 | return None |
| 887 | return make_up |
| 888 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 889 | def _batch_fill_action_commit_log(self, details): |
| 890 | group_by_repo = collections.defaultdict(list) |
| 891 | for detail in details.values(): |
| 892 | for action in detail.get('actions', []): |
| 893 | if action['action_type'] == 'commit': |
| 894 | group_by_repo[action['repo_url']].append(action) |
| 895 | |
| 896 | for repo_url, actions in group_by_repo.items(): |
| 897 | git_root = self.code_storage.cached_git_root(repo_url) |
| 898 | revs = set(a['rev'] for a in actions) |
| 899 | metas = git_util.get_batch_commit_metadata(git_root, revs) |
| 900 | for action in actions: |
| 901 | meta = metas[action['rev']] |
| 902 | if meta is None: |
| 903 | commit_summary = '(unknown)' |
| 904 | else: |
| 905 | commit_summary = meta['message'].splitlines()[0] |
| 906 | action['commit_summary'] = commit_summary |
| 907 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 908 | def build_revlist(self, old, new): |
| 909 | """Build revlist. |
| 910 | |
| 911 | Returns: |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 912 | (revlist, details): |
| 913 | revlist: list of rev string |
| 914 | details: dict of rev to rev detail |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 915 | """ |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 916 | logger.info('build_revlist: old=%s, new=%s', old, new) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 917 | revlist = [] |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 918 | details = {} |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 919 | |
Kuang-che Wu | 020a118 | 2020-09-08 17:17:22 +0800 | [diff] [blame^] | 920 | # Enable cache for repetitive git operations. The space complexity is |
Kuang-che Wu | fcbcc50 | 2020-06-01 11:48:20 +0800 | [diff] [blame] | 921 | # O(number of candidates). |
| 922 | git_util.get_commit_metadata.enable_cache() |
| 923 | git_util.get_file_from_revision.enable_cache() |
Kuang-che Wu | 98d9846 | 2020-06-19 17:07:22 +0800 | [diff] [blame] | 924 | git_util.is_containing_commit.enable_cache() |
Zheng-Jie Chang | ad174a4 | 2020-06-20 15:28:10 +0800 | [diff] [blame] | 925 | git_util.is_ancestor_commit.enable_cache() |
Kuang-che Wu | fcbcc50 | 2020-06-01 11:48:20 +0800 | [diff] [blame] | 926 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 927 | # step 1, find all float and fixed specs in the given range. |
| 928 | fixed_specs = self.spec_manager.collect_fixed_spec(old, new) |
Kuang-che Wu | e4bae0b | 2018-07-19 12:10:14 +0800 | [diff] [blame] | 929 | assert fixed_specs |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 930 | for spec in fixed_specs: |
| 931 | self.spec_manager.parse_spec(spec) |
| 932 | |
| 933 | float_specs = self.spec_manager.collect_float_spec(old, new, fixed_specs) |
Kuang-che Wu | e4bae0b | 2018-07-19 12:10:14 +0800 | [diff] [blame] | 934 | assert float_specs |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 935 | while float_specs[-1].timestamp > fixed_specs[-1].timestamp: |
| 936 | float_specs.pop() |
| 937 | assert float_specs |
Zheng-Jie Chang | d968f55 | 2020-01-16 13:31:57 +0800 | [diff] [blame] | 938 | for spec in float_specs: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 939 | self.spec_manager.parse_spec(spec) |
| 940 | |
Kuang-che Wu | ed1bb62 | 2020-05-30 23:06:23 +0800 | [diff] [blame] | 941 | git_util.fast_lookup.optimize( |
| 942 | (float_specs[0].timestamp, float_specs[-1].timestamp)) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 943 | # step 2, synthesize all fixed specs in the range from float specs. |
| 944 | specs = float_specs + [fixed_specs[-1]] |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 945 | action_groups = [] |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 946 | logger.debug('len(specs)=%d', len(specs)) |
| 947 | for i in range(len(specs) - 1): |
| 948 | prev_float = specs[i] |
| 949 | next_float = specs[i + 1] |
| 950 | logger.debug('[%d], between %s (%s) and %s (%s)', i, prev_float.name, |
| 951 | prev_float.timestamp, next_float.name, next_float.timestamp) |
Kuang-che Wu | ae6847c | 2020-01-13 16:06:08 +0800 | [diff] [blame] | 952 | action_groups += self.generate_action_groups_between_specs( |
| 953 | prev_float, next_float) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 954 | |
| 955 | spec = self.synthesize_fixed_spec(float_specs[0], fixed_specs[0].timestamp) |
| 956 | synthesized = [spec.copy()] |
| 957 | for action_group in action_groups: |
| 958 | spec.apply(action_group) |
| 959 | synthesized.append(spec.copy()) |
| 960 | |
| 961 | # step 3, associate fixed specs with synthesized specs. |
| 962 | associated_pairs = self.associate_fixed_and_synthesized_specs( |
| 963 | fixed_specs, synthesized) |
| 964 | |
| 965 | # step 4, group actions and cache them |
| 966 | for i, (fixed_index, synthesized_index) in enumerate(associated_pairs[:-1]): |
| 967 | next_fixed_index, next_synthesized_index = associated_pairs[i + 1] |
| 968 | revlist.append(fixed_specs[fixed_index].name) |
| 969 | this_action_groups = [] |
| 970 | |
| 971 | # handle glitch |
| 972 | if fixed_specs[fixed_index].similar_score( |
| 973 | synthesized[synthesized_index]) != 0: |
| 974 | assert synthesized[synthesized_index].is_subset( |
| 975 | fixed_specs[fixed_index]) |
| 976 | skipped = set(fixed_specs[fixed_index].entries) - set( |
| 977 | synthesized[synthesized_index].entries) |
| 978 | if skipped: |
| 979 | logger.warning( |
| 980 | 'between %s and %s, ' |
| 981 | 'bisect-kit cannot analyze commit history of following paths:', |
| 982 | fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name) |
| 983 | for path in sorted(skipped): |
| 984 | logger.warning(' %s', path) |
| 985 | |
| 986 | make_up = self._create_make_up_actions(fixed_specs[fixed_index], |
| 987 | synthesized[synthesized_index]) |
| 988 | if make_up: |
| 989 | this_action_groups.append(make_up) |
| 990 | |
| 991 | this_action_groups.extend( |
| 992 | action_groups[synthesized_index:next_synthesized_index]) |
| 993 | for idx, ag in enumerate(this_action_groups, 1): |
| 994 | rev = make_intra_rev(fixed_specs[fixed_index].name, |
| 995 | fixed_specs[next_fixed_index].name, idx) |
| 996 | ag.name = rev |
| 997 | revlist.append(rev) |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 998 | details[rev] = ag.summary() |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 999 | |
| 1000 | self.save_action_groups_between_releases( |
| 1001 | fixed_specs[fixed_index].name, fixed_specs[next_fixed_index].name, |
| 1002 | this_action_groups) |
| 1003 | revlist.append(fixed_specs[associated_pairs[-1][0]].name) |
| 1004 | |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 1005 | self._batch_fill_action_commit_log(details) |
| 1006 | |
Kuang-che Wu | 98d9846 | 2020-06-19 17:07:22 +0800 | [diff] [blame] | 1007 | # Verify all repos in between are cached. |
| 1008 | for spec in reversed(float_specs): |
| 1009 | if self.code_storage.are_spec_commits_available(spec): |
| 1010 | continue |
| 1011 | raise errors.InternalError('Some commits in %s (%s) are unavailable' % |
| 1012 | (spec.name, spec.path)) |
| 1013 | |
Kuang-che Wu | ed1bb62 | 2020-05-30 23:06:23 +0800 | [diff] [blame] | 1014 | # Disable cache because there might be write or even destructive git |
| 1015 | # operations when switch git versions. Be conservative now. We can cache |
| 1016 | # more if we observed more slow git operations later. |
| 1017 | git_util.fast_lookup.disable() |
Kuang-che Wu | fcbcc50 | 2020-06-01 11:48:20 +0800 | [diff] [blame] | 1018 | git_util.get_commit_metadata.disable_cache() |
| 1019 | git_util.get_file_from_revision.disable_cache() |
Kuang-che Wu | 98d9846 | 2020-06-19 17:07:22 +0800 | [diff] [blame] | 1020 | git_util.is_containing_commit.disable_cache() |
Zheng-Jie Chang | ad174a4 | 2020-06-20 15:28:10 +0800 | [diff] [blame] | 1021 | git_util.is_ancestor_commit.disable_cache() |
Kuang-che Wu | 13acc7b | 2020-06-15 10:45:35 +0800 | [diff] [blame] | 1022 | |
| 1023 | return revlist, details |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1024 | |
| 1025 | def save_action_groups_between_releases(self, old, new, action_groups): |
| 1026 | data = [ag.serialize() for ag in action_groups] |
| 1027 | |
| 1028 | cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR) |
| 1029 | if not os.path.exists(cache_dir): |
| 1030 | os.makedirs(cache_dir) |
| 1031 | cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new)) |
Kuang-che Wu | ae6824b | 2019-08-27 22:20:01 +0800 | [diff] [blame] | 1032 | with open(cache_filename, 'w') as fp: |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1033 | json.dump(data, fp, indent=4, sort_keys=True) |
| 1034 | |
| 1035 | def load_action_groups_between_releases(self, old, new): |
| 1036 | cache_dir = os.path.join(self.root_dir, _DIFF_CACHE_DIR) |
| 1037 | cache_filename = os.path.join(cache_dir, '%s,%s.json' % (old, new)) |
| 1038 | if not os.path.exists(cache_filename): |
Kuang-che Wu | d1b7415 | 2020-05-20 08:46:46 +0800 | [diff] [blame] | 1039 | raise errors.InternalError('cached revlist not found: %s' % |
| 1040 | cache_filename) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1041 | |
| 1042 | result = [] |
Kuang-che Wu | 74bcb64 | 2020-02-20 18:45:53 +0800 | [diff] [blame] | 1043 | with open(cache_filename) as f: |
| 1044 | for data in json.load(f): |
| 1045 | result.append(ActionGroup.unserialize(data)) |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1046 | |
| 1047 | return result |
| 1048 | |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1049 | def switch(self, rev): |
Zheng-Jie Chang | 0fc704b | 2019-12-09 18:43:38 +0800 | [diff] [blame] | 1050 | rev_old, action_groups = self.get_intra_and_diff(rev) |
| 1051 | self.spec_manager.sync_disk_state(rev_old) |
| 1052 | apply_actions(self.code_storage, action_groups, self.root_dir) |
| 1053 | |
| 1054 | def get_intra_and_diff(self, rev): |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1055 | # easy case |
| 1056 | if not re.match(_re_intra_rev, rev): |
Zheng-Jie Chang | 0fc704b | 2019-12-09 18:43:38 +0800 | [diff] [blame] | 1057 | return rev, [] |
Kuang-che Wu | 3eb6b50 | 2018-06-06 16:15:18 +0800 | [diff] [blame] | 1058 | |
| 1059 | rev_old, rev_new, idx = parse_intra_rev(rev) |
| 1060 | action_groups = self.load_action_groups_between_releases(rev_old, rev_new) |
| 1061 | assert 0 <= idx <= len(action_groups) |
| 1062 | action_groups = action_groups[:idx] |
Zheng-Jie Chang | 0fc704b | 2019-12-09 18:43:38 +0800 | [diff] [blame] | 1063 | return rev_old, action_groups |