# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Gclient utility."""
6
from __future__ import print_function
import collections
import itertools
import logging
import operator
import os
import pprint
import sys

try:
  import urlparse
except ImportError:  # Python 3 moved urlparse into urllib.parse.
  from urllib import parse as urlparse

from bisect_kit import codechange
from bisect_kit import git_util
from bisect_kit import util
20
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
22
23
def config(gclient_dir,
           url=None,
           cache_dir=None,
           deps_file=None,
           custom_var=None,
           spec=None):
  """Simple wrapper of `gclient config`.

  Only options with a truthy value are added to the command line. The command
  is run with `gclient_dir` as working directory.

  Args:
    gclient_dir: root directory of gclient project
    url: URL of gclient configuration files; passed as positional argument
    cache_dir: gclient's git cache folder (`--cache-dir`)
    deps_file: override the default DEPS file name (`--deps-file`)
    custom_var: custom variables (`--custom-var`)
    spec: content of gclient file (`--spec`)
  """
  cmd = ['gclient', 'config']
  if deps_file:
    cmd += ['--deps-file', deps_file]
  if cache_dir:
    cmd += ['--cache-dir', cache_dir]
  if custom_var:
    cmd += ['--custom-var', custom_var]
  if spec:
    cmd += ['--spec', spec]
  if url:
    cmd.append(url)

  util.check_call(*cmd, cwd=gclient_dir)
53
54
def sync(gclient_dir,
         with_branch_heads=False,
         with_tags=False,
         ignore_locks=False,
         jobs=8):
  """Simple wrapper of `gclient sync`.

  The command is run with `gclient_dir` as working directory.

  Args:
    gclient_dir: root directory of gclient project
    with_branch_heads: whether to clone git `branch_heads` refspecs
    with_tags: whether to clone git tags
    ignore_locks: bypass gclient's lock
    jobs: how many workers running in parallel
  """
  cmd = ['gclient', 'sync', '--jobs', str(jobs)]
  if with_branch_heads:
    cmd.append('--with_branch_heads')
  if with_tags:
    cmd.append('--with_tags')

  # If 'gclient sync' is interrupted by ctrl-c or terminated with whatever
  # reasons, it will leave annoying lock files on disk and thus unfriendly to
  # bot tasks. In bisect-kit, we will use our own lock mechanism (in caller of
  # this function) and bypass gclient's.
  if ignore_locks:
    cmd.append('--ignore_locks')

  util.check_call(*cmd, cwd=gclient_dir)
83
84
# Copied from depot_tools' gclient.py
_PLATFORM_MAPPING = {
    'cygwin': 'win',
    'darwin': 'mac',
    'linux2': 'linux',
    'win32': 'win',
    'aix6': 'aix',
}

# Fallback for interpreters whose sys.platform carries no (or a different)
# version suffix, e.g. Python 3 reports 'linux' instead of 'linux2'.
_PLATFORM_PREFIXES = (
    ('linux', 'linux'),
    ('aix', 'aix'),
)


def _detect_host_os():
  """Maps `sys.platform` to the OS name used in gclient conditions.

  Returns:
    one of the values of _PLATFORM_MAPPING ('win', 'mac', 'linux', 'aix')

  Raises:
    KeyError: if the running platform is not recognized
  """
  platform = sys.platform
  if platform in _PLATFORM_MAPPING:
    return _PLATFORM_MAPPING[platform]
  for prefix, host_os in _PLATFORM_PREFIXES:
    if platform.startswith(prefix):
      return host_os
  raise KeyError(platform)
97
98
class Dep(object):
  """Represent one entry of DEPS's deps.

  One Dep object means one subproject inside DEPS file. It recorded what to
  checkout (like git or cipd) content of each subproject.

  Attributes:
    path: subproject path, relative to project root
    variables: the variables of the containing DEPS file; these variables will
        be applied to fields of this object (like 'url' and 'condition') and
        children projects.
    condition: whether to checkout this subproject
    dep_type: 'git' or 'cipd'
    url: if dep_type='git', the url of remote repo and associated branch/commit
    packages: if dep_type='cipd', cipd package version and location
  """

  def __init__(self, path, variables, entry):
    """Dep constructor.

    Args:
      path: subproject path
      variables: dict of DEPS variables, substituted into `url` via
          str.format()
      entry: value of this path in DEPS's `deps`; either a url string
          (implies dep_type='git' and no condition) or a dict with optional
          'dep_type' and 'condition' keys plus 'url' (git) or 'packages'
          (cipd).
    """
    self.path = path
    self.variables = variables

    self.url = None  # only valid for dep_type='git'
    self.packages = None  # only valid for dep_type='cipd'

    if isinstance(entry, str):
      self.dep_type = 'git'
      self.url = entry
      self.condition = None
    else:
      self.dep_type = entry.get('dep_type', 'git')
      self.condition = entry.get('condition')
      if self.dep_type == 'git':
        self.url = entry['url']
      else:
        assert self.dep_type == 'cipd'
        self.packages = entry['packages']

    # Expand '{name}' placeholders in the url with DEPS variables.
    if self.dep_type == 'git':
      self.url = self.url.format(**self.variables)

  def __eq__(self, rhs):
    # Comparing against a non-Dep (e.g. None) must not blow up in vars().
    if not isinstance(rhs, Dep):
      return NotImplemented
    return vars(self) == vars(rhs)

  def __ne__(self, rhs):
    result = self.__eq__(rhs)
    if result is NotImplemented:
      return result
    return not result

  def as_path_spec(self):
    """Converts this git dependency to codechange.PathSpec.

    Returns:
      codechange.PathSpec built from (path, repo url, revision or branch)
    """
    assert self.dep_type == 'git'

    if '@' in self.url:
      repo_url, at = self.url.split('@')
    else:
      # If the dependency is not pinned, the default is master branch.
      repo_url, at = self.url, 'master'
    return codechange.PathSpec(self.path, repo_url, at)

  def eval_condition(self):
    """Evaluate condition for DEPS parsing.

    The condition string is evaluated as a Python expression. Names are
    resolved from built-in defaults (linux/x64 checkout, detected host os)
    overridden by this object's `variables`.

    Returns:
      eval result; True if there is no condition
    """
    if not self.condition:
      return True

    vars_dict = {
        # default os: linux
        'checkout_android': False,
        'checkout_chromeos': False,
        'checkout_fuchsia': False,
        'checkout_ios': False,
        'checkout_linux': True,
        'checkout_mac': False,
        'checkout_win': False,
        # default cpu: x64
        'checkout_arm64': False,
        'checkout_arm': False,
        'checkout_mips': False,
        'checkout_ppc': False,
        'checkout_s390': False,
        'checkout_x64': True,
        'checkout_x86': False,
        'host_os': _detect_host_os(),
        'False': False,
        'None': None,
        'True': True,
    }
    vars_dict.update(self.variables)
    # NOTE(review): eval() runs arbitrary expressions; the condition must come
    # from a trusted DEPS file.
    # pylint: disable=eval-used
    return eval(self.condition, vars_dict)
189
190
class Deps(object):
  """DEPS parsed result.

  A plain container; every instance starts empty and owns its own
  collections.

  Attributes:
    variables: 'vars' dict in DEPS file; these variables will be applied
        recursively to children.
    entries: dict of Dep objects
    recursedeps: list of recursive projects
  """

  def __init__(self):
    self.variables = {}
    self.entries = {}
    self.recursedeps = []
205
206
class TimeSeriesTree(object):
  """Data structure for generating snapshots of historical dependency tree.

  This is a tree structure with time information. Each tree node represents not
  only typical tree data and tree children information, but also historical
  value of those tree data and tree children.

  To be more specific in terms of DEPS parsing, one TimeSeriesTree object
  represent a DEPS file. The caller will add_snapshot() to add parsed result of
  historical DEPS instances. After that, the tree root of this class can
  reconstruct the every historical moment of the project dependency state.

  This class is slight abstraction of git_util.get_history_recursively() to
  support more than single git repo and be version control system independent.
  """

  # TODO(kcwu): refactor git_util.get_history_recursively() to reuse this class.

  def __init__(self, parent_deps, name, start_time, end_time):
    """TimeSeriesTree constructor.

    Args:
      parent_deps: parent DEPS of the given period. None if this is tree root.
      name: project name
      start_time: start time
      end_time: end time
    """
    self.parent_deps = parent_deps
    self.name = name
    self.snapshots = {}
    self.start_time = start_time
    self.end_time = end_time

    # Intermediate dict to keep track alive children for the time being.
    # Maps child name to (timestamp since when it is alive, Deps at that
    # time). Maintained by add_snapshot() and no_more_snapshot(); set to None
    # once no_more_snapshot() is called.
    self.alive_children = {}

    # All historical children (TimeSeriesTree object) between start_time and
    # end_time. It's possible that children with the same name appear more than
    # once in this list because they are removed and added back to the DEPS
    # file.
    self.subtrees = []

  def subtree_eq(self, deps_a, deps_b, child_name):
    """Compares subtree of two Deps.

    Args:
      deps_a: Deps object
      deps_b: Deps object
      child_name: the subtree to compare

    Returns:
      True if the said subtree of these two Deps equal
    """
    # Need to compare variables because they may influence subtree parsing
    # behavior
    return (deps_a.entries[child_name] == deps_b.entries[child_name] and
            deps_a.variables == deps_b.variables)

  def add_snapshot(self, timestamp, deps, children_names):
    """Adds parsed DEPS result and children.

    For example, if a given DEPS file has N revisions between start_time and
    end_time, the caller should call this method N times to feed all parsed
    results in order (timestamp increasing).

    Args:
      timestamp: timestamp of `deps`
      deps: Deps object
      children_names: list of names of deps' children
    """
    assert timestamp not in self.snapshots
    self.snapshots[timestamp] = deps

    # Union through sets (dict views cannot be concatenated with a list on
    # Python 3); sorted so that the order of self.subtrees is deterministic.
    all_names = set(self.alive_children) | set(children_names)
    for child_name in sorted(all_names):
      # `child_name` is added at `timestamp`
      if child_name not in self.alive_children:
        self.alive_children[child_name] = timestamp, deps

      # `child_name` is removed at `timestamp`
      elif child_name not in children_names:
        since, since_deps = self.alive_children[child_name]
        self.subtrees.append(
            TimeSeriesTree(since_deps, child_name, since, timestamp))
        del self.alive_children[child_name]

      # `child_name` is alive before and after `timestamp`
      else:
        since, last_deps = self.alive_children[child_name]
        if not self.subtree_eq(last_deps, deps, child_name):
          # The child changed: close the old period and start a new one.
          self.subtrees.append(
              TimeSeriesTree(last_deps, child_name, since, timestamp))
          self.alive_children[child_name] = timestamp, deps

  def no_more_snapshot(self, deps):
    """Indicates all snapshots are added.

    Closes the period of every still-alive child at `end_time`.
    add_snapshot() should not be invoked after no_more_snapshot().

    Args:
      deps: unused; kept for interface compatibility with existing callers.
    """
    del deps  # unused
    alive = sorted(self.alive_children.items(), key=operator.itemgetter(0))
    for child_name, (timestamp, child_parent_deps) in alive:
      # A child which appears exactly at end_time spans an empty period.
      if timestamp == self.end_time:
        continue
      self.subtrees.append(
          TimeSeriesTree(child_parent_deps, child_name, timestamp,
                         self.end_time))
    self.alive_children = None

  def events(self):
    """Gets children added/removed events of this subtree.

    Returns:
      list of (timestamp, deps_name, deps, end_flag), sorted by timestamp and
      then deps_name; for the same timestamp and name, the end event comes
      before non-end events:
        timestamp: timestamp of event
        deps_name: name of this subtree
        deps: Deps object of given project
        end_flag: True indicates this is the last event of this deps tree
    """
    assert self.snapshots
    assert self.alive_children is None, ('events() is valid only after '
                                         'no_more_snapshot() is invoked')

    result = []

    # Walk snapshots in time order so `last_deps` really is the latest one
    # regardless of dict iteration order.
    last_deps = None
    for timestamp, deps in sorted(
        self.snapshots.items(), key=operator.itemgetter(0)):
      result.append((timestamp, self.name, deps, False))
      last_deps = deps

    assert last_deps
    result.append((self.end_time, self.name, last_deps, True))

    for subtree in self.subtrees:
      result.extend(subtree.events())

    # Never let the sort compare Deps objects. When a deps is replaced at one
    # timestamp, put the end of the old one before the start of the new one,
    # so consumers processing events in order end up with the new Deps.
    result.sort(key=lambda event: (event[0], event[1], not event[3]))

    return result

  def iter_path_specs(self):
    """Iterates snapshots of project dependency state.

    Yields:
      (timestamp, path_specs):
        timestamp: time of snapshot
        path_specs: dict of path_spec entries
    """
    forest = {}
    # Group by timestamp
    for timestamp, events in itertools.groupby(self.events(),
                                               operator.itemgetter(0)):
      # It's possible that one deps is removed and added at the same timestamp,
      # i.e. modification, so use counter to track.
      end_counter = collections.Counter()

      for _, name, deps, end in events:
        forest[name] = deps
        if end:
          end_counter[name] += 1
        else:
          end_counter[name] -= 1

      # Merge Deps at time `timestamp` into single path_specs.
      path_specs = {}
      for deps in forest.values():
        for path, dep in deps.entries.items():
          path_specs[path] = dep.as_path_spec()

      yield timestamp, path_specs

      # Remove deps which are removed at this timestamp.
      for name, count in end_counter.items():
        assert -1 <= count <= 1, (timestamp, name)
        if count == 1:
          del forest[name]
382
383
class DepsParser(object):
  """Gclient DEPS file parser."""

  def __init__(self, project_root, code_storage):
    """DepsParser constructor.

    Args:
      project_root: root directory of the project
      code_storage: object providing cached_git_root(repo_url), used to map a
          remote repo url to a local git directory
    """
    self.project_root = project_root
    self.code_storage = code_storage

  def parse_single_deps(self, content, parent_vars=None, parent_path=''):
    """Parses DEPS file without recursion.

    Entries whose condition evaluates to false and entries which are not
    dep_type='git' are skipped.

    Args:
      content: file content of DEPS file
      parent_vars: variables inherent from parent DEPS
      parent_path: project path of parent DEPS file

    Returns:
      Deps object

    Raises:
      SyntaxError: if `content` is not valid Python
    """

    def var_function(name):
      # Var('x') becomes a '{x}' placeholder, expanded later by str.format().
      return '{%s}' % name

    global_scope = dict(Var=var_function)
    local_scope = {}
    # NOTE(review): DEPS content is executed as Python code; it must come from
    # a trusted repository.
    exec(content, global_scope, local_scope)  # pylint: disable=exec-used

    deps = Deps()
    local_scope.setdefault('vars', {})
    if parent_vars:
      # Variables from the parent take precedence over this file's own vars.
      local_scope['vars'].update(parent_vars)
    deps.variables = local_scope['vars']

    # Warnings for old usages which we don't support.
    for name in deps.variables:
      if name.startswith('RECURSEDEPS_') or name.endswith('_DEPS_file'):
        logger.warning('%s is deprecated and not supported recursion syntax',
                       name)
    if 'deps_os' in local_scope:
      logger.warning('deps_os is no longer supported')

    use_relative_paths = local_scope.get('use_relative_paths', False)

    # A DEPS file without 'deps' simply has no entries.
    for path, dep_entry in local_scope.get('deps', {}).items():
      if use_relative_paths:
        path = os.path.join(parent_path, path)
      path = path.format(**deps.variables)
      dep = Dep(path, deps.variables, dep_entry)
      if not dep.eval_condition():
        continue

      # TODO(kcwu): support dep_type=cipd http://crbug.com/846564
      if dep.dep_type != 'git':
        logger.warning('dep_type=%s is not supported yet: %s', dep.dep_type,
                       path)
        continue

      deps.entries[path] = dep

    # Only keep recursedeps which survived the filtering above.
    recursedeps = []
    for path in local_scope.get('recursedeps', []):
      assert isinstance(path, str)
      if use_relative_paths:
        path = os.path.join(parent_path, path)
      path = path.format(**deps.variables)
      if path in deps.entries:
        recursedeps.append(path)
    deps.recursedeps = recursedeps

    return deps

  def construct_deps_tree(self,
                          tstree,
                          repo_url,
                          at,
                          after,
                          before,
                          parent_vars=None,
                          parent_path='',
                          deps_file='DEPS'):
    """Processes DEPS recursively of given time period.

    This method parses all commits of DEPS between time `after` and `before`,
    segments recursive dependencies into subtrees if they are changed, and
    processes subtrees recursively.

    The parsed results (multiple revisions of DEPS file) are stored in `tstree`.

    Args:
      tstree: TimeSeriesTree object
      repo_url: remote repo url
      at: branch or git commit id
      after: begin of period
      before: end of period
      parent_vars: DEPS variables inherit from parent DEPS (including
          custom_vars)
      parent_path: the path of parent project of current DEPS file
      deps_file: filename of DEPS file, relative to the git repo, repo_url
    """
    # A url is resolved to its local cache; anything else is used as a local
    # git directory as is.
    if '://' in repo_url:
      git_repo = self.code_storage.cached_git_root(repo_url)
    else:
      git_repo = repo_url

    if git_util.is_git_rev(at):
      # Pinned to a commit: the same revision for the whole period.
      history = [
          (after, at),
          (before, at),
      ]
    else:
      history = git_util.get_history(
          git_repo,
          deps_file,
          branch=at,
          after=after,
          before=before,
          padding=True)
      assert history

      # If not equal, it means the file was deleted but is still referenced by
      # its parent.
      assert history[-1][0] == before

    # Stays None if there is nothing to parse, instead of being unbound.
    deps = None
    # TODO(kcwu): optimization: history[-1] is unused
    for timestamp, git_rev in history[:-1]:
      content = git_util.get_file_from_revision(git_repo, git_rev, deps_file)

      deps = self.parse_single_deps(
          content, parent_vars=parent_vars, parent_path=parent_path)
      tstree.add_snapshot(timestamp, deps, deps.recursedeps)

    tstree.no_more_snapshot(deps)

    for subtree in tstree.subtrees:
      path = subtree.name
      path_spec = subtree.parent_deps.entries[path].as_path_spec()
      self.construct_deps_tree(
          subtree,
          path_spec.repo_url,
          path_spec.at,
          subtree.start_time,
          subtree.end_time,
          parent_vars=subtree.parent_deps.variables,
          parent_path=path)

  def enumerate_path_specs(self, start_time, end_time, path):
    """Enumerates dependency states of a project between two timestamps.

    The DEPS history is taken from the 'master' branch of `path`.

    Args:
      start_time: begin of period
      end_time: end of period
      path: passed to construct_deps_tree() as repo url (or local git
          directory) and used as name of the tree root

    Returns:
      generator of (timestamp, path_specs); see
      TimeSeriesTree.iter_path_specs()
    """
    tstree = TimeSeriesTree(None, path, start_time, end_time)
    self.construct_deps_tree(tstree, path, 'master', start_time, end_time)
    return tstree.iter_path_specs()
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800533
534
class GclientCache(codechange.CodeStorage):
  """Gclient git cache."""

  def __init__(self, cache_dir):
    """GclientCache constructor.

    Args:
      cache_dir: gclient's git cache folder
    """
    self.cache_dir = cache_dir

  def _url_to_cache_dir(self, url):
    """Converts repo url to the folder name inside the cache directory."""
    # ref: depot_tools' git_cache.Mirror.UrlToCacheDir
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    return norm_url.replace('-', '--').replace('/', '-').lower()

  def cached_git_root(self, repo_url):
    """Returns the path of the cached git repo of `repo_url`."""
    cache_path = self._url_to_cache_dir(repo_url)
    return os.path.join(self.cache_dir, cache_path)

  def _load_project_list(self, project_root):
    """Loads 'entries' dict from <project_root>/.gclient_entries.

    Returns:
      dict of the 'entries' variable; empty dict if the file does not define
      it
    """
    repo_project_list = os.path.join(project_root, '.gclient_entries')
    scope = {}
    # The file is Python code and is executed; close it deterministically.
    with open(repo_project_list) as f:
      exec(f.read(), scope)  # pylint: disable=exec-used
    return scope.get('entries', {})

  def _save_project_list(self, project_root, projects):
    """Writes `projects` to <project_root>/.gclient_entries.

    Args:
      project_root: root directory of gclient project
      projects: dict to store as 'entries'; written sorted by key
    """
    repo_project_list = os.path.join(project_root, '.gclient_entries')
    lines = ['entries = {\n']
    for item in sorted(projects.items()):
      # The % operator only unpacks tuples, so the formatted key and value
      # must be passed as a tuple (not a list or map object).
      lines.append(' %s: %s,\n' % tuple(map(pprint.pformat, item)))
    lines.append('}\n')
    with open(repo_project_list, 'w') as f:
      f.write(''.join(lines))

  def add_to_project_list(self, project_root, path, repo_url):
    """Adds (or overwrites) `path` with `repo_url` in the project list."""
    projects = self._load_project_list(project_root)

    projects[path] = repo_url

    self._save_project_list(project_root, projects)

  def remove_from_project_list(self, project_root, path):
    """Removes `path` from the project list; no-op if it is absent."""
    projects = self._load_project_list(project_root)

    if path in projects:
      del projects[path]

    self._save_project_list(project_root, projects)