blob: e671962aa266bd2762ffcf33a9518482c28f7d03 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wubfc4a642018-04-19 11:54:08 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Repo utility.
6
This module provides a wrapper for "repo" (a Google-built repository
management tool that runs on top of git) and related utility functions.
9"""
10
11from __future__ import print_function
12import logging
13import os
14import re
Kuang-che Wu34ab7b42019-10-28 19:40:05 +080015import subprocess
Kuang-che Wud1d45b42018-07-05 00:46:45 +080016import urlparse
Kuang-che Wubfc4a642018-04-19 11:54:08 +080017import xml.etree.ElementTree
18
Kuang-che Wud1d45b42018-07-05 00:46:45 +080019from bisect_kit import codechange
Kuang-che Wue121fae2018-11-09 16:18:39 +080020from bisect_kit import errors
Kuang-che Wubfc4a642018-04-19 11:54:08 +080021from bisect_kit import git_util
22from bisect_kit import util
23
24logger = logging.getLogger(__name__)
25
26
def get_manifest_url(manifest_dir):
  """Look up the URL the manifest checkout was cloned from.

  Runs `git config remote.origin.url` inside the manifest directory.

  Args:
    manifest_dir: path of manifest directory

  Returns:
    manifest URL, exactly as returned by util.check_output.
    NOTE(review): whether a trailing newline is stripped depends on
    util.check_output -- confirm before comparing or joining this value.
  """
  cmd = ['git', 'config', 'remote.origin.url']
  return util.check_output(*cmd, cwd=manifest_dir)
39
40
def find_repo_root(path):
  """Find the root path of a repo project.

  Walks from `path` towards the filesystem root and stops at the first
  directory that contains a `.repo` entry.

  Args:
    path: path to start searching from; may be relative

  Returns:
    Absolute path of the project root if path is inside a repo project;
    otherwise None.
  """
  path = os.path.abspath(path)
  while not os.path.exists(os.path.join(path, '.repo')):
    parent = os.path.dirname(path)
    # A filesystem root is its own parent. Comparing against the parent
    # instead of the literal '/' also terminates for roots like '//' (which
    # os.path.abspath preserves on POSIX) and for Windows drive roots.
    if parent == path:
      return None
    path = parent
  return path
56
57
def init(repo_dir,
         manifest_url,
         manifest_branch=None,
         manifest_name=None,
         repo_url=None,
         reference=None,
         mirror=False):
  """Repo init.

  Runs `repo init` with `repo_dir` as working directory.

  Args:
    repo_dir: root directory of repo
    manifest_url: manifest repository location
    manifest_branch: manifest branch or revision
    manifest_name: initial manifest file name
    repo_url: repo repository location
    reference: location of mirror directory
    mirror: indicates repo mirror

  Raises:
    errors.ExternalError: if repo_dir is located inside another repo project.
  """
  root = find_repo_root(repo_dir)
  # find_repo_root() returns an absolute path, so compare it with the
  # absolute form of repo_dir. Comparing with the raw argument would wrongly
  # reject a relative (or un-normalized) repo_dir which is itself the root
  # of an existing repo project.
  if root and root != os.path.abspath(repo_dir):
    raise errors.ExternalError(
        '%s should not be inside another repo project at %s' % (repo_dir, root))

  cmd = ['repo', 'init', '--manifest-url', manifest_url]
  if manifest_name:
    cmd += ['--manifest-name', manifest_name]
  if manifest_branch:
    cmd += ['--manifest-branch', manifest_branch]
  if repo_url:
    cmd += ['--repo-url', repo_url]
  if reference:
    cmd += ['--reference', reference]
  if mirror:
    cmd.append('--mirror')
  util.check_call(*cmd, cwd=repo_dir)
93
94
def cleanup_repo_generated_files(repo_dir, manifest_name='default.xml'):
  """Remove files created by <copyfile> and <linkfile> manifest tags.

  The manifest is read at HEAD of `<repo_dir>/.repo/manifests`, following
  <include> tags. If the manifest file is a symlink, its link target is
  parsed instead.

  Args:
    repo_dir: root directory of repo
    manifest_name: filename of manifest
  """
  manifest_dir = os.path.join(repo_dir, '.repo', 'manifests')
  manifest_path = os.path.join(manifest_dir, manifest_name)
  if os.path.islink(manifest_path):
    manifest_name = os.readlink(manifest_path)
  manifest = ManifestParser(manifest_dir).parse_xml_recursive(
      'HEAD', manifest_name)

  # (xpath, predicate identifying a generated entry, log message); copied
  # files are processed before links.
  cleanup_rules = (
      ('.//copyfile', os.path.isfile, 'delete file %r'),
      ('.//linkfile', os.path.islink, 'delete link %r'),
  )
  for xpath, is_generated, log_format in cleanup_rules:
    for node in manifest.findall(xpath):
      dest = node.get('dest')
      if not dest:
        continue
      # `dest` is relative to the top of the tree
      dest_path = os.path.join(repo_dir, dest)
      if is_generated(dest_path):
        logger.debug(log_format, dest_path)
        os.unlink(dest_path)
130
131
def sync(repo_dir, jobs=16, manifest_name=None, current_branch=False):
  """Repo sync.

  Runs `repo sync -q --force-sync` with `repo_dir` as working directory,
  after removing files generated by <copyfile>/<linkfile> tags. Note the
  cleanup step is invoked with its default manifest name; `manifest_name`
  is only forwarded to `repo sync`.

  Args:
    repo_dir: root directory of repo
    jobs: projects to fetch simultaneously
    manifest_name: filename of manifest
    current_branch: fetch only current branch
  """
  # Workaround to prevent garbage files left between repo syncs
  # (http://crbug.com/881783).
  cleanup_repo_generated_files(repo_dir)

  cmd = ['repo', 'sync', '-q', '--force-sync']
  if jobs:
    cmd.extend(['-j', str(jobs)])
  if manifest_name:
    cmd.extend(['--manifest-name', manifest_name])
  if current_branch:
    cmd.append('--current-branch')
  util.check_call(*cmd, cwd=repo_dir)
153
154
def abandon(repo_dir, branch_name):
  """Repo abandon.

  Runs `repo abandon <branch_name>` with `repo_dir` as working directory.

  Args:
    repo_dir: root directory of repo
    branch_name: branch name to abandon
  """
  cmd = ['repo', 'abandon', branch_name]
  # util.call (not check_call) is used on purpose: a failure means the branch
  # didn't exist beforehand, which is fine.
  util.call(*cmd, cwd=repo_dir)
164
165
def info(repo_dir, query):
  """Repo info.

  Runs `repo info .` in `repo_dir` and looks up one "key: value" line of its
  output.

  Args:
    repo_dir: root directory of repo
    query: key to query (text before the first ':' of a line)

  Returns:
    Stripped text after the first ':' of the first line whose key equals
    `query`; None if no such line exists.

  Raises:
    subprocess.CalledProcessError: if `repo info` failed and its output does
        not contain the 'Manifest branch:' line.
  """
  try:
    output = util.check_output('repo', 'info', '.', cwd=repo_dir)
  except subprocess.CalledProcessError as e:
    # e.output may be None if the error carries no captured output.
    if not e.output or 'Manifest branch:' not in e.output:
      raise
    # "repo info" may exit with error while the data we want is already
    # printed. Ignore errors for such case.
    output = e.output

  for line in output.splitlines():
    # Split on the first ':' only. Lines without any ':' (e.g. separator
    # lines) are skipped and values containing ':' are kept intact; a plain
    # two-way unpack of line.split(':') would raise ValueError on both.
    key, separator, value = line.partition(':')
    if separator and key.strip() == query:
      return value.strip()

  return None
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800187
188
def get_current_branch(repo_dir):
  """Get manifest branch of existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    Value of the 'Manifest branch' entry of `repo info`; None if absent.
  """
  manifest_branch = info(repo_dir, 'Manifest branch')
  return manifest_branch
192
193
def get_manifest_groups(repo_dir):
  """Get manifest groups of existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    Value of the 'Manifest groups' entry of `repo info`; None if absent.
  """
  manifest_groups = info(repo_dir, 'Manifest groups')
  return manifest_groups
197
198
def list_projects(repo_dir):
  """Repo list.

  Runs `repo list --path-only` with `repo_dir` as working directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    list of paths, relative to repo_dir (one per line of command output)
  """
  output = util.check_output('repo', 'list', '--path-only', cwd=repo_dir)
  return output.splitlines()
213
214
def cleanup_unexpected_files(repo_dir):
  """Clean up unexpected files in repo tree.

  Every project listed by `repo list` is cleaned with git_util.distclean,
  which is told about the project's nested sub-projects.

  Note this is not fully equivalent to 'repo sync' from scratch because:
   - This only handles git repo folders. In other words, directories under
     repo_dir not inside any git repo will not be touched.
   - It ignores files if matching gitignore pattern.
     So we can keep cache files to speed up incremental build next time.

  If you want a truly clean tree, delete the entire tree and repo sync
  directly instead.

  Args:
    repo_dir: root directory of repo
  """
  projects = list_projects(repo_dir)

  # Map each project path to the sub-project paths (relative to it) that
  # live inside it, so cleaning project X leaves X's sub-projects alone.
  nested = {}
  # Sorted order guarantees a parent directory is seen before its children.
  for project_path in sorted(projects):
    parts = project_path.split(os.sep)
    # Try the longest proper prefix first, i.e. the closest enclosing project.
    for split_at in reversed(range(1, len(parts))):
      parent = os.sep.join(parts[:split_at])
      if parent not in nested:
        continue
      nested[parent].append(os.sep.join(parts[split_at:]))
      break
    nested[project_path] = []

  for project_path in projects:
    git_repo = os.path.join(repo_dir, project_path)
    if os.path.exists(git_repo):
      git_util.distclean(git_repo, nested[project_path])
    else:
      # It should be harmless to ignore git repo nonexistence because 'repo
      # sync' will restore them.
      logger.warning('git repo not found: %s', git_repo)
254
255
def _urljoin(base, url):
  """Join `url` onto `base`, supporting "persistent-https://" URLs.

  Args:
    base: base URL
    url: absolute or relative URL to resolve against base

  Returns:
    The joined URL; a "persistent-https://" scheme is preserved.
  """
  # urlparse.urljoin doesn't recognize "persistent-https://" protocol.
  # As a hack, temporarily swap "persistent-https" with the obsolete protocol
  # "gopher" around the urlparse.urljoin call.
  placeholder = 'gopher://'
  real_scheme = 'persistent-https://'

  def swap_prefix(text, old, new):
    # Replace `old` by `new` only when it is the very beginning of `text`.
    if text.startswith(old):
      return new + text[len(old):]
    return text

  # Real gopher URLs would be mangled by the swap back, so refuse them.
  assert not base.startswith(placeholder)
  assert not url.startswith(placeholder)
  joined = urlparse.urljoin(
      swap_prefix(base, real_scheme, placeholder),
      swap_prefix(url, real_scheme, placeholder))
  return swap_prefix(joined, placeholder, real_scheme)
269
270
class ManifestParser(object):
  """Enumerates historical manifest files and parses them."""

  def __init__(self, manifest_dir):
    """Initializes the parser.

    Args:
      manifest_dir: path of manifest directory (a git checkout)
    """
    self.manifest_dir = manifest_dir
    # Base URL used to resolve relative <remote fetch="..."> values.
    self.manifest_url = get_manifest_url(manifest_dir)

  def parse_single_xml(self, content, allow_include=False):
    """Parses the content of one manifest file.

    Args:
      content: XML text of the manifest
      allow_include: whether <include> directly under the root is accepted

    Returns:
      Root element of the parsed XML.

    Raises:
      errors.InternalError: <include> is found while not allowed.
    """
    root = xml.etree.ElementTree.fromstring(content)
    if allow_include or root.find('include') is None:
      return root
    raise errors.InternalError(
        'Expects self-contained manifest. <include> is not allowed')

  def parse_xml_recursive(self, git_rev, path):
    """Parses a manifest and inlines everything it includes.

    Args:
      git_rev: git revision of the manifest repo to read files from
      path: path of the manifest file inside the manifest repo

    Returns:
      A new 'manifest' element whose children are the children of the given
      manifest, with every top-level <include> replaced by the (recursively
      expanded) children of the included file.
    """
    content = git_util.get_file_from_revision(self.manifest_dir, git_rev, path)
    root = self.parse_single_xml(content, allow_include=True)

    merged = xml.etree.ElementTree.Element('manifest')
    for node in root:
      if node.tag != 'include':
        merged.append(node)
        continue
      for included in self.parse_xml_recursive(git_rev, node.get('name')):
        merged.append(included)
    return merged

  def process_parsed_result(self, root):
    """Converts a self-contained manifest tree into path specs.

    Projects whose `groups` attribute contains the text 'notdefault' are
    skipped.

    Args:
      root: root element of a manifest without top-level <include>

    Returns:
      dict mapping project path to codechange.PathSpec.

    Raises:
      errors.InternalError: a remote's fetch URL is not at the root path of
          its server, or a project refers to an unknown remote.
    """
    default = root.find('default')
    if default is None:
      # An empty dict offers the same .get() lookups as an element.
      default = {}

    remote_fetch_map = {}
    for remote in root.findall('.//remote'):
      fetch_url = _urljoin(self.manifest_url, remote.get('fetch'))
      if urlparse.urlparse(fetch_url).path not in ('', '/'):
        # TODO(kcwu): support remote url with sub folders
        raise errors.InternalError(
            'only support git repo at root path of remote server: %s' %
            fetch_url)
      remote_fetch_map[remote.get('name')] = fetch_url

    assert root.find('include') is None

    result = {}
    for project in root.findall('.//project'):
      if 'notdefault' in project.get('groups', ''):
        continue
      project_name = project.get('name')
      for subproject in project.findall('.//project'):
        logger.warning('nested project %s.%s is not supported and ignored',
                       project_name, subproject.get('name'))

      # default path is its name
      path = project.get('path', project_name)
      revision = project.get('revision', default.get('revision'))

      remote_name = project.get('remote', default.get('remote'))
      if remote_name not in remote_fetch_map:
        raise errors.InternalError('unknown remote name=%s' % remote_name)
      repo_url = _urljoin(remote_fetch_map[remote_name], project_name)

      result[path] = codechange.PathSpec(path, repo_url, revision)
    return result

  def enumerate_manifest_commits(self, start_time, end_time, path):
    """Enumerates history of a manifest file and the files it includes.

    Delegates to git_util.get_history_recursively, passing a callback that
    reports which files a manifest includes.

    Args:
      start_time: start of time range, passed to git_util
      end_time: end of time range, passed to git_util
      path: path of the manifest file inside the manifest repo

    Returns:
      Whatever git_util.get_history_recursively returns.
    """

    def parse_dependencies(path, content):
      # Returns names of all <include>d files; None if content is not
      # well-formed XML.
      try:
        root = self.parse_single_xml(content, allow_include=True)
      except xml.etree.ElementTree.ParseError:
        logger.warning('%s syntax error, skip', path)
        return None
      return [include.get('name') for include in root.findall('.//include')]

    return git_util.get_history_recursively(self.manifest_dir, path, start_time,
                                            end_time, parse_dependencies)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800353
354
class RepoMirror(codechange.CodeStorage):
  """Repo git mirror."""

  def __init__(self, mirror_dir):
    """Initializes the mirror.

    Args:
      mirror_dir: root directory of the repo mirror
    """
    self.mirror_dir = mirror_dir

  def _url_to_cache_dir(self, url):
    """Maps a git repo URL to its path relative to the mirror directory."""
    # Here we assume remote fetch url is always at root of server url, so we can
    # simply treat whole path as repo project name.
    path = urlparse.urlparse(url).path
    # startswith() (rather than path[0]) makes an empty path fail with this
    # assertion instead of an unrelated IndexError.
    assert path.startswith('/'), 'unexpected repo url: %r' % url
    return '%s.git' % path[1:]

  def cached_git_root(self, repo_url):
    """Returns the directory of the mirrored git repo for `repo_url`."""
    cache_path = self._url_to_cache_dir(repo_url)

    # The location of chromeos manifest-internal repo mirror is irregular
    # (http://crbug.com/895957). This is a workaround.
    if cache_path == 'chromeos/manifest-internal.git':
      cache_path = 'manifest-internal.git'

    return os.path.join(self.mirror_dir, cache_path)

  def _load_project_list(self, project_root):
    """Reads `.repo/project.list` as a list of newline-terminated lines."""
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    # Use a context manager so the file handle is closed deterministically.
    with open(repo_project_list) as f:
      lines = f.readlines()
    # Callers compare and re-join entries as 'path\n'. Terminate the last
    # line if the file lacks a trailing newline; otherwise it would be fused
    # with the next entry when the list is sorted and written back.
    if lines and not lines[-1].endswith('\n'):
      lines[-1] += '\n'
    return lines

  def _save_project_list(self, project_root, lines):
    """Writes `lines`, sorted, to `.repo/project.list`."""
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    with open(repo_project_list, 'w') as f:
      f.write(''.join(sorted(lines)))

  def add_to_project_list(self, project_root, path, repo_url):
    """Adds `path` to the project list unless it is already present.

    The list is rewritten (sorted) even if nothing was added.

    Args:
      project_root: root directory of the repo project
      path: project path to add
      repo_url: unused by this implementation
    """
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line not in lines:
      lines.append(line)

    self._save_project_list(project_root, lines)

  def remove_from_project_list(self, project_root, path):
    """Removes `path` from the project list if it is present.

    The list is rewritten (sorted) even if nothing was removed.

    Args:
      project_root: root directory of the repo project
      path: project path to remove
    """
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line in lines:
      lines.remove(line)

    self._save_project_list(project_root, lines)
403 self._save_project_list(project_root, lines)