blob: cb1d6850bf8e16d545390dcc179aaeabd029fb9f [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wubfc4a642018-04-19 11:54:08 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
"""Repo utility.

This module provides a wrapper for "repo" (a Google-built repository management
tool that runs on top of git) and related utility functions.
"""
10
11from __future__ import print_function
12import logging
13import os
14import re
Kuang-che Wud1d45b42018-07-05 00:46:45 +080015import urlparse
Kuang-che Wubfc4a642018-04-19 11:54:08 +080016import xml.etree.ElementTree
17
Kuang-che Wud1d45b42018-07-05 00:46:45 +080018from bisect_kit import codechange
Kuang-che Wue121fae2018-11-09 16:18:39 +080019from bisect_kit import errors
Kuang-che Wubfc4a642018-04-19 11:54:08 +080020from bisect_kit import git_util
21from bisect_kit import util
22
23logger = logging.getLogger(__name__)
24
25
def get_manifest_url(manifest_dir):
  """Get manifest URL of repo project.

  The URL is read from the `remote.origin.url` git config entry of the
  manifest checkout.

  Args:
    manifest_dir: path of manifest directory

  Returns:
    manifest URL, as returned by util.check_output() for `git config`.
  """
  git_cmd = ['git', 'config', 'remote.origin.url']
  return util.check_output(*git_cmd, cwd=manifest_dir)
38
39
def find_repo_root(path):
  """Find the root path of a repo project.

  Starting at `path`, walk up the directory tree until a directory containing
  a `.repo` entry is found.

  Args:
    path: path to start searching from; it is converted to an absolute path

  Returns:
    project root if path is inside a repo project; otherwise None
  """
  path = os.path.abspath(path)
  while not os.path.exists(os.path.join(path, '.repo')):
    parent = os.path.dirname(path)
    # dirname() of a filesystem root is the root itself. Detecting that fixed
    # point (instead of comparing against the literal '/') also terminates
    # for roots such as '//', which POSIX abspath() preserves, and for
    # Windows drive roots.
    if parent == path:
      return None
    path = parent
  return path
55
56
def init(repo_dir,
         manifest_url,
         manifest_branch=None,
         manifest_name=None,
         repo_url=None,
         reference=None,
         mirror=False):
  """Repo init.

  Args:
    repo_dir: root directory of repo
    manifest_url: manifest repository location
    manifest_branch: manifest branch or revision
    manifest_name: initial manifest file name
    repo_url: repo repository location
    reference: location of mirror directory
    mirror: indicates repo mirror

  Raises:
    errors.ExternalError: if repo_dir is located inside another repo project
  """
  root = find_repo_root(repo_dir)
  # find_repo_root() returns an absolute path, so repo_dir has to be made
  # absolute as well; otherwise a relative (or otherwise unnormalized)
  # repo_dir that is itself the repo root would be rejected by mistake.
  if root and root != os.path.abspath(repo_dir):
    raise errors.ExternalError(
        '%s should not be inside another repo project at %s' % (repo_dir, root))

  cmd = ['repo', 'init', '--manifest-url', manifest_url]
  # Options taking a value are only passed when the value is set.
  optional_flags = (
      ('--manifest-name', manifest_name),
      ('--manifest-branch', manifest_branch),
      ('--repo-url', repo_url),
      ('--reference', reference),
  )
  for flag, value in optional_flags:
    if value:
      cmd += [flag, value]
  if mirror:
    cmd.append('--mirror')
  util.check_call(*cmd, cwd=repo_dir)
92
93
def cleanup_repo_generated_files(repo_dir, manifest_name='default.xml'):
  """Cleanup files generated by <copyfile> <linkfile> tags.

  The manifest is read from HEAD of the `.repo/manifests` checkout. If the
  manifest file is a symlink, the link target is parsed instead.

  Args:
    repo_dir: root directory of repo
    manifest_name: filename of manifest
  """
  manifest_dir = os.path.join(repo_dir, '.repo', 'manifests')
  candidate = os.path.join(manifest_dir, manifest_name)
  if os.path.islink(candidate):
    manifest_name = os.readlink(candidate)
  parser = ManifestParser(manifest_dir)
  manifest = parser.parse_xml_recursive('HEAD', manifest_name)

  # (xpath of the tag, check that the destination is what repo generated,
  # log message). <copyfile> destinations are only removed when they are
  # files, <linkfile> destinations only when they are symlinks.
  rules = (
      ('.//copyfile', os.path.isfile, 'delete file %r'),
      ('.//linkfile', os.path.islink, 'delete link %r'),
  )
  for xpath, is_generated, log_format in rules:
    for node in manifest.findall(xpath):
      dest = node.get('dest')
      if not dest:
        continue
      # `dest` is relative to the top of the tree
      target = os.path.join(repo_dir, dest)
      if is_generated(target):
        logger.debug(log_format, target)
        os.unlink(target)
129
130
def sync(repo_dir, jobs=16, manifest_name=None, current_branch=False):
  """Repo sync.

  Before syncing, files generated by <copyfile>/<linkfile> manifest tags are
  removed. That cleanup is invoked without a manifest name, so it uses its
  own default.

  Args:
    repo_dir: root directory of repo
    jobs: projects to fetch simultaneously
    manifest_name: filename of manifest
    current_branch: fetch only current branch
  """
  # Workaround to prevent garbage files left between repo syncs
  # (http://crbug.com/881783).
  cleanup_repo_generated_files(repo_dir)

  args = ['repo', 'sync', '-q', '--force-sync']
  if jobs:
    args.extend(['-j', str(jobs)])
  if manifest_name:
    args.extend(['--manifest-name', manifest_name])
  if current_branch:
    args.append('--current-branch')
  util.check_call(*args, cwd=repo_dir)
152
153
def abandon(repo_dir, branch_name):
  """Repo abandon.

  Args:
    repo_dir: root directory of repo
    branch_name: branch name to abandon
  """
  abandon_cmd = ['repo', 'abandon', branch_name]
  # util.call() is used instead of util.check_call() on purpose: a failure
  # means the branch didn't exist beforehand and is ignored.
  util.call(*abandon_cmd, cwd=repo_dir)
163
164
def info(repo_dir, query):
  """Repo info.

  Runs `repo info .` inside repo_dir and looks up one field of its
  "key: value" formatted output.

  Args:
    repo_dir: root directory of repo
    query: key to query

  Returns:
    The value (surrounding whitespace stripped) of the first line whose key
    equals `query`; None if there is no such line.
  """
  output = util.check_output('repo', 'info', '.', cwd=repo_dir)
  for line in output.splitlines():
    # Split at the first colon only, so values which contain colons
    # themselves are kept intact. Lines without any colon carry no key and
    # are skipped instead of aborting the whole lookup with ValueError.
    key, separator, value = line.partition(':')
    if not separator:
      continue
    if key.strip() == query:
      return value.strip()

  return None
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800178
179
def get_current_branch(repo_dir):
  """Get manifest branch of existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    The 'Manifest branch' field as reported by info().
  """
  manifest_branch = info(repo_dir, 'Manifest branch')
  return manifest_branch
183
184
def get_manifest_groups(repo_dir):
  """Get manifest group of existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    The 'Manifest groups' field as reported by info().
  """
  manifest_groups = info(repo_dir, 'Manifest groups')
  return manifest_groups
188
189
def list_projects(repo_dir):
  """Repo list.

  Runs `repo list --path-only` inside repo_dir; every output line is one
  entry of the result.

  Args:
    repo_dir: root directory of repo

  Returns:
    list of paths, relative to repo_dir
  """
  listing = util.check_output('repo', 'list', '--path-only', cwd=repo_dir)
  return listing.splitlines()
204
205
def cleanup_unexpected_files(repo_dir):
  """Clean up unexpected files in repo tree.

  Note this is not fully equivalent to 'repo sync' from scratch because:
    - This only handle git repo folders. In other words, directories under
      repo_dir not inside any git repo will not be touched.
    - It ignores files if matching gitignore pattern.
      So we can keep cache files to speed up incremental build next time.

  If you want truly clean tree, delete entire tree and repo sync directly
  instead.

  Args:
    repo_dir: root directory of repo
  """
  project_paths = list_projects(repo_dir)

  # Maps each project path to the paths (relative to that project) of the
  # projects nested directly below it. Files of nested projects must not be
  # touched while their enclosing project is cleaned.
  subprojects = {}
  # Sorted order guarantees a parent directory is registered before any of
  # its subdirectories is looked at.
  for path in sorted(project_paths):
    parts = path.split(os.sep)
    # Try the longest proper prefix first, so the project is attached to its
    # closest enclosing project only.
    depth = len(parts) - 1
    while depth > 0:
      ancestor = os.sep.join(parts[:depth])
      if ancestor in subprojects:
        subprojects[ancestor].append(os.sep.join(parts[depth:]))
        break
      depth -= 1
    subprojects[path] = []

  for path in project_paths:
    checkout = os.path.join(repo_dir, path)
    if os.path.exists(checkout):
      git_util.distclean(checkout, subprojects[path])
    else:
      # It should be harmless to ignore git repo nonexistence because 'repo
      # sync' will restore them.
      logger.warning('git repo not found: %s', checkout)
245
246
Kuang-che Wubfa64482018-10-16 11:49:49 +0800247def _urljoin(base, url):
248 # urlparse.urljoin doesn't recognize "persistent-https://" protocol.
249 # Following hack replaces "persistent-https" by obsolete protocol "gopher"
250 # before urlparse.urljoin and replaces back after urlparse.urljoin calls.
251 dummy_scheme = 'gopher://'
252 new_scheme = 'persistent-https://'
253 assert not base.startswith(dummy_scheme)
254 assert not url.startswith(dummy_scheme)
255 base = re.sub('^' + new_scheme, dummy_scheme, base)
256 url = re.sub('^' + new_scheme, dummy_scheme, url)
257 result = urlparse.urljoin(base, url)
258 result = re.sub('^' + dummy_scheme, new_scheme, result)
259 return result
260
261
class ManifestParser(object):
  """Enumerates historical manifest files and parses them.

  Attributes:
    manifest_dir: path of the manifest git checkout
    manifest_url: url of the manifest repository, as returned by
        get_manifest_url(); base for resolving the `fetch` urls of remotes
  """

  def __init__(self, manifest_dir):
    self.manifest_dir = manifest_dir
    self.manifest_url = get_manifest_url(manifest_dir)

  def parse_single_xml(self, content, allow_include=False):
    """Parses the content of one manifest file.

    Args:
      content: xml text of the manifest
      allow_include: whether a top-level <include> tag is acceptable

    Returns:
      root element of the parsed xml

    Raises:
      errors.InternalError: if <include> is found but not allowed
    """
    tree = xml.etree.ElementTree.fromstring(content)
    if allow_include:
      return tree
    if tree.find('include') is not None:
      raise errors.InternalError(
          'Expects self-contained manifest. <include> is not allowed')
    return tree

  def parse_xml_recursive(self, git_rev, path):
    """Parses a manifest file and inlines everything it includes.

    Args:
      git_rev: git revision of the manifest checkout to read files from
      path: path of the manifest file inside the manifest checkout

    Returns:
      A new <manifest> element whose children are the top-level nodes of the
      manifest, with each <include> replaced by the (recursively expanded)
      nodes of the included file.
    """
    content = git_util.get_file_from_revision(self.manifest_dir, git_rev, path)
    merged = xml.etree.ElementTree.Element('manifest')
    for child in self.parse_single_xml(content, allow_include=True):
      if child.tag != 'include':
        merged.append(child)
        continue
      for included in self.parse_xml_recursive(git_rev, child.get('name')):
        merged.append(included)
    return merged

  def process_parsed_result(self, root):
    """Converts a parsed, include-free manifest into path specs.

    Projects whose `groups` attribute contains 'notdefault' are skipped.

    Args:
      root: root element of the manifest; must not have <include> children

    Returns:
      dict, which maps project path to codechange.PathSpec

    Raises:
      errors.InternalError: if the fetch url of a remote is not at the root
          path of the server, or a project refers to an unknown remote
    """
    defaults = root.find('default')
    if defaults is None:
      # A plain dict offers the same .get() lookups as an xml element.
      defaults = {}

    fetch_urls = {}
    for remote in root.findall('.//remote'):
      resolved = _urljoin(self.manifest_url, remote.get('fetch'))
      if urlparse.urlparse(resolved).path not in ('', '/'):
        # TODO(kcwu): support remote url with sub folders
        raise errors.InternalError(
            'only support git repo at root path of remote server: %s' %
            resolved)
      fetch_urls[remote.get('name')] = resolved

    assert root.find('include') is None

    specs = {}
    for project in root.findall('.//project'):
      if 'notdefault' in project.get('groups', ''):
        continue
      for child in project.findall('.//project'):
        logger.warning('nested project %s.%s is not supported and ignored',
                       project.get('name'), child.get('name'))

      name = project.get('name')
      # default path is its name
      path = project.get('path', name)

      remote_name = project.get('remote', defaults.get('remote'))
      if remote_name not in fetch_urls:
        raise errors.InternalError('unknown remote name=%s' % remote_name)

      specs[path] = codechange.PathSpec(
          path,
          _urljoin(fetch_urls[remote_name], name),
          project.get('revision', defaults.get('revision')))
    return specs

  def enumerate_manifest_commits(self, start_time, end_time, path):
    """Enumerates commits of a manifest file and of the files it includes.

    Args:
      start_time: start of time range, passed to git_util
      end_time: end of time range, passed to git_util
      path: path of the manifest file inside the manifest checkout

    Returns:
      The result of git_util.get_history_recursively().
    """

    def list_includes(path, content):
      # Callback for git_util: yields the names of the files `content`
      # includes.
      del path  # unused
      tree = self.parse_single_xml(content, allow_include=True)
      for node in tree.findall('.//include'):
        yield node.get('name')

    return git_util.get_history_recursively(self.manifest_dir, path, start_time,
                                            end_time, list_includes)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800338
339
class RepoMirror(codechange.CodeStorage):
  """Repo git mirror.

  Attributes:
    mirror_dir: root directory of the mirror
  """

  def __init__(self, mirror_dir):
    self.mirror_dir = mirror_dir

  def _url_to_cache_dir(self, url):
    """Maps a repo url to the path of its mirror, relative to mirror_dir."""
    # Here we assume remote fetch url is always at root of server url, so we can
    # simply treat whole path as repo project name.
    path = urlparse.urlparse(url).path
    assert path[0] == '/'
    return '%s.git' % path[1:]

  def cached_git_root(self, repo_url):
    """Returns the path of the mirrored git repo for repo_url."""
    cache_path = self._url_to_cache_dir(repo_url)

    # The location of chromeos manifest-internal repo mirror is irregular
    # (http://crbug.com/895957). This is a workaround.
    if cache_path == 'chromeos/manifest-internal.git':
      cache_path = 'manifest-internal.git'

    return os.path.join(self.mirror_dir, cache_path)

  def _load_project_list(self, project_root):
    """Reads `.repo/project.list` of project_root.

    Returns:
      list of lines, each one terminated by a newline
    """
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    # Use a context manager so the file handle is closed deterministically.
    with open(repo_project_list) as f:
      lines = f.readlines()
    # Entries are compared and re-joined as newline terminated lines. A final
    # line without trailing newline would never match an existing entry and
    # would get glued to its neighbour when the list is written back.
    if lines and not lines[-1].endswith('\n'):
      lines[-1] += '\n'
    return lines

  def _save_project_list(self, project_root, lines):
    """Writes lines, sorted, to `.repo/project.list` of project_root."""
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    with open(repo_project_list, 'w') as f:
      f.write(''.join(sorted(lines)))

  def add_to_project_list(self, project_root, path, repo_url):
    """Adds path to the project list unless it is already listed.

    Args:
      project_root: root directory of repo project
      path: project path to add
      repo_url: not used by this implementation
    """
    del repo_url  # unused
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line not in lines:
      lines.append(line)

    self._save_project_list(project_root, lines)

  def remove_from_project_list(self, project_root, path):
    """Removes path from the project list if it is listed.

    Args:
      project_root: root directory of repo project
      path: project path to remove
    """
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line in lines:
      lines.remove(line)

    self._save_project_list(project_root, lines)
388 self._save_project_list(project_root, lines)