blob: d7ecb252efd993a34e2874678885bd993f74ee11 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wubfc4a642018-04-19 11:54:08 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Repo utility.
6
7This module provides wrapper for "repo" (a Google-built repository management
8tool that runs on top of git) and related utility functions.
9"""
10
11from __future__ import print_function
12import logging
13import os
14import re
Kuang-che Wu34ab7b42019-10-28 19:40:05 +080015import subprocess
Kuang-che Wubfc4a642018-04-19 11:54:08 +080016import xml.etree.ElementTree
17
Kuang-che Wua7ddf9b2019-11-25 18:59:57 +080018from six.moves import urllib
19
Kuang-che Wud1d45b42018-07-05 00:46:45 +080020from bisect_kit import codechange
Kuang-che Wue121fae2018-11-09 16:18:39 +080021from bisect_kit import errors
Kuang-che Wubfc4a642018-04-19 11:54:08 +080022from bisect_kit import git_util
23from bisect_kit import util
24
25logger = logging.getLogger(__name__)
26
27
def get_manifest_url(manifest_dir):
  """Get manifest URL of repo project.

  The URL is read from the git config key "remote.origin.url" of the manifest
  checkout.

  Args:
    manifest_dir: path of manifest directory

  Returns:
    manifest URL, without leading/trailing whitespace.
  """
  url = util.check_output(
      'git', 'config', 'remote.origin.url', cwd=manifest_dir)
  # The command output is expected to end with a newline, which is not part
  # of the URL. Stripping is a no-op if the helper already removed it.
  return url.strip()
40
41
def find_repo_root(path):
  """Find the root path of a repo project.

  Walks from `path` towards the filesystem root and returns the first
  directory that contains a ".repo" entry.

  Args:
    path: path

  Returns:
    project root (absolute path) if path is inside a repo project; otherwise
    None
  """
  current = os.path.abspath(path)
  while not os.path.exists(os.path.join(current, '.repo')):
    parent = os.path.dirname(current)
    # dirname() of a filesystem root is the root itself. Detecting that fixed
    # point (instead of comparing against '/') also terminates for roots such
    # as '//' or a drive letter, where comparing with '/' would loop forever.
    if parent == current:
      return None
    current = parent
  return current
57
58
def init(repo_dir,
         manifest_url,
         manifest_branch=None,
         manifest_name=None,
         repo_url=None,
         reference=None,
         mirror=False):
  """Repo init.

  Runs "repo init" with `repo_dir` as working directory.

  Args:
    repo_dir: root directory of repo
    manifest_url: manifest repository location
    manifest_branch: manifest branch or revision
    manifest_name: initial manifest file name
    repo_url: repo repository location
    reference: location of mirror directory
    mirror: indicates repo mirror

  Raises:
    errors.ExternalError: if repo_dir is located inside another repo project
  """
  root = find_repo_root(repo_dir)
  # find_repo_root() returns an absolute path, so repo_dir has to be
  # normalized the same way. Otherwise a relative path (or one with a trailing
  # slash) that points at an existing repo root would be rejected by mistake.
  if root and root != os.path.abspath(repo_dir):
    raise errors.ExternalError(
        '%s should not be inside another repo project at %s' % (repo_dir, root))

  cmd = ['repo', 'init', '--manifest-url', manifest_url]
  if manifest_name:
    cmd += ['--manifest-name', manifest_name]
  if manifest_branch:
    cmd += ['--manifest-branch', manifest_branch]
  if repo_url:
    cmd += ['--repo-url', repo_url]
  if reference:
    cmd += ['--reference', reference]
  if mirror:
    cmd.append('--mirror')
  util.check_call(*cmd, cwd=repo_dir)
94
95
def cleanup_repo_generated_files(repo_dir, manifest_name='default.xml'):
  """Remove the files that <copyfile> and <linkfile> manifest tags created.

  The manifest is read from revision HEAD of the ".repo/manifests" checkout
  under `repo_dir`. If the manifest file is a symlink, the link target is
  parsed instead.

  Args:
    repo_dir: root directory of repo
    manifest_name: filename of manifest
  """
  manifest_dir = os.path.join(repo_dir, '.repo', 'manifests')
  manifest_path = os.path.join(manifest_dir, manifest_name)
  if os.path.islink(manifest_path):
    manifest_name = os.readlink(manifest_path)
  manifest = ManifestParser(manifest_dir).parse_xml_recursive(
      'HEAD', manifest_name)

  # One rule per tag: (tag name, test that the destination is the kind of
  # entry this tag generates, log message). Copied files are handled before
  # links.
  rules = (
      ('copyfile', os.path.isfile, 'delete file %r'),
      ('linkfile', os.path.islink, 'delete link %r'),
  )
  for tag, is_generated, log_format in rules:
    for node in manifest.findall('.//' + tag):
      dest = node.get('dest')
      if not dest:
        continue
      # `dest` is relative to the top of the tree
      target = os.path.join(repo_dir, dest)
      if is_generated(target):
        logger.debug(log_format, target)
        os.unlink(target)
131
132
def sync(repo_dir, jobs=16, manifest_name=None, current_branch=False):
  """Run "repo sync -q --force-sync" in a repo tree.

  Args:
    repo_dir: root directory of repo
    jobs: projects to fetch simultaneously; a falsy value omits the -j flag
    manifest_name: filename of manifest
    current_branch: fetch only current branch
  """
  # Workaround to prevent garbage files left between repo syncs
  # (http://crbug.com/881783).
  # NOTE(review): the cleanup always inspects the default manifest name, not
  # `manifest_name` -- confirm this is intended.
  cleanup_repo_generated_files(repo_dir)

  optional_args = []
  if jobs:
    optional_args.extend(['-j', str(jobs)])
  if manifest_name:
    optional_args.extend(['--manifest-name', manifest_name])
  if current_branch:
    optional_args.append('--current-branch')
  util.check_call(
      'repo', 'sync', '-q', '--force-sync', *optional_args, cwd=repo_dir)
154
155
def abandon(repo_dir, branch_name):
  """Run "repo abandon" for one branch.

  Args:
    repo_dir: root directory of repo
    branch_name: branch name to abandon
  """
  command = ('repo', 'abandon', branch_name)
  # The result is deliberately not checked: a failure means the branch didn't
  # exist beforehand, which is fine.
  util.call(*command, cwd=repo_dir)
165
166
def info(repo_dir, query):
  """Repo info.

  Runs "repo info ." and looks up one "key: value" line in its output.

  Args:
    repo_dir: root directory of repo
    query: key to query

  Returns:
    the stripped value of the first line whose key equals `query`; None if no
    such line exists.

  Raises:
    subprocess.CalledProcessError: if "repo info" failed and its captured
        output does not contain the manifest information.
  """
  try:
    output = util.check_output('repo', 'info', '.', cwd=repo_dir)
  except subprocess.CalledProcessError as e:
    # "repo info" may exit with error while the data we want is already
    # printed. Ignore errors for such case. A missing/empty output can never
    # contain the data, so re-raise the original error then as well.
    if not e.output or 'Manifest branch:' not in e.output:
      raise
    output = e.output

  for line in output.splitlines():
    # Split on the first colon only, so values that contain ':' themselves
    # stay intact. Lines without any colon (e.g. decorative separators) hold
    # no key/value pair and are skipped instead of raising ValueError.
    key, separator, value = line.partition(':')
    if not separator:
      continue
    if key.strip() == query:
      return value.strip()

  return None
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800188
189
def get_current_branch(repo_dir):
  """Return the "Manifest branch" field of an existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    the value reported by info() for the key "Manifest branch"
  """
  manifest_branch = info(repo_dir, 'Manifest branch')
  return manifest_branch
193
194
def get_manifest_groups(repo_dir):
  """Return the "Manifest groups" field of an existing repo directory.

  Args:
    repo_dir: root directory of repo

  Returns:
    the value reported by info() for the key "Manifest groups"
  """
  manifest_groups = info(repo_dir, 'Manifest groups')
  return manifest_groups
198
199
def list_projects(repo_dir):
  """List project paths by running "repo list --path-only".

  Args:
    repo_dir: root directory of repo

  Returns:
    list of paths, relative to repo_dir (one entry per output line)
  """
  output = util.check_output('repo', 'list', '--path-only', cwd=repo_dir)
  return output.splitlines()
214
215
def cleanup_unexpected_files(repo_dir):
  """Clean up unexpected files in repo tree.

  Every project reported by list_projects() is cleaned with
  git_util.distclean(), which is told about the projects nested inside it.

  Note this is not fully equivalent to 'repo sync' from scratch because:
   - This only handles git repo folders. In other words, directories under
     repo_dir not inside any git repo will not be touched.
   - It ignores files if matching gitignore pattern.
     So we can keep cache files to speed up incremental build next time.

  If you want a truly clean tree, delete the entire tree and repo sync
  directly instead.

  Args:
    repo_dir: root directory of repo
  """
  project_paths = list_projects(repo_dir)

  # Maps each project path to the paths (relative to that project) of the
  # projects directly nested below it, so that cleaning a project leaves its
  # subprojects alone.
  children = {}
  # Sorted order guarantees a parent directory is registered before any of
  # its subdirectories is examined.
  for path in sorted(project_paths):
    parts = path.split(os.sep)
    # Try the longest proper prefix first, so that only the nearest enclosing
    # project records this one.
    for cut in reversed(range(1, len(parts))):
      ancestor = os.sep.join(parts[:cut])
      if ancestor not in children:
        continue
      children[ancestor].append(os.sep.join(parts[cut:]))
      break
    children[path] = []

  for path in project_paths:
    checkout = os.path.join(repo_dir, path)
    if os.path.exists(checkout):
      git_util.distclean(checkout, children[path])
    else:
      # It should be harmless to ignore git repo nonexistence because 'repo
      # sync' will restore them.
      logger.warning('git repo not found: %s', checkout)
255
256
def _urljoin(base, url):
  """Join `url` onto `base` like urljoin, keeping "persistent-https://" URLs.

  urljoin doesn't recognize the "persistent-https://" protocol. As a hack,
  that scheme is swapped for the obsolete protocol "gopher" before joining and
  swapped back afterwards.

  Args:
    base: base URL
    url: absolute or relative URL to resolve against base

  Returns:
    the joined URL
  """
  real_scheme = 'persistent-https://'
  placeholder = 'gopher://'

  def swap_prefix(text, old, new):
    # Replace `old` by `new` only when it is the very beginning of `text`.
    if text.startswith(old):
      return new + text[len(old):]
    return text

  # Genuine gopher URLs would be indistinguishable from the placeholder.
  assert not base.startswith(placeholder)
  assert not url.startswith(placeholder)

  joined = urllib.parse.urljoin(
      swap_prefix(base, real_scheme, placeholder),
      swap_prefix(url, real_scheme, placeholder))
  return swap_prefix(joined, placeholder, real_scheme)
270
271
class ManifestParser(object):
  """Enumerates historical manifest files and parses them.

  Attributes:
    manifest_dir: path of the manifest git checkout
    manifest_url: URL obtained from get_manifest_url() for manifest_dir
  """

  def __init__(self, manifest_dir):
    self.manifest_dir = manifest_dir
    self.manifest_url = get_manifest_url(manifest_dir)

  def parse_single_xml(self, content, allow_include=False):
    """Parse the text of one manifest file.

    Args:
      content: xml text of the manifest
      allow_include: whether a top-level <include> element is acceptable

    Returns:
      root xml.etree.ElementTree.Element of the document

    Raises:
      errors.InternalError: if <include> is found while not allowed
    """
    manifest = xml.etree.ElementTree.fromstring(content)
    if allow_include:
      return manifest
    if manifest.find('include') is not None:
      raise errors.InternalError(
          'Expects self-contained manifest. <include> is not allowed')
    return manifest

  def parse_xml_recursive(self, git_rev, path):
    """Parse a manifest at a git revision, expanding <include> in place.

    Args:
      git_rev: revision of the manifest checkout to read from
      path: path of the manifest file inside the manifest checkout

    Returns:
      a new <manifest> element whose children are the top-level elements of
      the manifest, with each top-level <include> replaced by the
      (recursively expanded) content of the included file
    """
    content = git_util.get_file_from_revision(self.manifest_dir, git_rev, path)
    merged = xml.etree.ElementTree.Element('manifest')
    for child in self.parse_single_xml(content, allow_include=True):
      if child.tag != 'include':
        merged.append(child)
        continue
      included = self.parse_xml_recursive(git_rev, child.get('name'))
      merged.extend(list(included))
    return merged

  def process_parsed_result(self, root):
    """Turn a self-contained manifest tree into path specs.

    Projects whose "groups" attribute contains "notdefault" are skipped.

    Args:
      root: root element of a manifest without <include>

    Returns:
      dict which maps project path to codechange.PathSpec

    Raises:
      errors.InternalError: if a remote's fetch URL has a non-root path, or a
          project refers to an unknown remote
    """
    defaults = root.find('default')
    if defaults is None:
      defaults = {}

    fetch_url_by_remote = {}
    for remote in root.findall('.//remote'):
      # Fetch URLs may be relative to the manifest repository URL.
      fetch_url = _urljoin(self.manifest_url, remote.get('fetch'))
      remote_path = urllib.parse.urlparse(fetch_url).path
      if remote_path not in ('', '/'):
        # TODO(kcwu): support remote url with sub folders
        raise errors.InternalError(
            'only support git repo at root path of remote server: %s' %
            fetch_url)
      fetch_url_by_remote[remote.get('name')] = fetch_url

    assert root.find('include') is None

    specs = {}
    for project in root.findall('.//project'):
      if 'notdefault' in project.get('groups', ''):
        continue
      project_name = project.get('name')
      for subproject in project.findall('.//project'):
        logger.warning('nested project %s.%s is not supported and ignored',
                       project_name, subproject.get('name'))

      remote_name = project.get('remote', defaults.get('remote'))
      if remote_name not in fetch_url_by_remote:
        raise errors.InternalError('unknown remote name=%s' % remote_name)

      # default path is its name
      path = project.get('path', project_name)
      specs[path] = codechange.PathSpec(
          path,
          _urljoin(fetch_url_by_remote[remote_name], project_name),
          project.get('revision', defaults.get('revision')))
    return specs

  def enumerate_manifest_commits(self, start_time, end_time, path):
    """Enumerate commits of a manifest file and of the files it includes.

    The work is delegated to git_util.get_history_recursively(); this method
    supplies the callback that extracts the included file names.

    Args:
      start_time: start of the time range, passed to git_util
      end_time: end of the time range, passed to git_util
      path: path of the manifest file inside the manifest checkout

    Returns:
      the result of git_util.get_history_recursively()
    """

    def parse_dependencies(path, content):
      # Returns names of all <include>d files; None if content is not valid
      # xml.
      try:
        root = self.parse_single_xml(content, allow_include=True)
      except xml.etree.ElementTree.ParseError:
        logger.warning('%s syntax error, skip', path)
        return None
      return [include.get('name') for include in root.findall('.//include')]

    return git_util.get_history_recursively(
        self.manifest_dir, path, start_time, end_time, parse_dependencies)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800354
355
class RepoMirror(codechange.CodeStorage):
  """Repo git mirror.

  Attributes:
    mirror_dir: root directory of the mirror
  """

  def __init__(self, mirror_dir):
    self.mirror_dir = mirror_dir

  def _url_to_cache_dir(self, url):
    """Map a repo URL to its directory name relative to the mirror root."""
    # Here we assume remote fetch url is always at root of server url, so we can
    # simply treat whole path as repo project name.
    path = urllib.parse.urlparse(url).path
    assert path[0] == '/'
    return '%s.git' % path[1:]

  def cached_git_root(self, repo_url):
    """Return the path of the mirrored git repository for `repo_url`.

    Args:
      repo_url: URL of the git repository

    Returns:
      path under mirror_dir
    """
    cache_path = self._url_to_cache_dir(repo_url)

    # The location of chromeos manifest-internal repo mirror is irregular
    # (http://crbug.com/895957). This is a workaround.
    if cache_path == 'chromeos/manifest-internal.git':
      cache_path = 'manifest-internal.git'

    return os.path.join(self.mirror_dir, cache_path)

  def _load_project_list(self, project_root):
    """Read ".repo/project.list" as a list of lines (line endings kept)."""
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    # Use a context manager so the file handle is closed deterministically.
    with open(repo_project_list) as f:
      return f.readlines()

  def _save_project_list(self, project_root, lines):
    """Write `lines`, sorted, back to ".repo/project.list"."""
    repo_project_list = os.path.join(project_root, '.repo', 'project.list')
    with open(repo_project_list, 'w') as f:
      f.write(''.join(sorted(lines)))

  def add_to_project_list(self, project_root, path, repo_url):
    """Make sure `path` is listed in the project list of `project_root`.

    Args:
      project_root: root directory of the repo project
      path: project path to add
      repo_url: not used by this implementation
    """
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line not in lines:
      lines.append(line)

    self._save_project_list(project_root, lines)

  def remove_from_project_list(self, project_root, path):
    """Make sure `path` is not listed in the project list of `project_root`.

    Args:
      project_root: root directory of the repo project
      path: project path to remove
    """
    lines = self._load_project_list(project_root)

    line = path + '\n'
    if line in lines:
      lines.remove(line)

    self._save_project_list(project_root, lines)
404 self._save_project_list(project_root, lines)