blob: 670ca4ddab110c55d0d18c6327deee87d2c081c6 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wubfc4a642018-04-19 11:54:08 +08002# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Repo utility.
6
7This module provides wrapper for "repo" (a Google-built repository management
8tool that runs on top of git) and related utility functions.
9"""
10
11from __future__ import print_function
12import logging
13import os
14import re
Kuang-che Wu34ab7b42019-10-28 19:40:05 +080015import subprocess
Kuang-che Wubfc4a642018-04-19 11:54:08 +080016import xml.etree.ElementTree
17
Kuang-che Wua7ddf9b2019-11-25 18:59:57 +080018from six.moves import urllib
19
Kuang-che Wud1d45b42018-07-05 00:46:45 +080020from bisect_kit import codechange
Kuang-che Wue121fae2018-11-09 16:18:39 +080021from bisect_kit import errors
Kuang-che Wubfc4a642018-04-19 11:54:08 +080022from bisect_kit import git_util
23from bisect_kit import util
24
25logger = logging.getLogger(__name__)
26
27
def get_manifest_url(manifest_dir):
  """Query the origin URL of a manifest git checkout.

  Runs `git config remote.origin.url` with `manifest_dir` as the working
  directory.

  Args:
    manifest_dir: path of manifest directory

  Returns:
    manifest URL, exactly as returned by util.check_output.
  """
  git_cmd = ['git', 'config', 'remote.origin.url']
  return util.check_output(*git_cmd, cwd=manifest_dir)
40
41
def find_repo_root(path):
  """Find the root path of a repo project.

  Starting from `path`, walks up through the ancestor directories and returns
  the first one that contains a '.repo' entry.

  Args:
    path: path to start searching from; it is made absolute first

  Returns:
    project root if path is inside a repo project; otherwise None
  """
  current = os.path.abspath(path)
  while not os.path.exists(os.path.join(current, '.repo')):
    parent = os.path.dirname(current)
    # dirname() of a filesystem root is the root itself. Detecting that fixed
    # point (instead of comparing with the literal '/') also terminates for
    # roots such as '//' (kept as-is by abspath on POSIX) or 'C:\\'.
    if parent == current:
      return None
    current = parent
  return current
57
58
def init(repo_dir,
         manifest_url,
         manifest_branch=None,
         manifest_name=None,
         repo_url=None,
         reference=None,
         mirror=False):
  """Repo init.

  Args:
    repo_dir: root directory of repo
    manifest_url: manifest repository location
    manifest_branch: manifest branch or revision
    manifest_name: initial manifest file name
    repo_url: repo repository location
    reference: location of mirror directory
    mirror: indicates repo mirror

  Raises:
    errors.ExternalError: if repo_dir is located inside another repo project
  """
  root = find_repo_root(repo_dir)
  # find_repo_root() always returns an absolute, normalized path. Normalize
  # repo_dir the same way before comparing; otherwise a relative (or
  # trailing-slash) repo_dir which is itself the repo root would be
  # misreported as nested inside another project.
  if root and root != os.path.abspath(repo_dir):
    raise errors.ExternalError(
        '%s should not be inside another repo project at %s' % (repo_dir, root))

  cmd = ['repo', 'init', '--manifest-url', manifest_url]
  if manifest_name:
    cmd += ['--manifest-name', manifest_name]
  if manifest_branch:
    cmd += ['--manifest-branch', manifest_branch]
  if repo_url:
    cmd += ['--repo-url', repo_url]
  if reference:
    cmd += ['--reference', reference]
  if mirror:
    cmd.append('--mirror')
  util.check_call(*cmd, cwd=repo_dir)
94
95
def cleanup_repo_generated_files(repo_dir, manifest_name='default.xml'):
  """Remove files created by <copyfile> and <linkfile> manifest tags.

  The manifest is read from HEAD of the '.repo/manifests' checkout. If the
  manifest file is a symlink, the link target is parsed instead.

  Args:
    repo_dir: root directory of repo
    manifest_name: filename of manifest
  """
  manifest_dir = os.path.join(repo_dir, '.repo', 'manifests')
  manifest_path = os.path.join(manifest_dir, manifest_name)
  if os.path.islink(manifest_path):
    manifest_name = os.readlink(manifest_path)
  manifest = ManifestParser(manifest_dir).parse_xml_recursive(
      'HEAD', manifest_name)

  # (xpath of the tag, check that dest is what the tag generates, log format).
  # Copied files are processed before links.
  cleanup_rules = [
      ('.//copyfile', os.path.isfile, 'delete file %r'),
      ('.//linkfile', os.path.islink, 'delete link %r'),
  ]
  for xpath, is_generated, log_format in cleanup_rules:
    for element in manifest.findall(xpath):
      dest = element.get('dest')
      if not dest:
        continue
      # `dest` is relative to the top of the tree
      dest_path = os.path.join(repo_dir, dest)
      if is_generated(dest_path):
        logger.debug(log_format, dest_path)
        os.unlink(dest_path)
131
132
def sync(repo_dir, jobs=16, manifest_name=None, current_branch=False):
  """Run 'repo sync' (quiet, with --force-sync) in the given tree.

  Args:
    repo_dir: root directory of repo
    jobs: projects to fetch simultaneously
    manifest_name: filename of manifest
    current_branch: fetch only current branch
  """
  # Workaround to prevent garbage files left between repo syncs
  # (http://crbug.com/881783).
  cleanup_repo_generated_files(repo_dir)

  sync_cmd = ['repo', 'sync', '-q', '--force-sync']
  if jobs:
    sync_cmd.extend(['-j', str(jobs)])
  if manifest_name:
    sync_cmd.extend(['--manifest-name', manifest_name])
  if current_branch:
    sync_cmd.append('--current-branch')
  util.check_call(*sync_cmd, cwd=repo_dir)
154
155
def abandon(repo_dir, branch_name):
  """Run 'repo abandon' for a branch.

  Args:
    repo_dir: root directory of repo
    branch_name: branch name to abandon
  """
  abandon_cmd = ['repo', 'abandon', branch_name]
  # util.call is used so that a failure is ignored; failure means the branch
  # didn't exist beforehand.
  util.call(*abandon_cmd, cwd=repo_dir)
165
166
def info(repo_dir, query):
  """Repo info.

  Runs 'repo info .' and looks up one "key: value" line of its output.

  Args:
    repo_dir: root directory of repo
    query: key to query

  Returns:
    Value (whitespace stripped) of the first line whose key equals `query`;
    None if there is no such line.

  Raises:
    subprocess.CalledProcessError: if 'repo info' failed and its output does
        not contain 'Manifest branch:'.
  """
  try:
    output = util.check_output('repo', 'info', '.', cwd=repo_dir)
  except subprocess.CalledProcessError as e:
    if 'Manifest branch:' not in e.output:
      raise
    # "repo info" may exit with error while the data we want is already
    # printed. Ignore errors for such case.
    output = e.output
  for line in output.splitlines():
    key, separator, value = line.partition(':')
    if not separator:
      # Not a "key: value" line (e.g. a separator or blank line). Skip it
      # instead of failing to unpack.
      continue
    if key.strip() == query:
      return value.strip()

  return None
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800189
190
def get_current_branch(repo_dir):
  """Return the 'Manifest branch' value reported by info() for repo_dir."""
  manifest_branch = info(repo_dir, 'Manifest branch')
  return manifest_branch
194
195
def get_manifest_groups(repo_dir):
  """Return the 'Manifest groups' value reported by info() for repo_dir."""
  manifest_groups = info(repo_dir, 'Manifest groups')
  return manifest_groups
199
200
def list_projects(repo_dir):
  """List project paths via 'repo list --path-only'.

  Args:
    repo_dir: root directory of repo

  Returns:
    list of paths, relative to repo_dir
  """
  output = util.check_output('repo', 'list', '--path-only', cwd=repo_dir)
  # One project path per output line.
  return output.splitlines()
215
216
def cleanup_unexpected_files(repo_dir):
  """Clean up unexpected files in repo tree.

  Note this is not fully equivalent to 'repo sync' from scratch because:
   - This only handle git repo folders. In other words, directories under
     repo_dir not inside any git repo will not be touched.
   - It ignores files if matching gitignore pattern.
     So we can keep cache files to speed up incremental build next time.

  If you want truly clean tree, delete entire tree and repo sync directly
  instead.

  Args:
    repo_dir: root directory of repo
  """
  projects = list_projects(repo_dir)

  # Map each project path to the list of sub-project paths (relative to it)
  # nested directly beneath it, so cleaning project X leaves the files of X's
  # subprojects alone.
  nested = {}
  # Sorted order guarantees a parent directory is registered before any of
  # its subdirectories is examined.
  for project_path in sorted(projects):
    parts = project_path.split(os.sep)
    # Try the longest proper prefix first, so the nearest enclosing project
    # is the one that records this path.
    split_at = len(parts) - 1
    while split_at > 0:
      parent = os.sep.join(parts[:split_at])
      if parent in nested:
        nested[parent].append(os.sep.join(parts[split_at:]))
        break
      split_at -= 1
    nested[project_path] = []

  for project_path in projects:
    git_repo = os.path.join(repo_dir, project_path)
    if os.path.exists(git_repo):
      git_util.distclean(git_repo, nested[project_path])
    else:
      # It should be harmless to ignore git repo nonexistence because 'repo
      # sync' will restore them.
      logger.warning('git repo not found: %s', git_repo)
256
257
Kuang-che Wubfa64482018-10-16 11:49:49 +0800258def _urljoin(base, url):
259 # urlparse.urljoin doesn't recognize "persistent-https://" protocol.
260 # Following hack replaces "persistent-https" by obsolete protocol "gopher"
261 # before urlparse.urljoin and replaces back after urlparse.urljoin calls.
262 dummy_scheme = 'gopher://'
263 new_scheme = 'persistent-https://'
264 assert not base.startswith(dummy_scheme)
265 assert not url.startswith(dummy_scheme)
266 base = re.sub('^' + new_scheme, dummy_scheme, base)
267 url = re.sub('^' + new_scheme, dummy_scheme, url)
Kuang-che Wua7ddf9b2019-11-25 18:59:57 +0800268 result = urllib.parse.urljoin(base, url)
Kuang-che Wubfa64482018-10-16 11:49:49 +0800269 result = re.sub('^' + dummy_scheme, new_scheme, result)
270 return result
271
272
class ManifestParser(object):
  """Enumerates historical manifest files and parses them."""

  def __init__(self, manifest_dir):
    """Remember the manifest directory and look up its manifest URL.

    Args:
      manifest_dir: path of manifest directory (a git checkout)
    """
    self.manifest_dir = manifest_dir
    self.manifest_url = get_manifest_url(self.manifest_dir)

  def parse_single_xml(self, content, allow_include=False):
    """Parse the XML text of one manifest file.

    Args:
      content: XML text of the manifest
      allow_include: whether a top-level <include> tag is acceptable

    Returns:
      root element of the parsed XML

    Raises:
      errors.InternalError: if <include> is found but not allowed
    """
    root = xml.etree.ElementTree.fromstring(content)
    if allow_include:
      return root
    if root.find('include') is not None:
      raise errors.InternalError(
          'Expects self-contained manifest. <include> is not allowed')
    return root

  def parse_xml_recursive(self, git_rev, path):
    """Parse a manifest at a git revision and expand its <include> tags.

    Args:
      git_rev: git revision of the manifest repository
      path: manifest file path inside the manifest repository

    Returns:
      a new 'manifest' element whose children are the children of the parsed
      manifest, with each <include> replaced by the (recursively expanded)
      children of the included manifest
    """
    content = git_util.get_file_from_revision(self.manifest_dir, git_rev, path)
    root = self.parse_single_xml(content, allow_include=True)

    merged = xml.etree.ElementTree.Element('manifest')
    for node in root:
      if node.tag != 'include':
        merged.append(node)
        continue
      included = self.parse_xml_recursive(git_rev, node.get('name'))
      for included_node in included:
        merged.append(included_node)
    return merged

  def process_parsed_result(self, root):
    """Convert a parsed, include-free manifest into path specs.

    Projects whose 'groups' attribute contains 'notdefault' are skipped.
    Nested projects are not supported; they are logged and ignored.

    Args:
      root: root element of a manifest without <include> tags

    Returns:
      dict, which maps project path to codechange.PathSpec

    Raises:
      errors.InternalError: if a remote's fetch URL is not at the root path of
          the server, or a project refers to an unknown remote
    """
    default = root.find('default')
    if default is None:
      # No <default> tag; an empty dict supports the same .get() lookups.
      default = {}

    remote_fetch_map = {}
    for remote in root.findall('.//remote'):
      fetch_url = _urljoin(self.manifest_url, remote.get('fetch'))
      url_path = urllib.parse.urlparse(fetch_url).path
      if url_path not in ('', '/'):
        # TODO(kcwu): support remote url with sub folders
        raise errors.InternalError(
            'only support git repo at root path of remote server: %s' %
            fetch_url)
      remote_fetch_map[remote.get('name')] = fetch_url

    assert root.find('include') is None

    result = {}
    for project in root.findall('.//project'):
      if 'notdefault' in project.get('groups', ''):
        continue
      project_name = project.get('name')
      for subproject in project.findall('.//project'):
        logger.warning('nested project %s.%s is not supported and ignored',
                       project_name, subproject.get('name'))

      remote_name = project.get('remote', default.get('remote'))
      if remote_name not in remote_fetch_map:
        raise errors.InternalError('unknown remote name=%s' % remote_name)

      # default path is its name
      path = project.get('path', project_name)
      revision = project.get('revision', default.get('revision'))
      repo_url = _urljoin(remote_fetch_map[remote_name], project_name)

      result[path] = codechange.PathSpec(path, repo_url, revision)
    return result

  def enumerate_manifest_commits(self, start_time, end_time, path):
    """Enumerate history of a manifest file, following its <include> files.

    Args:
      start_time: start of the time range, passed to
          git_util.get_history_recursively
      end_time: end of the time range, passed to
          git_util.get_history_recursively
      path: manifest file path inside the manifest repository

    Returns:
      the result of git_util.get_history_recursively
    """

    def parse_dependencies(path, content):
      # Returns names of manifests included by `content`; None if `content`
      # is not well-formed XML.
      try:
        root = self.parse_single_xml(content, allow_include=True)
      except xml.etree.ElementTree.ParseError:
        logger.warning('%s syntax error, skip', path)
        return None
      return [include.get('name') for include in root.findall('.//include')]

    return git_util.get_history_recursively(
        self.manifest_dir, path, start_time, end_time, parse_dependencies)
Kuang-che Wud1d45b42018-07-05 00:46:45 +0800355
356
class RepoMirror(codechange.CodeStorage):
  """Repo git mirror."""

  def __init__(self, mirror_dir):
    """Remember the mirror location.

    Args:
      mirror_dir: root directory of the repo mirror
    """
    self.mirror_dir = mirror_dir

  def _url_to_cache_dir(self, url):
    """Map a repo URL to the mirror-relative directory name of its git repo."""
    # Here we assume remote fetch url is always at root of server url, so we can
    # simply treat whole path as repo project name.
    url_path = urllib.parse.urlparse(url).path
    assert url_path[0] == '/'
    return url_path[1:] + '.git'

  def cached_git_root(self, repo_url):
    """Return the path of the mirrored git repo for `repo_url`."""
    # The location of chromeos manifest-internal repo mirror is irregular
    # (http://crbug.com/895957). This is a workaround.
    irregular_locations = {
        'chromeos/manifest-internal.git': 'manifest-internal.git',
    }
    cache_path = self._url_to_cache_dir(repo_url)
    cache_path = irregular_locations.get(cache_path, cache_path)
    return os.path.join(self.mirror_dir, cache_path)

  def _project_list_path(self, project_root):
    """Return the path of '.repo/project.list' under project_root."""
    return os.path.join(project_root, '.repo', 'project.list')

  def _load_project_list(self, project_root):
    """Read project.list; returns its lines with line endings kept."""
    with open(self._project_list_path(project_root)) as f:
      return list(f)

  def _save_project_list(self, project_root, lines):
    """Write `lines` to project.list in sorted order."""
    with open(self._project_list_path(project_root), 'w') as f:
      f.writelines(sorted(lines))

  def add_to_project_list(self, project_root, path, repo_url):
    """Make sure `path` is listed in project.list, then rewrite the file.

    Args:
      project_root: root directory of the repo project
      path: project path to add
      repo_url: not used by this implementation
    """
    entries = self._load_project_list(project_root)

    entry = path + '\n'
    if entry not in entries:
      entries.append(entry)

    self._save_project_list(project_root, entries)

  def remove_from_project_list(self, project_root, path):
    """Drop `path` from project.list if present, then rewrite the file.

    Args:
      project_root: root directory of the repo project
      path: project path to remove
    """
    entries = self._load_project_list(project_root)

    entry = path + '\n'
    if entry in entries:
      entries.remove(entry)

    self._save_project_list(project_root, entries)