blob: 9e2390562d9699b81428e53541d47f884400a6c0 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -08009import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000010import errno
11import logging
12import optparse
13import os
szager@chromium.org174766f2014-05-13 21:27:46 +000014import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000016import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000017import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import subprocess
19import sys
20import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000021import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000022
hinoka@google.com563559c2014-04-02 00:36:24 +000023from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000024import gclient_utils
25import subcommand
26
# Analogous to gc.autopacklimit git config.
# Mirror._ensure_bootstrapped() re-bootstraps a mirror once it holds more than
# this many .pack files.
GC_AUTOPACKLIMIT = 50

# Printed by Mirror.populate() when a ClobberNeeded error forces the cache to
# be wiped and rebuilt.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# WindowsError only exists as a builtin on Windows. Bind a portable name so
# `except WinErr:` clauses are valid everywhere; on other platforms it is a
# placeholder exception type defined only in this module.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000038
class LockError(Exception):
  """Raised by Lockfile when a lock cannot be taken, released or removed."""
41
class ClobberNeeded(Exception):
  """Signals that a mirror is unusable and must be wiped and re-created.

  Raised by Mirror.config() and Mirror._fetch(); Mirror.populate() catches it,
  deletes the working directory and forces a fresh bootstrap.
  """
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
dnj4625b5a2016-11-10 18:23:26 -080045
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn. If |count| is not positive,
      |fn| is never called and None is returned.

  Raises:
    Whatever |fn| raised on the final attempt, or any exception not in |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: `except ():` would catch nothing at all, so
  # an empty |excs| has to be mapped to the Exception base class explicitly.
  if not excs:
    excs = (Exception,)
  # range() behaves the same as xrange() here and exists on every Python.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i+1) >= count:
        # Out of attempts: let the last failure propagate unchanged.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
79
80
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file "<abspath(path)>.lock", created exclusively
  and containing the pid of the process that holds it.
  """

  # Bounds, in seconds, for a single sleep between acquisition attempts.
  _MIN_RETRY_SLEEP = 3
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Prepares (but does not take) a lock for |path|.

    Args:
      path: Path being protected; the lockfile lives next to it.
      timeout: Seconds lock() keeps retrying before giving up.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  @classmethod
  def _retry_sleep_time(cls, remaining):
    """Returns |remaining| clamped to [_MIN_RETRY_SLEEP, _MAX_RETRY_SLEEP]."""
    return min(cls._MAX_RETRY_SLEEP, max(cls._MIN_RETRY_SLEEP, remaining))

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.

    Returns: The pid as an int, or None if the file is unreadable or malformed.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises:
      OSError: if the file already exists (O_EXCL) or cannot be created.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile still cannot be created after the deadline.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # The previous max(10, min(3, ...)) always evaluated to 10, so even
          # a short timeout slept a full 10 seconds. Clamp properly instead.
          sleep_time = self._retry_sleep_time(self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns: True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
189
190
szager@chromium.org848fd492014-04-09 19:06:44 +0000191class Mirror(object):
192
193 git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
194 gsutil_exe = os.path.join(
hinoka@chromium.orgb091aa52014-12-20 01:47:31 +0000195 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
Vadim Shtayura08049e22017-10-11 00:14:52 +0000196 cachepath_lock = threading.Lock()
szager@chromium.org848fd492014-04-09 19:06:44 +0000197
Robert Iannuccia19649b2018-06-29 16:31:45 +0000198 UNSET_CACHEPATH = object()
199
200 # Used for tests
201 _GIT_CONFIG_LOCATION = []
202
szager@chromium.org66c8b852015-09-22 23:19:07 +0000203 @staticmethod
204 def parse_fetch_spec(spec):
205 """Parses and canonicalizes a fetch spec.
206
207 Returns (fetchspec, value_regex), where value_regex can be used
208 with 'git config --replace-all'.
209 """
210 parts = spec.split(':', 1)
211 src = parts[0].lstrip('+').rstrip('/')
212 if not src.startswith('refs/'):
213 src = 'refs/heads/%s' % src
214 dest = parts[1].rstrip('/') if len(parts) > 1 else src
215 regex = r'\+%s:.*' % src.replace('*', r'\*')
216 return ('+%s:%s' % (src, dest), regex)
217
szager@chromium.org848fd492014-04-09 19:06:44 +0000218 def __init__(self, url, refs=None, print_func=None):
219 self.url = url
szager@chromium.org66c8b852015-09-22 23:19:07 +0000220 self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
szager@chromium.org848fd492014-04-09 19:06:44 +0000221 self.basedir = self.UrlToCacheDir(url)
222 self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
loislo@chromium.org0fb693f2014-12-25 15:28:22 +0000223 if print_func:
224 self.print = self.print_without_file
225 self.print_func = print_func
226 else:
227 self.print = print
228
  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to self.print_func, ignoring keyword arguments.

    Installed as self.print by __init__ when a print_func was supplied, so
    print-style keywords such as file= are accepted and dropped.
    """
    self.print_func(message)
szager@chromium.org848fd492014-04-09 19:06:44 +0000231
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800232 @contextlib.contextmanager
233 def print_duration_of(self, what):
234 start = time.time()
235 try:
236 yield
237 finally:
238 self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))
239
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000240 @property
241 def bootstrap_bucket(self):
Ryan Tseng3beabd02017-03-15 13:57:58 -0700242 u = urlparse.urlparse(self.url)
243 if u.netloc == 'chromium.googlesource.com':
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000244 return 'chromium-git-cache'
Ryan Tseng3beabd02017-03-15 13:57:58 -0700245 elif u.netloc == 'chrome-internal.googlesource.com':
246 return 'chrome-git-cache'
247 # Not recognized.
248 return None
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000249
  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from a cache directory path.

    The URL is reconstructed from the directory name with CacheDirToUrl().
    """
    return cls(cls.CacheDirToUrl(path))
253
szager@chromium.org848fd492014-04-09 19:06:44 +0000254 @staticmethod
255 def UrlToCacheDir(url):
256 """Convert a git url to a normalized form for the cache dir path."""
257 parsed = urlparse.urlparse(url)
258 norm_url = parsed.netloc + parsed.path
259 if norm_url.endswith('.git'):
260 norm_url = norm_url[:-len('.git')]
261 return norm_url.replace('-', '--').replace('/', '-').lower()
262
263 @staticmethod
szager@chromium.org174766f2014-05-13 21:27:46 +0000264 def CacheDirToUrl(path):
265 """Convert a cache dir path to its corresponding url."""
266 netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
267 return 'https://%s' % netpath
268
szager@chromium.org848fd492014-04-09 19:06:44 +0000269 @classmethod
270 def SetCachePath(cls, cachepath):
Vadim Shtayura08049e22017-10-11 00:14:52 +0000271 with cls.cachepath_lock:
272 setattr(cls, 'cachepath', cachepath)
szager@chromium.org848fd492014-04-09 19:06:44 +0000273
274 @classmethod
275 def GetCachePath(cls):
Vadim Shtayura08049e22017-10-11 00:14:52 +0000276 with cls.cachepath_lock:
277 if not hasattr(cls, 'cachepath'):
278 try:
279 cachepath = subprocess.check_output(
Robert Iannuccia19649b2018-06-29 16:31:45 +0000280 [cls.git_exe, 'config'] +
281 cls._GIT_CONFIG_LOCATION +
282 ['cache.cachepath']).strip()
Vadim Shtayura08049e22017-10-11 00:14:52 +0000283 except subprocess.CalledProcessError:
Robert Iannuccia19649b2018-06-29 16:31:45 +0000284 cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
Vadim Shtayura08049e22017-10-11 00:14:52 +0000285 setattr(cls, 'cachepath', cachepath)
Robert Iannuccia19649b2018-06-29 16:31:45 +0000286
287 ret = getattr(cls, 'cachepath')
288 if ret is cls.UNSET_CACHEPATH:
289 raise RuntimeError('No cache.cachepath git configuration or '
290 '$GIT_CACHE_PATH is set.')
291 return ret
szager@chromium.org848fd492014-04-09 19:06:44 +0000292
dnj4625b5a2016-11-10 18:23:26 -0800293 def Rename(self, src, dst):
294 # This is somehow racy on Windows.
295 # Catching OSError because WindowsError isn't portable and
296 # pylint complains.
297 exponential_backoff_retry(
298 lambda: os.rename(src, dst),
299 excs=(OSError,),
300 name='rename [%s] => [%s]' % (src, dst),
301 printerr=self.print)
302
szager@chromium.org848fd492014-04-09 19:06:44 +0000303 def RunGit(self, cmd, **kwargs):
304 """Run git in a subprocess."""
305 cwd = kwargs.setdefault('cwd', self.mirror_path)
306 kwargs.setdefault('print_stdout', False)
307 kwargs.setdefault('filter_fn', self.print)
308 env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
309 env.setdefault('GIT_ASKPASS', 'true')
310 env.setdefault('SSH_ASKPASS', 'true')
311 self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
312 gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)
313
  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration this tool expects into a repository.

    Args:
      cwd: Repository directory to configure; defaults to self.mirror_path.
      reset_fetch_config: If true, drop every existing remote.origin.fetch
          entry before the fetch specs are written again.

    Raises:
      ClobberNeeded: if setting gc.autodetach fails.
      subprocess.CalledProcessError: is allowed to propagate from the other
          git invocations.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon. Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file. It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    # The trailing argument is a value regex: only an existing entry matching
    # it is replaced, so other fetch specs are left in place.
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    # Same for each extra spec recorded on this mirror.
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)
szager@chromium.org848fd492014-04-09 19:06:44 +0000352
353 def bootstrap_repo(self, directory):
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800354 """Bootstrap the repo from Google Storage if possible.
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000355
356 More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
357 """
Ryan Tseng3beabd02017-03-15 13:57:58 -0700358 if not self.bootstrap_bucket:
359 return False
Jeremy Apthorpd795ab82018-07-27 19:23:25 +0000360 python_fallback = (
361 (sys.platform.startswith('win') and
362 not gclient_utils.FindExecutable('7z')) or
363 (not gclient_utils.FindExecutable('unzip')) or
364 ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
365 )
szager@chromium.org848fd492014-04-09 19:06:44 +0000366
367 gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
hinoka@chromium.org199bc5f2014-12-17 02:17:14 +0000368 gsutil = Gsutil(self.gsutil_exe, boto_path=None)
szager@chromium.org848fd492014-04-09 19:06:44 +0000369 # Get the most recent version of the zipfile.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800370 _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
Yuwei Huanga1fbdff2019-02-01 21:51:15 +0000371
372 def compare_filenames(a, b):
373 # |a| and |b| look like gs://.../.../9999.zip. They both have the same
374 # gs://bootstrap_bucket/basedir/ prefix because they come from the same
375 # `gsutil ls`.
376 # This function only compares the numeral parts before .zip.
377 regex_pattern = r'/(\d+)\.zip$'
378 match_a = re.search(regex_pattern, a)
379 match_b = re.search(regex_pattern, b)
380 if (match_a is not None) and (match_b is not None):
381 num_a = int(match_a.group(1))
382 num_b = int(match_b.group(1))
383 return cmp(num_a, num_b)
384 # If it doesn't match the format, fallback to string comparison.
385 return cmp(a, b)
386
387 ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames)
szager@chromium.org848fd492014-04-09 19:06:44 +0000388 if not ls_out_sorted:
389 # This repo is not on Google Storage.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800390 self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
391 (self.mirror_path, self.bootstrap_bucket,
392 ' '.join((ls_err or '').splitlines(True))))
szager@chromium.org848fd492014-04-09 19:06:44 +0000393 return False
394 latest_checkout = ls_out_sorted[-1]
395
396 # Download zip file to a temporary directory.
397 try:
szager@chromium.org1cbf1042014-06-17 18:26:24 +0000398 tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
szager@chromium.org848fd492014-04-09 19:06:44 +0000399 self.print('Downloading %s' % latest_checkout)
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800400 with self.print_duration_of('download'):
401 code = gsutil.call('cp', latest_checkout, tempdir)
szager@chromium.org848fd492014-04-09 19:06:44 +0000402 if code:
szager@chromium.org848fd492014-04-09 19:06:44 +0000403 return False
404 filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
405
hinoka@google.com776a2c32014-04-25 07:54:25 +0000406 # Unpack the file with 7z on Windows, unzip on linux, or fallback.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800407 with self.print_duration_of('unzip'):
408 if not python_fallback:
409 if sys.platform.startswith('win'):
410 cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
411 else:
412 cmd = ['unzip', filename, '-d', directory]
413 retcode = subprocess.call(cmd)
hinoka@google.com776a2c32014-04-25 07:54:25 +0000414 else:
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800415 try:
416 with zipfile.ZipFile(filename, 'r') as f:
417 f.printdir()
418 f.extractall(directory)
419 except Exception as e:
420 self.print('Encountered error: %s' % str(e), file=sys.stderr)
421 retcode = 1
422 else:
423 retcode = 0
szager@chromium.org848fd492014-04-09 19:06:44 +0000424 finally:
425 # Clean up the downloaded zipfile.
dnj4625b5a2016-11-10 18:23:26 -0800426 #
427 # This is somehow racy on Windows.
428 # Catching OSError because WindowsError isn't portable and
429 # pylint complains.
430 exponential_backoff_retry(
431 lambda: gclient_utils.rm_file_or_tree(tempdir),
432 excs=(OSError,),
433 name='rmtree [%s]' % (tempdir,),
434 printerr=self.print)
szager@chromium.org848fd492014-04-09 19:06:44 +0000435
436 if retcode:
437 self.print(
438 'Extracting bootstrap zipfile %s failed.\n'
439 'Resuming normal operations.' % filename)
440 return False
441 return True
442
Andrii Shyshkalov46a672b2017-11-24 18:04:43 -0800443 def contains_revision(self, revision):
444 if not self.exists():
445 return False
446
447 if sys.platform.startswith('win'):
448 # Windows .bat scripts use ^ as escape sequence, which means we have to
449 # escape it with itself for every .bat invocation.
450 needle = '%s^^^^{commit}' % revision
451 else:
452 needle = '%s^{commit}' % revision
453 try:
454 # cat-file exits with 0 on success, that is git object of given hash was
455 # found.
456 self.RunGit(['cat-file', '-e', needle])
457 return True
458 except subprocess.CalledProcessError:
459 return False
460
szager@chromium.org848fd492014-04-09 19:06:44 +0000461 def exists(self):
462 return os.path.isfile(os.path.join(self.mirror_path, 'config'))
463
Ryan Tseng3beabd02017-03-15 13:57:58 -0700464 def supported_project(self):
465 """Returns true if this repo is known to have a bootstrap zip file."""
466 u = urlparse.urlparse(self.url)
467 return u.netloc in [
468 'chromium.googlesource.com',
469 'chrome-internal.googlesource.com']
470
szager@chromium.org66c8b852015-09-22 23:19:07 +0000471 def _preserve_fetchspec(self):
472 """Read and preserve remote.origin.fetch from an existing mirror.
473
474 This modifies self.fetch_specs.
475 """
476 if not self.exists():
477 return
478 try:
479 config_fetchspecs = subprocess.check_output(
480 [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
481 cwd=self.mirror_path)
482 for fetchspec in config_fetchspecs.splitlines():
483 self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
484 except subprocess.CalledProcessError:
485 logging.warn('Tried and failed to preserve remote.origin.fetch from the '
486 'existing cache directory. You may need to manually edit '
487 '%s and "git cache fetch" again.'
488 % os.path.join(self.mirror_path, 'config'))
489
  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh working directory when the mirror needs rebuilding.

    A rebuild is attempted when |force| is set, when the mirror does not
    exist, or when it holds more than GC_AUTOPACKLIMIT .pack files.

    Args:
      depth: Requested shallow depth; any truthy value disables the download
          of a bootstrap zip.
      bootstrap: Whether downloading a bootstrap zip is allowed.
      force: Rebuild regardless of the mirror's current state.

    Returns: Path of a temporary directory holding the new repository, or
        None when the existing mirror should be used as is.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      # Short-circuits: bootstrap_repo() only runs for a full-depth request
      # with bootstrapping enabled.
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        # Drop the unused temp dir so the caller keeps the existing mirror.
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir
531 return tempdir
532
Edward Lemur579c9862018-07-13 23:17:51 +0000533 def _fetch(self, rundir, verbose, depth, reset_fetch_config):
534 self.config(rundir, reset_fetch_config)
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000535 v = []
536 d = []
537 if verbose:
538 v = ['-v', '--progress']
539 if depth:
540 d = ['--depth', str(depth)]
541 fetch_cmd = ['fetch'] + v + d + ['origin']
542 fetch_specs = subprocess.check_output(
543 [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
544 cwd=rundir).strip().splitlines()
545 for spec in fetch_specs:
546 try:
547 self.print('Fetching %s' % spec)
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800548 with self.print_duration_of('fetch %s' % spec):
549 self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000550 except subprocess.CalledProcessError:
551 if spec == '+refs/heads/*:refs/heads/*':
hinokadcd84042016-06-09 14:26:17 -0700552 raise ClobberNeeded() # Corrupted cache.
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000553 logging.warn('Fetch of %s failed' % spec)
554
Vadim Shtayura08049e22017-10-11 00:14:52 +0000555 def populate(self, depth=None, shallow=False, bootstrap=False,
Edward Lemur579c9862018-07-13 23:17:51 +0000556 verbose=False, ignore_lock=False, lock_timeout=0,
557 reset_fetch_config=False):
szager@chromium.orgb0a13a22014-06-18 00:52:25 +0000558 assert self.GetCachePath()
szager@chromium.org848fd492014-04-09 19:06:44 +0000559 if shallow and not depth:
560 depth = 10000
561 gclient_utils.safe_makedirs(self.GetCachePath())
562
Vadim Shtayura08049e22017-10-11 00:14:52 +0000563 lockfile = Lockfile(self.mirror_path, lock_timeout)
564 if not ignore_lock:
565 lockfile.lock()
566
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000567 tempdir = None
szager@chromium.org108eced2014-06-19 21:22:43 +0000568 try:
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000569 tempdir = self._ensure_bootstrapped(depth, bootstrap)
szager@chromium.org848fd492014-04-09 19:06:44 +0000570 rundir = tempdir or self.mirror_path
Edward Lemur579c9862018-07-13 23:17:51 +0000571 self._fetch(rundir, verbose, depth, reset_fetch_config)
hinokadcd84042016-06-09 14:26:17 -0700572 except ClobberNeeded:
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000573 # This is a major failure, we need to clean and force a bootstrap.
574 gclient_utils.rmtree(rundir)
575 self.print(GIT_CACHE_CORRUPT_MESSAGE)
576 tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
577 assert tempdir
Edward Lemur579c9862018-07-13 23:17:51 +0000578 self._fetch(tempdir, verbose, depth, reset_fetch_config)
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000579 finally:
szager@chromium.org848fd492014-04-09 19:06:44 +0000580 if tempdir:
dnjb445ef52016-11-10 15:51:39 -0800581 if os.path.exists(self.mirror_path):
582 gclient_utils.rmtree(self.mirror_path)
dnj4625b5a2016-11-10 18:23:26 -0800583 self.Rename(tempdir, self.mirror_path)
Vadim Shtayura08049e22017-10-11 00:14:52 +0000584 if not ignore_lock:
585 lockfile.unlock()
szager@chromium.org848fd492014-04-09 19:06:44 +0000586
hinoka@chromium.orgc8444f32014-06-18 23:18:17 +0000587 def update_bootstrap(self, prune=False):
szager@chromium.org848fd492014-04-09 19:06:44 +0000588 # The files are named <git number>.zip
589 gen_number = subprocess.check_output(
590 [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
hinoka@chromium.org7b1cb6f2014-09-08 21:40:50 +0000591 # Run Garbage Collect to compress packfile.
592 self.RunGit(['gc', '--prune=all'])
szager@chromium.org848fd492014-04-09 19:06:44 +0000593 # Creating a temp file and then deleting it ensures we can use this name.
594 _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
595 os.remove(tmp_zipfile)
596 subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
597 gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
hinoka@chromium.orgc8444f32014-06-18 23:18:17 +0000598 gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
599 dest_name = '%s/%s.zip' % (gs_folder, gen_number)
szager@chromium.org848fd492014-04-09 19:06:44 +0000600 gsutil.call('cp', tmp_zipfile, dest_name)
601 os.remove(tmp_zipfile)
602
hinoka@chromium.orgc8444f32014-06-18 23:18:17 +0000603 # Remove all other files in the same directory.
604 if prune:
605 _, ls_out, _ = gsutil.check_call('ls', gs_folder)
606 for filename in ls_out.splitlines():
607 if filename == dest_name:
608 continue
609 gsutil.call('rm', filename)
610
szager@chromium.orgcdfcd7c2014-06-10 23:40:46 +0000611 @staticmethod
612 def DeleteTmpPackFiles(path):
613 pack_dir = os.path.join(path, 'objects', 'pack')
szager@chromium.org33418492014-06-18 19:03:39 +0000614 if not os.path.isdir(pack_dir):
615 return
szager@chromium.orgcdfcd7c2014-06-10 23:40:46 +0000616 pack_files = [f for f in os.listdir(pack_dir) if
617 f.startswith('.tmp-') or f.startswith('tmp_pack_')]
618 for f in pack_files:
619 f = os.path.join(pack_dir, f)
620 try:
621 os.remove(f)
622 logging.warn('Deleted stale temporary pack file %s' % f)
623 except OSError:
624 logging.warn('Unable to delete temporary pack file %s' % f)
szager@chromium.org174766f2014-05-13 21:27:46 +0000625
Vadim Shtayura08049e22017-10-11 00:14:52 +0000626 @classmethod
627 def BreakLocks(cls, path):
628 did_unlock = False
629 lf = Lockfile(path)
630 if lf.break_lock():
631 did_unlock = True
632 # Look for lock files that might have been left behind by an interrupted
633 # git process.
634 lf = os.path.join(path, 'config.lock')
635 if os.path.exists(lf):
636 os.remove(lf)
637 did_unlock = True
638 cls.DeleteTmpPackFiles(path)
639 return did_unlock
640
641 def unlock(self):
642 return self.BreakLocks(self.mirror_path)
643
644 @classmethod
645 def UnlockAll(cls):
646 cachepath = cls.GetCachePath()
647 if not cachepath:
648 return
649 dirlist = os.listdir(cachepath)
650 repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
651 if os.path.isdir(os.path.join(cachepath, path))])
652 for dirent in dirlist:
653 if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
654 gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
655 elif (dirent.endswith('.lock') and
656 os.path.isfile(os.path.join(cachepath, dirent))):
657 repo_dirs.add(os.path.join(cachepath, dirent[:-5]))
658
659 unlocked_repos = []
660 for repo_dir in repo_dirs:
661 if cls.BreakLocks(repo_dir):
662 unlocked_repos.append(repo_dir)
663
664 return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000665
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    return 1
  # Print the mirror's location so callers can capture it.
  print(mirror.mirror_path)
  return 0
677 return 1
678
679
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Returns 1 on Windows without doing anything, otherwise 0 after uploading.
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated.
  # A copy of |args| is extended so the original list can be parsed again
  # below; --no-bootstrap is added to the populate arguments.
  populate_args = args[:]
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune)
  return 0
szager@chromium.org848fd492014-04-09 19:06:44 +0000701 return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000702
703
# Registers the populate-specific flags, requires exactly one positional repo
# url, and forwards the parsed options to Mirror.populate().
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # (flag names, add_option keyword arguments), registered in this order.
  option_specs = (
      (('--depth',),
       dict(type='int', help='Only cache DEPTH commits of history')),
      (('--shallow', '-s'),
       dict(action='store_true', help='Only cache 10000 commits of history')),
      (('--ref',),
       dict(action='append', help='Specify additional refs to be fetched')),
      (('--no_bootstrap', '--no-bootstrap'),
       dict(action='store_true', help='Don\'t bootstrap from Google Storage')),
      (('--ignore_locks', '--ignore-locks'),
       dict(action='store_true', help='Don\'t try to lock repository')),
      (('--reset-fetch-config',),
       dict(action='store_true', default=False,
            help='Reset the fetch config before populating the cache.')),
  )
  for flag_names, spec in option_specs:
    parser.add_option(*flag_names, **spec)

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  repo_mirror = Mirror(positional[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # 'depth' is only forwarded when a truthy value was given on the command
  # line; otherwise populate() is called without that keyword.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  repo_mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000739
740
# Picks the remotes to fetch, refreshes any cache mirror that backs them via
# Mirror.populate(), then runs `git fetch <remote>` for each one. If the current
# git dir is itself inside the cache directory, only that mirror is populated.
# Returns 0; failures of the git subprocesses other than the handled
# "no configured remote" case propagate as subprocess.CalledProcessError.
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    # Report bad usage through the parser (like the other subcommands) rather
    # than with an assert, which disappears under `python -O`.
    if args:
      parser.error('fatal: fetch --all does not take a repository argument')
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    # 'HEAD' is what rev-parse prints for a detached HEAD; there is no branch
    # config to consult in that case.
    if current_branch != 'HEAD':
      try:
        upstream = subprocess.check_output(
            [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
        ).strip()
      except subprocess.CalledProcessError:
        # `git config <key>` exits non-zero when the key is not set, i.e. the
        # branch has no configured remote. Fall through to the default below.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # Strip the trailing newline git prints; otherwise it would become part of
  # the path handed to Mirror.FromPath().
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    # The current repository lives inside the cache directory: just update it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0

  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      # Swap the mirror's print attribute for a no-op (presumably to silence
      # populate()'s own output; only the single line below is printed).
      mirror.print = lambda *_: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
792
793
# Requires either exactly one repo url or --all. Without -f/--force nothing is
# unlocked: the command errors out, listing the '.lock' files found directly in
# the cache directory. With --force it unlocks via Mirror.UnlockAll() or
# Mirror(url).unlock() and logs the caches whose locks were broken.
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    # Join with cachepath *before* the isfile() check: os.listdir() yields bare
    # names, which would otherwise be resolved against the current directory.
    candidates = [os.path.join(cachepath, name)
                  for name in sorted(os.listdir(cachepath))
                  if name.endswith('.lock')]
    lockfiles = [path for path in candidates if os.path.isfile(path)]
    # The explicit '+' matters: with adjacent literals only, the whole message
    # would become the separator passed to join() instead of a prefix.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s',
                 '\n  '.join(unlocked_repos))
825
826
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Every instance gets -c/--cache-dir, -v/--verbose, -q/--quiet and --timeout,
  and parse_args() applies them (logging level, cache directory) as a side
  effect each time it is called.
  """

  def __init__(self, *args, **kwargs):
    """Create the parser with prog fixed to 'git cache' and add global flags."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse arguments, then configure logging and the cache directory.

    Returns:
      The (options, args) pair produced by optparse. options.verbose is forced
      to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # verbose starts at 1 (WARNING); each -v moves one step towards DEBUG and
    # anything past the end of the list is clamped to DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # Treated as "no globally configured cache directory".
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warning, not the deprecated logging.warn alias.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
864
865
def main(argv):
  """Run subcommand.CommandDispatcher for this module over argv.

  A fresh OptionParser is handed to the dispatcher; whatever execute() returns
  is passed back to the caller.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
869
870
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 (after a
  # short note on stderr) when interrupted with Ctrl-C.
  try:
    exit_status = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_status = 1
  sys.exit(exit_status)