blob: f17f466da8f2e388dff12f4b5583885658d90e41 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000016import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000017import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000018import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000019import subprocess
20import sys
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com776a2c32014-04-25 07:54:25 +000027import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000028
hinoka@google.com563559c2014-04-02 00:36:24 +000029from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import gclient_utils
31import subcommand
32
# Pack-file count above which a mirror is re-bootstrapped; plays the same
# role as git's own gc.autopacklimit setting.
GC_AUTOPACKLIMIT = 50

# Printed when the cache has to be thrown away and rebuilt.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# WindowsError only exists as a builtin on Windows. Everywhere else, fall back
# to a private placeholder type so that `except WinErr` is always valid.
try:
  # pylint: disable=undefined-variable,pointless-statement
  WindowsError
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
else:
  # pylint: disable=undefined-variable
  WinErr = WindowsError


class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired, released or removed."""


class ClobberNeeded(Exception):
  """Raised when the mirror is unusable and must be rebuilt from scratch."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000050
dnj4625b5a2016-11-10 18:23:26 -080051
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn. If |count| is not positive,
      |fn| is never called and None is returned.

  Raises:
    Whatever |fn| raised on its final attempt, or immediately for any
    exception type not listed in |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: `except ()` would catch nothing at all, so
  # an empty |excs| is widened to the Exception base class.
  excs = excs or (Exception,)
  # range() rather than xrange(): xrange does not exist on Python 3, and the
  # attempt count is small enough that the Python 2 list is irrelevant.
  for attempt in range(1, count + 1):
    try:
      return fn()
    except excs as e:
      if attempt >= count:
        # Out of attempts: let the caller see the real failure.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, attempt, count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
85
86
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is a sibling file named "<path>.lock" whose first line
  is the pid of the process that created it.
  """

  # Upper bound, in seconds, on a single sleep between acquisition attempts.
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Describes (but does not acquire) the lock guarding |path|.

    Args:
      path (str): Path being protected; it is made absolute.
      timeout (number): Seconds lock() may spend retrying before giving up.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  @classmethod
  def _retry_delay(cls, remaining):
    """Returns how long to sleep before the next attempt.

    The delay is capped at _MAX_RETRY_SLEEP seconds and never exceeds the
    |remaining| time budget, so lock() does not overshoot its deadline.
    """
    return min(cls._MAX_RETRY_SLEEP, remaining)

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile is missing, unreadable or malformed.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    O_CREAT | O_EXCL makes creation fail with OSError(EEXIST) if the file
    already exists, which is what turns the file into a lock.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: the lockfile could not be created before the deadline.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Was max(10, min(3, remaining)), which always evaluates to 10 and
          # therefore slept past short deadlines.
          sleep_time = self._retry_delay(self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: the lock is not held, or is held by a different pid.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
195
196
class Mirror(object):
  """A local bare-repository cache ("mirror") of one remote git repository.

  Mirrors live side by side under a single cache directory (see GetCachePath);
  each one is stored in a sub-directory whose name is derived from the remote
  URL (see UrlToCacheDir).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level `cachepath` attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in `cachepath` when neither git config nor the environment
  # names a cache directory.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  @staticmethod
  def _bootstrap_sort_key(name):
    """Sort key for `gsutil ls` entries shaped like gs://.../<number>.zip.

    Numbered zip files are ordered by their numeric value (so 10.zip sorts
    after 9.zip). Anything that does not match the pattern sorts before every
    numbered zip, ordered by name, so a stray object is never preferred over a
    real bootstrap archive.
    """
    match = re.search(r'/(\d+)\.zip$', name)
    if match is None:
      return (0, 0, name)
    return (1, int(match.group(1)), name)

  def __init__(self, url, refs=None, print_func=None):
    """Describes the mirror for |url|; nothing is created on disk.

    Args:
      url (str): Remote repository URL.
      refs (list): Optional extra refs to fetch besides refs/heads/*.
      print_func (callable): Optional one-argument sink for progress messages.
          When omitted, the builtin print is used.

    Raises:
      RuntimeError: no cache path is configured (see GetCachePath).
    """
    self.url = url
    self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-style keywords."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Google Storage bucket holding bootstrap zips for this host, or None."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache directory used by every Mirror."""
    with cls.cachepath_lock:
      cls.cachepath = cachepath

  @classmethod
  def GetCachePath(cls):
    """Returns the cache directory, resolving and memoizing it on first use.

    Lookup order: a value set through SetCachePath, then the cache.cachepath
    git config, then the GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: none of the above is set.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          # universal_newlines=True yields text on both Python 2 and 3, so the
          # result can be joined with other str path components.
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath'],
              universal_newlines=True).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        cls.cachepath = cachepath

      ret = cls.cachepath
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # 'true' as the askpass helper makes credential prompts fail immediately
    # instead of blocking on a terminal.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration a mirror needs into the repo at |cwd|.

    Args:
      cwd (str): Repository to configure; defaults to the mirror itself.
      reset_fetch_config (bool): Drop every remote.origin.fetch entry first.

    Raises:
      ClobberNeeded: even a trivial `git config` write failed.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if a bootstrap zip was downloaded and unpacked into
    |directory|, False otherwise.
    """
    if not self.bootstrap_bucket:
      return False
    python_fallback = (
        (sys.platform.startswith('win') and
         not gclient_utils.FindExecutable('7z')) or
        (not gclient_utils.FindExecutable('unzip')) or
        # Text mode so the substring test also works on Python 3.
        ('ZIP64_SUPPORT' not in subprocess.check_output(
            ['unzip', '-v'], universal_newlines=True))
    )

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)

    # Entries look like gs://.../.../9999.zip and share a common prefix; order
    # them by the number before ".zip". A key function is used because
    # sorted(cmp=...) and cmp() do not exist on Python 3.
    ls_out_sorted = sorted(ls_out.splitlines(), key=self._bootstrap_sort_key)
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before entering the try block so that the cleanup below never runs
    # against an unbound name if mkdtemp itself fails.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and holds commit |revision|."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a git `config` file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path, universal_newlines=True)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.',
          os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh repository directory when the mirror needs one.

    A new directory is prepared when |force| is set, when the mirror does not
    exist yet, or when it has accumulated more than GC_AUTOPACKLIMIT pack
    files.

    Returns the path of a temporary directory that should replace the mirror
    once it has been fetched into, or None if the existing mirror is to be
    updated in place.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.',
            len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures the repo at |rundir| and fetches every configured refspec.

    Raises:
      ClobberNeeded: fetching the main refs/heads/* refspec failed, which is
          treated as a corrupted cache.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    # Text mode: each spec is compared against a str literal below and passed
    # on as a command-line argument.
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir, universal_newlines=True).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed', spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror so it holds the remote's current refs.

    Args:
      depth (int): Only fetch this many commits of history.
      shallow (bool): Shorthand for depth=10000 when |depth| is not given.
      bootstrap (bool): Allow seeding a new mirror from Google Storage.
      verbose (bool): Pass -v --progress to git fetch.
      ignore_lock (bool): Do not take the per-mirror lockfile.
      lock_timeout (number): Seconds to wait for the lockfile.
      reset_fetch_config (bool): Reset remote.origin.fetch before fetching.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      if tempdir:
        # Swap the freshly built repository into place.
        if os.path.exists(self.mirror_path):
          gclient_utils.rmtree(self.mirror_path)
        self.Rename(tempdir, self.mirror_path)
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): Afterwards delete every other object in the same folder.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path,
        universal_newlines=True).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp returns an open descriptor; close it so it does not leak.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes leftover temporary pack files under |path|/objects/pack."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir)
                  if f.startswith(('.tmp-', 'tmp_pack_'))]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s', f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s', f)

  @classmethod
  def BreakLocks(cls, path):
    """Removes every lock guarding the repo at |path|.

    Returns True if a cache lockfile or a git config.lock was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Breaks all locks on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repo in the cache directory.

    Leftover temporary entries (names starting with '_cache_tmp' or 'tmp')
    are deleted along the way.

    Returns the list of repo directories that had a lock removed, or None if
    the cache path is empty.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = {os.path.join(cachepath, path) for path in dirlist
                 if os.path.isdir(os.path.join(cachepath, path))}
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # "<repo>.lock" may outlive its repo directory; strip the suffix so
        # the orphaned lock is still broken.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000671
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')

  mirror = Mirror(positional[0])
  if not mirror.exists():
    # No cache for this repo: report it through the exit status only.
    return 1
  # Tell the caller where the cached copy lives.
  print(mirror.mirror_path)
  return 0
684
685
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Uploading a bootstrap archive is deliberately unsupported on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # Bring the local cache up to date first. The extra flag keeps populate from
  # seeding the cache out of the very bucket that is about to be refreshed.
  CMDpopulate(parser, args + ['--no-bootstrap'])

  # Parse again with the caller's own arguments to pick up --prune and the
  # repo url.
  options, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000708
709
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # parser: option parser; the populate-specific options are registered on it.
  # args:   raw command-line arguments; exactly one repo url is expected.
  # Returns None; the work is done by Mirror.populate().

  # (flags, add_option keyword arguments), registered in this order.
  option_table = (
      (('--depth',),
       {'type': 'int',
        'help': 'Only cache DEPTH commits of history'}),
      (('--shallow', '-s'),
       {'action': 'store_true',
        'help': 'Only cache 10000 commits of history'}),
      (('--ref',),
       {'action': 'append',
        'help': 'Specify additional refs to be fetched'}),
      (('--no_bootstrap', '--no-bootstrap'),
       {'action': 'store_true',
        'help': 'Don\'t bootstrap from Google Storage'}),
      (('--ignore_locks', '--ignore-locks'),
       {'action': 'store_true',
        'help': 'Don\'t try to lock repository'}),
      (('--reset-fetch-config',),
       {'action': 'store_true',
        'default': False,
        'help': 'Reset the fetch config before populating the cache.'}),
  )
  for flags, settings in option_table:
    parser.add_option(*flags, **settings)

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  cache = Mirror(positional[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # 'depth' is forwarded only when a truthy value was supplied, so that
  # populate() otherwise falls back to its own default.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  cache.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000745
746
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  # parser: option parser; --all and --no-bootstrap are registered on it.
  # args:   raw command-line arguments; positionals are remotes to fetch.
  # Returns 0. Errors from git (other than an unset branch remote) propagate
  # as subprocess.CalledProcessError.
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  def git_output(*git_args):
    """Run git with |git_args|; return its stdout as stripped text."""
    out = subprocess.check_output([Mirror.git_exe] + list(git_args))
    # Python 3 hands back bytes; decode so the result can be compared with
    # and formatted into native strings. On Python 2 it is already a str.
    if not isinstance(out, str):
      out = out.decode('utf-8', 'ignore')
    return out.strip()

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    if args:
      # Report bad usage through the parser (like the other commands) rather
      # than an assert, which is stripped when running under -O.
      parser.error('fetch --all does not take a repository argument')
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
    # 'HEAD' means a detached checkout: there is no branch to look up.
    if current_branch != 'HEAD':
      try:
        upstream = git_output('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError:
        # 'git config' exits non-zero when the key is unset (a branch with no
        # configured remote); fall through to the 'origin' default below.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # git_output() strips the trailing newline, which would otherwise become
  # part of the path handed to Mirror.FromPath().
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The current directory is itself a cache mirror: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: refresh the mirror first, with the
      # mirror's own printing silenced.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *_args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
798
799
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # parser: option parser; --force/-f and --all/-a are registered on it.
  # args:   raw command-line arguments; one repo url unless --all is given.
  # Returns None. Without --force nothing is unlocked: the lock files found
  # in the cache directory are listed in a parser error instead.
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for entry in sorted(os.listdir(cachepath)):
      # listdir() yields bare names, so the isfile() check must be made on
      # the path joined with the cache directory, not relative to the cwd.
      candidate = os.path.join(cachepath, entry)
      if entry.endswith('.lock') and os.path.isfile(candidate):
        lockfiles.append(candidate)
    # The explicit '+' matters: with adjacent literals only, the entire
    # message would be concatenated into the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
831
832
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Registers the options shared by all commands (-c/--cache-dir, -v/--verbose,
  -q/--quiet and --timeout) and, when arguments are parsed, applies their
  side effects: logging configuration and the cache directory override.
  """

  def __init__(self, *args, **kwargs):
    """Build the parser with prog fixed to 'git cache' and the global options.

    Positional and keyword arguments are passed through to
    optparse.OptionParser.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse arguments, then configure logging and the cache directory.

    Args:
      args: argument list, passed through to optparse.OptionParser.parse_args.
      values: initial option values, passed through likewise.

    Returns:
      The (options, args) pair produced by optparse, with options.verbose
      forced to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # The verbosity count indexes this list; counts beyond the last entry are
    # clamped to DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    # A RuntimeError from GetCachePath() is treated as "no globally
    # configured cache directory".
    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      # Warn only when the command-line value really differs from the
      # globally configured one.
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias; logging.warning works on both
        # Python 2 and Python 3.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
870
871
def main(argv):
  """Run the command selected by |argv| and return the dispatcher's result.

  Builds a subcommand.CommandDispatcher for this module and executes it with a
  fresh OptionParser and the given argument list.
  """
  command_dispatcher = subcommand.CommandDispatcher(__name__)
  option_parser = OptionParser()
  return command_dispatcher.execute(option_parser, argv)
875
876
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 and a
  # short notice on stderr when the user interrupts with Ctrl-C.
  try:
    exit_status = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_status = 1
  sys.exit(exit_status)