blob: 02f8209c6c233be356f59dac5c1ec2750e2cd03f [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -08009import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000010import errno
11import logging
12import optparse
13import os
szager@chromium.org174766f2014-05-13 21:27:46 +000014import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000016import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000017import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import subprocess
19import sys
20import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000021import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000022
hinoka@google.com563559c2014-04-02 00:36:24 +000023from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000024import gclient_utils
25import subcommand
26
# Analogous to gc.autopacklimit git config: a mirror holding more than this
# many .pack files is re-bootstrapped rather than fetched into as-is.
GC_AUTOPACKLIMIT = 50

# Printed when a mirror turns out to be unusable and has to be clobbered.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
31
# WindowsError is only defined on Windows. Alias it where it exists and fall
# back to a private placeholder elsewhere, so `except WinErr:` is always valid.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000038
class LockError(Exception):
  """Raised when a cache lockfile cannot be created, removed or released."""
41
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and recreated."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
dnj4625b5a2016-11-10 18:23:26 -080045
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through. If this is not positive, |fn| is never called and None
        is returned.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises: Whatever |fn| raised on its last attempt, or any exception that is
      not listed in |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: an empty |excs| means "retry on anything".
  # (`except ():` would otherwise match nothing and never retry.)
  if not excs:
    excs = (Exception,)

  # range() rather than xrange() so this runs on Python 2 and 3 alike; |count|
  # is small, so materializing the list on Python 2 is harmless.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      attempt = i + 1
      if attempt >= count:
        # Out of attempts: let the last failure propagate unchanged.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, attempt, count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
79
80
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file "<abspath(path)>.lock"; it contains the pid
  of the process that created it.
  """

  def __init__(self, path, timeout=0):
    """Prepares (but does not acquire) a lock for |path|.

    Args:
      path (str): The path being protected; the lockfile sits next to it.
      timeout (number): How many seconds lock() may wait before giving up.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not hold an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        return int(f.readline().strip())
    except (IOError, ValueError):
      return None

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (EEXIST) if the lockfile already exists: O_EXCL makes the
    creation fail rather than reuse an existing file.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform != 'win32':
      os.remove(self.lockfile)
      return

    lockfile = os.path.normcase(self.lockfile)

    def delete():
      exitcode = subprocess.call(['cmd.exe', '/c',
                                  'del', '/f', '/q', lockfile])
      if exitcode != 0:
        raise LockError('Failed to remove lock: %s' % (lockfile,))

    exponential_backoff_retry(
        delete,
        excs=(LockError,),
        name='del [%s]' % (lockfile,))

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile still cannot be created once the deadline
          has passed.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Sleep at most 10 seconds and never past the deadline. The previous
          # max(10, min(3, ...)) expression always evaluated to 10, so even a
          # one-second timeout blocked for ten seconds.
          sleep_time = min(10, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another pid.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
189
190
class Mirror(object):
  """A local bare mirror of one remote git repository.

  Each mirror lives in a directory directly under the cache path (see
  GetCachePath); the directory name is derived from the repository url (see
  UrlToCacheDir).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-wide |cachepath| attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in |cachepath| when no cache path is configured anywhere.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle on the mirror of |url|; touches nothing on disk.

    Args:
      url (str): Url of the remote repository.
      refs (list): Optional extra refs / fetch specs to fetch.
      print_func (callable): Optional sink for progress messages; when given
          it is called with the message only. Otherwise print() is used.
    """
    self.url = url
    self.fetch_specs = set(self.parse_fetch_spec(ref) for ref in (refs or []))
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-style kwargs."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Google Storage bucket holding bootstrap zips for this url, or None."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from an existing cache directory path."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Literal dashes are doubled first so a single dash can stand for '/'.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache path for every Mirror in this process."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache path, resolving and memoizing it on first use.

    The value comes from `git config cache.cachepath`, falling back to the
    GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: if neither of them is set.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # 'true' as the askpass helper keeps git/ssh from prompting interactively.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    """Writes the mirror's git configuration into |cwd| (default: the mirror).

    Raises:
      ClobberNeeded: if the first `git config` call fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if a bootstrap zip was downloaded and extracted into
    |directory|, False otherwise.
    """
    if not self.bootstrap_bucket:
      return False

    # Pick the extractor: 7z on Windows, unzip elsewhere, Python's zipfile when
    # the native tool is unavailable. (The previous elif chain also demanded
    # `unzip` on Windows, forcing the fallback even when 7z was installed.)
    if sys.platform.startswith('win'):
      python_fallback = not gclient_utils.FindExecutable('7z')
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    else:
      python_fallback = not gclient_utils.FindExecutable('unzip')

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before entering the try block so that the cleanup in `finally` never
    # refers to an unassigned name if mkdtemp itself fails.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and holds commit |revision|."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a git 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh directory to fetch into when the mirror needs one.

    A fresh directory is needed when |force| is set, when the mirror does not
    exist yet, or when it holds more than GC_AUTOPACKLIMIT pack files.

    Returns the path of the new temporary directory, or None when the existing
    mirror should be fetched into directly.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth):
    """Configures |rundir| and fetches every configured fetch spec into it.

    Raises:
      ClobberNeeded: if configuring fails or the main refs/heads fetch fails.
    """
    self.config(rundir)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0):
    """Creates or updates the mirror, holding its lockfile while doing so.

    Args:
      depth (int): Optional history depth to fetch.
      shallow (bool): If set and |depth| is not, fetch 10000 commits.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      verbose (bool): Pass -v --progress to git fetch.
      ignore_lock (bool): Do not take (or release) the mirror lock.
      lock_timeout (number): Seconds to wait for the lock.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth)
    finally:
      try:
        if tempdir:
          # Swap the freshly populated directory into place.
          if os.path.exists(self.mirror_path):
            gclient_utils.rmtree(self.mirror_path)
          self.Rename(tempdir, self.mirror_path)
      finally:
        # Release the lock even when the swap above fails, so a failed run
        # does not leave a stale lockfile behind.
        if not ignore_lock:
          lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): Also delete every other zip in the same bucket folder.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp() hands back an open descriptor; close it so it is not leaked.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes leftover temporary pack files under |path|/objects/pack."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir)
                  if f.startswith(('.tmp-', 'tmp_pack_'))]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Forcibly removes the locks and stale temporary files of mirror |path|.

    Returns True if any lock file was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Breaks any lock held on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every mirror and deletes temporary directories.

    Returns the list of mirror directories that actually had a lock removed.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set(os.path.join(cachepath, path) for path in dirlist
                    if os.path.isdir(os.path.join(cachepath, path)))
    for dirent in dirlist:
      if dirent.startswith(('_cache_tmp', 'tmp')):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # "<repo>.lock" next to the mirrors: strip the suffix to get the repo.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000642
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the mirror path and returns 0 when the cache exists, 1 otherwise.
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if mirror.exists():
    print(mirror.mirror_path)
    return 0
  return 1
655
656
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated. Bootstrapping is disabled
  # for this run via --no-bootstrap.
  populate_args = args[:]
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory. CMDpopulate has already rejected anything other
  # than exactly one url, so args[0] is safe here.
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000679
680
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  # NOTE(review): options.verbose and options.timeout are not added in this
  # function; presumably the shared option parser defines them - confirm.
  kwargs = {
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
      'ignore_lock': options.ignore_locks,
      'lock_timeout': options.timeout,
  }
  # Only forward --depth when it was given, so populate() keeps its default.
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000713
714
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  # Args (positional, after option parsing): optional remote names to fetch.
  # Returns 0 on success; git failures surface as CalledProcessError.
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    # 'HEAD' means a detached checkout, which has no configured upstream.
    if current_branch != 'HEAD':
      try:
        upstream = subprocess.check_output(
            [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
        ).strip()
      except subprocess.CalledProcessError:
        # `git config <key>` exits non-zero when the key is unset (a branch
        # without a tracking remote); fall through to the 'origin' default
        # instead of aborting the whole fetch.
        upstream = None
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # check_output() returns the path with a trailing newline; strip it before
  # normalizing so the newline is not carried into the mirror path.
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    # cwd is itself a cache mirror: updating the mirror is all there is to do.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      # The remote points into the cache, so refresh that mirror first.
      mirror = Mirror.FromPath(remote_url)
      # Silence the mirror's own output; a single status line is printed here.
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
766
767
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # Without -f|--force nothing is unlocked: the lock files found in the cache
  # directory are listed in an error message and the parser exits.
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for entry in os.listdir(cachepath):
      # os.listdir() yields bare names, so the isfile() check must be made on
      # the path joined with the cache directory, not relative to cwd.
      candidate = os.path.join(cachepath, entry)
      if entry.endswith('.lock') and os.path.isfile(candidate):
        lockfiles.append(candidate)
    # The explicit '+' matters: without it the adjacent literals fuse with
    # ', ' and the whole message becomes the join separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s' % '\n '.join(
        unlocked_repos))
799
800
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Registers the options shared by every subcommand (-c/--cache-dir,
  -v/--verbose, -q/--quiet, --timeout) and applies their side effects
  (logging level, cache path override) each time arguments are parsed.
  """

  def __init__(self, *args, **kwargs):
    """Build the parser with prog fixed to 'git cache' and add global flags.

    All positional and keyword arguments are forwarded to
    optparse.OptionParser.__init__.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                        'Path to the directory containing the caches. Normally '
                        'deduced from git config cache.cachepath or '
                        '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse arguments, then configure logging and the cache directory.

    Returns:
      The (options, args) pair produced by optparse, with options.verbose
      forced to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # verbose indexes into this list (0 -> ERROR ... 3 -> DEBUG); counts past
    # the end are clamped to the most verbose level.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # Treated as "no globally configured cache directory".
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias; logging.warning is equivalent.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
838
839
def main(argv):
  """Run the subcommand dispatcher for this module against |argv|.

  Returns whatever the dispatcher's execute() call returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
843
844
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or 1 if the user hits ^C.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)