blob: 16b49cbcb8ea206b90e6a3c54eaac877c099a032 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -08009import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000010import errno
11import logging
12import optparse
13import os
szager@chromium.org174766f2014-05-13 21:27:46 +000014import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000016import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000017import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import subprocess
19import sys
20import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000021import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000022
hinoka@google.com563559c2014-04-02 00:36:24 +000023from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000024import gclient_utils
25import subcommand
26
# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

# Printed when a mirror has to be thrown away and rebuilt from scratch.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# WinErr aliases the WindowsError builtin where it exists; everywhere else it
# is a stand-in class so that "except WinErr" remains valid.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on interpreters that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000038
class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired, released or removed."""
41
class ClobberNeeded(Exception):
  """Raised when a cached repository is unusable and must be rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
dnj4625b5a2016-11-10 18:23:26 -080045
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through. If this is not positive, |fn| is never called and None
        is returned.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises: Whatever |fn| raised on its final attempt, or immediately any
      exception that is not listed in |excs|.
  """
  printerr = printerr or logging.warning
  # An empty tuple in an "except" clause catches nothing, which would silently
  # disable retrying; fall back to Exception as the docstring promises.
  if not excs:
    excs = (Exception,)
  # range() rather than xrange() so this works on both Python 2 and 3; |count|
  # is small, so materializing the list on Python 2 is harmless.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      # Out of attempts: let the last failure propagate to the caller.
      if (i+1) >= count:
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
79
80
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file "<abspath(path)>.lock"; it holds the pid of
  the process that created it. Creation uses O_CREAT | O_EXCL, so only one
  creator can succeed at a time.
  """

  # Bounds, in seconds, on how long lock() sleeps between two attempts.
  _MIN_RETRY_SLEEP = 3
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Prepares (but does not take) the lock guarding |path|.

    Args:
      path (str): Path being protected; made absolute.
      timeout (int): Approximate number of seconds lock() keeps retrying
          before giving up. 0 means a single attempt.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns the pid as an int, or None if the lockfile cannot be read or does
    not start with an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (errno EEXIST) if the lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the handle even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%s\n' % (self.pid,))

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def _retry_delay(self, elapsed):
    """Returns how many seconds lock() should sleep before its next attempt.

    The delay is the time remaining until the deadline, clamped to the range
    [_MIN_RETRY_SLEEP, _MAX_RETRY_SLEEP]. Because of the lower bound the last
    sleep may overshoot the deadline by a few seconds.
    """
    remaining = self.timeout - elapsed
    return min(self._MAX_RETRY_SLEEP, max(self._MIN_RETRY_SLEEP, remaining))

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises LockError if the lockfile still cannot be created once the
    deadline has passed.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        # While time remains, any OSError (not only EEXIST) is retried.
        if elapsed < self.timeout:
          sleep_time = self._retry_delay(elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises LockError if the lock is not held at all, or is held by a
    different pid.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
189
190
class Mirror(object):
  """A local cache ("mirror") of a single remote git repository.

  Each mirror lives in a directory named after its url (see UrlToCacheDir)
  underneath the shared cache root returned by GetCachePath().
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level 'cachepath' attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored as 'cachepath' when no cache path could be determined.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    A source that does not start with 'refs/' is taken to be a branch name
    under refs/heads/. When no destination is given it equals the source.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle for the mirror of |url|; touches nothing on disk.

    Args:
      url (str): Url of the remote repository.
      refs (list): Optional extra refs / fetch specs to fetch.
      print_func (callable): Optional one-argument function that receives
          every progress message instead of the print builtin.

    Raises RuntimeError (from GetCachePath) if no cache path is configured.
    """
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print() keywords (file=)."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Google Storage bucket with bootstrap zips for this url's host, or None."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Literal dashes are doubled first so they can be told apart from the
    # dashes that replace path separators.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    # A single dash between two word characters was a '/'; a doubled dash was
    # a literal '-'.
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache root used by all mirrors."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache root directory, resolving it on first use.

    The value comes from 'git config cache.cachepath' or, if that command
    fails, from the GIT_CACHE_PATH environment variable. It is remembered on
    the class afterwards.

    Raises RuntimeError if neither source provides a value.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd (list): Arguments to pass to git.
      **kwargs: Passed on to gclient_utils.CheckCallAndFilter. 'cwd' defaults
          to the mirror directory, and output goes through self.print unless
          'filter_fn' is given.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Point both askpass hooks at 'true' unless the caller already set them.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration the cache relies on into a repository.

    Args:
      cwd (str): Repository to configure; defaults to the mirror directory.
      reset_fetch_config (bool): Drop all existing remote.origin.fetch values
          before writing the wanted ones.

    Raises ClobberNeeded if even a trivial 'git config' write fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # Exit code 5 means the option did not exist, i.e. there was nothing
        # to reset (e.g. a freshly initialized repository). Not an error.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
      directory (str): Directory the downloaded zip file is extracted into.

    Returns True if a bootstrap zip was downloaded and extracted, else False.
    """
    if not self.bootstrap_bucket:
      return False
    python_fallback = False
    if (sys.platform.startswith('win') and
        not gclient_utils.FindExecutable('7z')):
      python_fallback = True
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    elif not gclient_utils.FindExecutable('unzip'):
      python_fallback = True

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before entering the try block so that the cleanup below never refers to
    # an unbound name when mkdtemp itself fails.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            # Best effort: any extraction failure just means "no bootstrap".
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and has |revision| as a commit."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Decides whether the mirror must be (re)built and prepares a directory.

    A rebuild is needed when |force| is set, when the mirror does not exist,
    or when it holds more than GC_AUTOPACKLIMIT pack files.

    Args:
      depth (int): Requested fetch depth; a truthy value disables
          bootstrapping from Google Storage.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      force (bool): Rebuild regardless of the state of the mirror.

    Returns the temporary directory holding the new repository, or None when
    the existing mirror should be updated in place.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures |rundir| and fetches every configured fetch spec into it.

    Raises ClobberNeeded if fetching the main refs/heads spec fails; failures
    of any other spec are only logged.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror so that it holds the remote's objects.

    Args:
      depth (int): Only fetch this many commits of history.
      shallow (bool): Use a depth of 10000 when |depth| is not given.
      bootstrap (bool): Allow bootstrapping from Google Storage.
      verbose (bool): Pass -v --progress to git fetch.
      ignore_lock (bool): Do not take the mirror's lockfile.
      lock_timeout (int): Seconds to wait for the lockfile.
      reset_fetch_config (bool): Reset remote.origin.fetch before fetching.

    Raises LockError if the mirror lock cannot be acquired.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      try:
        tempdir = self._ensure_bootstrapped(depth, bootstrap)
        rundir = tempdir or self.mirror_path
        self._fetch(rundir, verbose, depth, reset_fetch_config)
      except ClobberNeeded:
        # This is a major failure, we need to clean and force a bootstrap.
        gclient_utils.rmtree(rundir)
        # |rundir| may have been |tempdir|; forget it so that a failure below
        # cannot cause the deleted directory to be moved over the mirror.
        tempdir = None
        self.print(GIT_CACHE_CORRUPT_MESSAGE)
        tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
        assert tempdir
        self._fetch(tempdir, verbose, depth, reset_fetch_config)
      finally:
        if tempdir:
          if os.path.exists(self.mirror_path):
            gclient_utils.rmtree(self.mirror_path)
          self.Rename(tempdir, self.mirror_path)
    finally:
      # Always release the lock, even if moving the new mirror into place
      # failed; otherwise the stale lockfile blocks every later run.
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): Also delete every other file in the mirror's bucket folder.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp also returns an open descriptor, which has to be closed here.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes leftover temporary pack files from the repository at |path|."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Forcibly removes lock files for the repository at |path|.

    Temporary pack files are deleted as well.

    Returns True if any lock file was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Forcibly removes the locks of this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repository under the cache root.

    Temporary entries (names starting with '_cache_tmp' or 'tmp') are deleted
    along the way.

    Returns the list of repository directories that had a lock removed, or
    None if the cache path is empty.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # A lockfile whose repository directory may be missing; strip the
        # '.lock' suffix to recover the repository path.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000646
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the mirror path and returns 0 when the cache exists, otherwise
  # returns 1 without printing anything.
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
659
660
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Returns 1 on Windows (unsupported there) and 0 after the upload otherwise.
  on_windows = sys.platform.startswith('win')
  if on_windows:
    # Lets just assert we can't do this on Windows.
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated. The zip about to be
  # uploaded is built from a regular fetch, hence --no-bootstrap.
  CMDpopulate(parser, list(args) + ['--no-bootstrap'])

  # Get the repo directory.
  options, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000683
684
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Command specific options. options.verbose and options.timeout, which are
  # read further down, are not registered here, so the parser passed in is
  # expected to define them already.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # Only forward an explicit depth; otherwise populate() picks its own default.
  if options.depth:
    populate_kwargs['depth'] = options.depth

  Mirror(positional[0], refs=options.ref).populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000720
721
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd.

  If the current git directory lives under the cache path, only that mirror
  is populated. Otherwise each selected remote whose url lives under the
  cache path has its mirror populated, and 'git fetch <remote>' is then run
  for every selected remote. Returns 0.

  Raises subprocess.CalledProcessError if one of the underlying git commands
  fails (other than the lookup of an unset branch.<name>.remote key).
  """
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    # 'HEAD' is what rev-parse prints for a detached HEAD: no branch config.
    if current_branch != 'HEAD':
      try:
        upstream = subprocess.check_output(
            [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
        ).strip()
      except subprocess.CalledProcessError:
        # 'git config' exits non-zero when the key is unset, i.e. the branch
        # has no configured remote. Fall through to the 'origin' default.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # Strip the trailing newline git prints; otherwise it would end up inside
  # the path (and abspath would not collapse a '.\n' component).
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    # cwd is itself a cache mirror: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      # Silence the mirror's own output; a single status line is printed here.
      mirror.print = lambda *_args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
773
774
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around.

  Exactly one repo url, or --all, must be given. Without -f|--force nothing
  is unlocked: the command reports the '.lock' files found directly inside
  the cache directory through parser.error() instead.
  """
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for name in os.listdir(cachepath):
      # os.listdir() yields bare names, so the file test has to be made
      # against the path inside the cache directory, not against cwd.
      lock_path = os.path.join(cachepath, name)
      if name.endswith('.lock') and os.path.isfile(lock_path):
        lockfiles.append(lock_path)
    # Explicit formatting: with plain adjacent literals the whole message
    # would become the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: %s' %
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
806
807
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Create the parser and register the options shared by all commands."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse args, then apply the global options.

    Configures the logging level from -v/-q and, when --cache-dir is given,
    makes it the cache path used by Mirror.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # Each -v moves one step down this list; the index is clamped so extra
    # -v flags stay at DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
845
846
def main(argv):
  """Run the subcommand dispatcher for this module over argv.

  Returns whatever the dispatcher's execute() returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
850
851
if __name__ == '__main__':
  # Exit with main()'s result; an interrupt exits with status 1 after a short
  # note on stderr.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)