#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -08009import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000010import errno
11import logging
12import optparse
13import os
szager@chromium.org174766f2014-05-13 21:27:46 +000014import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000016import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000017import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import subprocess
19import sys
20import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000021import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000022
hinoka@google.com563559c2014-04-02 00:36:24 +000023from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000024import gclient_utils
25import subcommand
26
# Analogous to gc.autopacklimit git config: a mirror holding more than this
# many .pack files is considered for re-bootstrapping.
GC_AUTOPACKLIMIT = 50

# Printed when a mirror has to be clobbered and rebuilt from scratch.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
31
# WindowsError is only defined on Windows. Everywhere else, bind WinErr to a
# private placeholder so that `except WinErr:` clauses stay valid.
try:
  WinErr = WindowsError  # pylint: disable=undefined-variable
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000038
class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired or released."""
41
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
dnj4625b5a2016-11-10 18:23:26 -080045
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises: Whatever |fn| raised on its final attempt, or immediately if the
      exception is not one of |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: `except ():` would catch nothing, so an
  # empty |excs| is replaced by the Exception base class.
  excs = excs or (Exception,)
  # range() rather than xrange() so this runs on both Python 2 and Python 3;
  # |count| is small, so materializing the list on Python 2 is harmless.
  for attempt in range(1, count + 1):
    try:
      return fn()
    except excs as e:
      if attempt >= count:
        # Out of attempts: let the last failure propagate unchanged.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, attempt, count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
79
80
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is a sibling file named `<path>.lock` that contains the
  pid of the process that created it.
  """

  def __init__(self, path, timeout=0):
    """Prepares (but does not acquire) a lock for |path|.

    Args:
      path (str): The path being guarded; the lockfile is `<abspath>.lock`.
      timeout (number): How many seconds lock() keeps retrying before it
          gives up. 0 means a single attempt.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not hold an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (errno EEXIST) if the lockfile already exists.
    """
    # O_EXCL makes creation atomic: exactly one process can succeed.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile could not be created before the deadline.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Sleep at most 10 seconds per attempt, and never past the deadline.
          # (The previous max(10, min(3, ...)) always evaluated to 10.)
          sleep_time = min(10, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
189
190
class Mirror(object):
  """A cached local copy of one remote git repository.

  Each mirror lives in a directory directly under the shared cache path; the
  directory name is derived from the repository url (see UrlToCacheDir).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level `cachepath` attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in `cachepath` when neither git config nor the environment
  # provides a cache path.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle for the mirror of |url|.

    Args:
      url (str): Url of the remote repository.
      refs (list): Optional extra refs/fetch specs to fetch into the mirror.
      print_func (callable): Optional single-argument function that receives
          every message this mirror prints. Defaults to the builtin print.
    """
    self.url = url
    self.fetch_specs = set(self.parse_fetch_spec(ref) for ref in (refs or []))
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-style kwargs."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that prints how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """The Google Storage bucket holding bootstrap zips, or None if unknown."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Literal dashes are doubled so that single dashes can stand for '/'.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache path used by all mirrors."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache path, resolving and memoizing it on first use.

    The value comes from the `cache.cachepath` git config entry, falling back
    to the GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: if neither source provides a value.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    |cmd| is the list of git arguments; |kwargs| are forwarded to
    gclient_utils.CheckCallAndFilter. Unless overridden, git runs in the
    mirror directory with interactive credential prompts disabled.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration a mirror needs into the repo at |cwd|.

    Args:
      cwd (str): Repository to configure; defaults to the mirror path.
      reset_fetch_config (bool): Drop all existing remote.origin.fetch entries
          before writing the ones this mirror wants.

    Raises:
      ClobberNeeded: if even a basic `git config` write fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if a bootstrap zip was downloaded and extracted into
    |directory|, False otherwise.
    """
    if not self.bootstrap_bucket:
      return False

    # Pick the extractor per platform. Each platform only looks for its own
    # tool, so a Windows machine with 7z but no unzip still uses 7z.
    if sys.platform.startswith('win'):
      python_fallback = not gclient_utils.FindExecutable('7z')
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    else:
      python_fallback = not gclient_utils.FindExecutable('unzip')

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before entering the try block so the cleanup below never sees an
    # unbound name if creation itself fails.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and holds commit |revision|."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a git `config` file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a repository directory to fetch into, bootstrapping if needed.

    A (re-)bootstrap is attempted when |force| is set, when the mirror does
    not exist yet, or when it holds more than GC_AUTOPACKLIMIT pack files.

    Returns the path of a freshly prepared temporary repository that the
    caller must fetch into and then move over the mirror, or None when the
    existing mirror should be updated in place.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      # Shallow (depth-limited) populates never use the bootstrap zip.
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures the repo at |rundir| and fetches every configured spec.

    Raises:
      ClobberNeeded: if configuring fails or the main heads fetch fails.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates the mirror if needed and fetches the latest objects into it.

    Args:
      depth (int): Only fetch this many commits of history.
      shallow (bool): Shorthand for depth=10000 when |depth| is not given.
      bootstrap (bool): Allow seeding the mirror from a Google Storage zip.
      verbose (bool): Pass -v --progress to git fetch.
      ignore_lock (bool): Skip taking the per-mirror lockfile.
      lock_timeout (number): Seconds to wait for the lockfile.
      reset_fetch_config (bool): Reset remote.origin.fetch before fetching.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      # |rundir| may have been |tempdir|; forget it so that a failure below
      # cannot make the cleanup move a deleted directory over the mirror.
      tempdir = None
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      try:
        if tempdir:
          if os.path.exists(self.mirror_path):
            gclient_utils.rmtree(self.mirror_path)
          self.Rename(tempdir, self.mirror_path)
      finally:
        # Release the lock even if swapping the directories failed.
        if not ignore_lock:
          lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): Also delete every other zip stored for this repo.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp() hands back an open descriptor; close it so it is not leaked.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    try:
      subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
      gsutil.call('cp', tmp_zipfile, dest_name)
    finally:
      # Remove the local archive whether or not the upload succeeded.
      if os.path.exists(tmp_zipfile):
        os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes leftover temporary pack files from the repo at |path|."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Forcibly removes every lock associated with the repo at |path|.

    Returns True if any lock file was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Forcibly removes every lock on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repo in the cache and removes temp entries.

    Returns the list of repo directories that had a lock removed, or None if
    the cache path is empty.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # A stray `<repo>.lock` file implies a repo even if its directory is
        # gone; strip the '.lock' suffix to recover the repo path.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000652
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    # No cache for this url: signal it through the exit status only.
    return 1
  # Print where the cache lives so callers can capture the path.
  print(mirror.mirror_path)
  return 0
665
666
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # This command is refused outright on Windows.
  on_windows = sys.platform.startswith('win')
  if on_windows:
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # Populate the cache first, on a copy of the arguments with bootstrapping
  # disabled; the caller's |args| list is left untouched.
  CMDpopulate(parser, args + ['--no-bootstrap'])

  # Re-parse the original arguments to recover the repo url and --prune.
  options, positional = parser.parse_args(args)
  repo_url = positional[0]
  Mirror(repo_url).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000689
690
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Command-specific options as (flags, add_option keyword arguments),
  # registered in this order.
  option_specs = (
      (('--depth',),
       {'type': 'int', 'help': 'Only cache DEPTH commits of history'}),
      (('--shallow', '-s'),
       {'action': 'store_true',
        'help': 'Only cache 10000 commits of history'}),
      (('--ref',),
       {'action': 'append',
        'help': 'Specify additional refs to be fetched'}),
      (('--no_bootstrap', '--no-bootstrap'),
       {'action': 'store_true',
        'help': 'Don\'t bootstrap from Google Storage'}),
      (('--ignore_locks', '--ignore-locks'),
       {'action': 'store_true',
        'help': 'Don\'t try to lock repository'}),
      (('--reset-fetch-config',),
       {'action': 'store_true', 'default': False,
        'help': 'Reset the fetch config before populating the cache.'}),
  )
  for flags, spec in option_specs:
    parser.add_option(*flags, **spec)

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # Only forward a depth when one was requested; otherwise populate() is
  # called without the keyword at all.
  if options.depth:
    populate_kwargs['depth'] = options.depth

  Mirror(url, refs=options.ref).populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000726
727
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  # Flow: choose the remotes to fetch, refresh the cache mirror behind every
  # remote whose url lives under the cache path, then run 'git fetch' for
  # each remote.  Returns 0; failing git invocations raise
  # subprocess.CalledProcessError.
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    # Reported through the parser (like the sibling commands) instead of an
    # assert, which would silently disappear under 'python -O'.
    if args:
      parser.error('fatal: fetch --all does not take a repository argument')
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    # 'HEAD' means a detached checkout, which has no branch config to read.
    if current_branch != 'HEAD':
      try:
        upstream = subprocess.check_output(
            [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
        ).strip()
      except subprocess.CalledProcessError as e:
        # 'git config' exits with status 1 when the key is not set; treat
        # that as "no upstream" so the 'origin' fallback below applies.
        # Any other failure is a real error and is propagated.
        if e.returncode != 1:
          raise
        upstream = ''
      # '.' denotes a branch tracking another local branch, not a remote.
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # Strip the trailing newline from git's output (as every other query in
  # this function does) so it does not end up in the path.
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    # The current repository is itself a cache mirror: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      # Silence the mirror's own progress output; a single summary line is
      # printed here instead.
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
779
780
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # Without -f|--force nothing is unlocked: the command only reports the
  # lock files found in the cache directory and exits via parser.error().
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for name in sorted(os.listdir(cachepath)):
      # os.listdir() yields bare names, so the file test has to be made on
      # the path joined with the cache directory, not relative to the cwd.
      candidate = os.path.join(cachepath, name)
      if name.endswith('.lock') and os.path.isfile(candidate):
        lockfiles.append(candidate)
    # The explicit '+' matters: adjacent string literals are concatenated
    # before a method call binds, which would turn the whole message into
    # the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
812
813
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    # Registers the options shared by every 'git cache' subcommand:
    # -c/--cache-dir, -v/--verbose, -q/--quiet and --timeout.
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    # Parses as optparse does, then applies the global options: configures
    # logging from the verbosity and, when --cache-dir was given, installs
    # it as the cache path.  Returns the usual (options, args) pair.
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # The verbosity count indexes this list; counts past the end are clamped
    # to the most verbose level.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # GetCachePath() raised, so there is no configured path to compare
      # the command-line value against.
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warning() rather than the deprecated logging.warn() alias.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
851
852
def main(argv):
  # Hands argv and a fresh OptionParser to a subcommand dispatcher bound to
  # this module, and returns whatever the dispatcher returns.
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
856
857
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with 1 after a short
  # notice on stderr when the user interrupts the run.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)