blob: 4e67767dc26dac2cdc5483c54fc7f99719a0d715 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000016import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000017import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000018import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000019import subprocess
20import sys
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com776a2c32014-04-25 07:54:25 +000027import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000028
hinoka@google.com563559c2014-04-02 00:36:24 +000029from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import gclient_utils
31import subcommand
32
# Analogous to gc.autopacklimit git config. A mirror holding more than this
# many .pack files is re-bootstrapped (see Mirror._ensure_bootstrapped).
GC_AUTOPACKLIMIT = 50

# Printed by Mirror.populate when the mirror has to be clobbered and rebuilt.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
37
# WindowsError is only defined by Python on Windows. Everywhere else fall back
# to a stand-in class so that `except WinErr` is a valid clause on every
# platform (it simply never matches there).
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
class LockError(Exception):
  """Raised when a Lockfile cannot be created, released or removed."""
  pass
47
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and rebuilt."""
  pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000050
dnj4625b5a2016-11-10 18:23:26 -080051
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn. If |count| is not positive,
      |fn| is never called and None is returned.

  Raises:
    Whatever |fn| raised on its final attempt, or any exception not listed in
    |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: `except ():` would catch nothing at all.
  if not excs:
    excs = (Exception,)

  # range() instead of xrange() so this also runs on Python 3; |count| is
  # small, so building a list on Python 2 is harmless.
  for attempt in range(1, count + 1):
    try:
      return fn()
    except excs as e:
      if attempt >= count:
        # Out of attempts: let the caller see the real failure.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, attempt, count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
85
86
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock guarding |path| is the file |path| + '.lock'. Its first line holds
  the pid of the process that created it.
  """

  # Bounds, in seconds, for one sleep between two attempts to take the lock.
  _MIN_RETRY_SLEEP = 3
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Args:
      path (str): The path being guarded; it is made absolute.
      timeout (number): How many seconds lock() may keep retrying.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  @classmethod
  def _retry_sleep(cls, remaining):
    """Returns how long to sleep when |remaining| seconds are left.

    The value is |remaining| clamped into
    [_MIN_RETRY_SLEEP, _MAX_RETRY_SLEEP].
    """
    return min(cls._MAX_RETRY_SLEEP, max(cls._MIN_RETRY_SLEEP, remaining))

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not start with an
    integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises:
      OSError: if the lockfile already exists (errno.EEXIST) or cannot be
        created.
    """
    # O_EXCL makes creation fail when the file already exists, which is what
    # makes this usable as a lock.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The `with` block closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile still cannot be created once the deadline
        has passed.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Clamp the sleep so that we neither spin nor sleep far beyond the
          # deadline. (The previous max(10, min(3, ...)) was always 10.)
          sleep_time = self._retry_sleep(self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
195
196
class Mirror(object):
  """A git mirror of a remote repository, stored in the local git cache.

  Each mirror lives in a sub-directory of the cache path (see GetCachePath)
  whose name is derived from the repository url (see UrlToCacheDir).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-wide |cachepath| attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in |cachepath| when no cache path is configured.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  # Matches the trailing '/<number>.zip' of a bootstrap archive name.
  _BOOTSTRAP_ZIP_RE = re.compile(r'/(\d+)\.zip$')

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Args:
      url (str): Url of the remote repository.
      refs (list): Optional extra refs or fetch specs to mirror.
      print_func (callable): Optional function taking a single message. When
        given, all output of this mirror goes through it instead of print().
    """
    self.url = url
    self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-only keywords."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager printing how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Name of the Google Storage bucket with bootstrap zips, or None."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]

    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

    # Literal dashes are doubled so that they stay distinguishable from the
    # single dashes that replace slashes.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    # A lone dash between two word characters stood for a slash; a doubled
    # dash stood for a literal dash (see UrlToCacheDir).
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache path for this class."""
    with cls.cachepath_lock:
      cls.cachepath = cachepath

  @classmethod
  def GetCachePath(cls):
    """Returns the cache path, resolving and memoizing it on first use.

    The value comes from the 'cache.cachepath' git config entry, falling back
    to the GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: if neither of them is set.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          # universal_newlines=True yields text rather than bytes on Python 3,
          # so the result can be joined with other path components.
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath'], universal_newlines=True).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        cls.cachepath = cachepath

      ret = cls.cachepath
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    |cmd| is the list of git arguments; |kwargs| are forwarded to
    gclient_utils.CheckCallAndFilter. The working directory defaults to the
    mirror path.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # 'true' is a program that exits immediately, so git never blocks on a
    # credentials prompt.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration a mirror needs into the repo at |cwd|.

    Args:
      cwd (str): Repository to configure; defaults to the mirror path.
      reset_fetch_config (bool): If true, first remove every existing
        remote.origin.fetch entry.

    Raises:
      ClobberNeeded: if setting gc.autodetach fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  @classmethod
  def _bootstrap_sort_key(cls, name):
    """Returns a key sorting bootstrap archive names from oldest to newest.

    Names ending in '/<number>.zip' are ordered by that number. Any other
    name sorts before all of them, ordered as a plain string.
    """
    match = cls._BOOTSTRAP_ZIP_RE.search(name)
    if match is None:
      return (0, 0, name)
    return (1, int(match.group(1)), name)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if a bootstrap zip was downloaded and extracted into
    |directory|, False otherwise.
    """
    if not self.bootstrap_bucket:
      return False
    python_fallback = (
        (sys.platform.startswith('win') and
         not gclient_utils.FindExecutable('7z')) or
        (not gclient_utils.FindExecutable('unzip')) or
        # Text mode, so that the substring test also works on Python 3.
        ('ZIP64_SUPPORT' not in subprocess.check_output(
            ['unzip', '-v'], universal_newlines=True))
    )

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)

    # All entries share the gs://bootstrap_bucket/basedir/ prefix because they
    # come from the same `gsutil ls`, so only the number before .zip matters.
    # A key function is used because sorted(cmp=...) is Python 2 only.
    ls_out_sorted = sorted(ls_out.splitlines(), key=self._bootstrap_sort_key)
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before the try block so that the cleanup below never sees an unbound
    # name.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and contains commit |revision|."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      # Text mode so that parse_fetch_spec receives str on Python 3 too.
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path, universal_newlines=True)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh repository directory when the mirror needs one.

    A new directory is prepared when |force| is set, when the mirror does not
    exist yet, or when it holds more than GC_AUTOPACKLIMIT pack files.

    Returns the path of the new temporary directory that should replace the
    mirror, or None if the existing mirror should be used as is.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures the repo at |rundir| and fetches each configured fetch spec.

    Raises:
      ClobberNeeded: if configuring fails, or if fetching the default
        '+refs/heads/*:refs/heads/*' spec fails.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    # Text mode so that the comparison with the default spec below also
    # works on Python 3.
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir, universal_newlines=True).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed', spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror so that it holds up-to-date objects.

    Args:
      depth (int): If set, only fetch this many commits of history.
      shallow (bool): If set and |depth| is not, use a depth of 10000.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      verbose (bool): Pass '-v --progress' to git fetch.
      ignore_lock (bool): If true, don't take the mirror's lockfile.
      lock_timeout (number): Seconds to wait for the lockfile.
      reset_fetch_config (bool): Forwarded to config().
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      try:
        if tempdir:
          # Swap the freshly built repository into place.
          if os.path.exists(self.mirror_path):
            gclient_utils.rmtree(self.mirror_path)
          self.Rename(tempdir, self.mirror_path)
      finally:
        # Release the lock even if moving the repository into place failed.
        if not ignore_lock:
          lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): If true, afterwards delete every other file in the same
        Google Storage folder.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path,
        universal_newlines=True).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    # mkstemp hands back an open descriptor; close it so it is not leaked.
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes temporary pack files from the repository at |path|."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s', f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s', f)

  @classmethod
  def BreakLocks(cls, path):
    """Removes every lock guarding the repository at |path|.

    Returns True if any lock was actually removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Breaks every lock on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repository found in the cache path.

    Leftover temporary entries in the cache path are deleted as well.

    Returns the list of repository directories that were unlocked.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      # Always hand back a list so that callers can iterate the result.
      return []
    dirlist = os.listdir(cachepath)
    repo_dirs = {os.path.join(cachepath, path) for path in dirlist
                 if os.path.isdir(os.path.join(cachepath, path))}
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # A lockfile may outlive its repository directory; strip '.lock' to
        # recover the path it guards.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000675
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')

  mirror = Mirror(positional[0])
  if not mirror.exists():
    # Nothing cached for this url.
    return 1

  # Report where the cached mirror lives.
  print(mirror.mirror_path)
  return 0
688
689
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated. The mirror is fetched
  # from its remote rather than from a previously uploaded bootstrap zip.
  populate_args = list(args)
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  options, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000712
713
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # (flags, add_option keyword arguments), registered in this exact order.
  option_specs = (
      (('--depth',),
       dict(type='int', help='Only cache DEPTH commits of history')),
      (('--shallow', '-s'),
       dict(action='store_true', help='Only cache 10000 commits of history')),
      (('--ref',),
       dict(action='append', help='Specify additional refs to be fetched')),
      (('--no_bootstrap', '--no-bootstrap'),
       dict(action='store_true', help="Don't bootstrap from Google Storage")),
      (('--ignore_locks', '--ignore-locks'),
       dict(action='store_true', help="Don't try to lock repository")),
      (('--reset-fetch-config',),
       dict(action='store_true', default=False,
            help='Reset the fetch config before populating the cache.')),
  )
  for flags, attrs in option_specs:
    parser.add_option(*flags, **attrs)

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(args[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # 'depth' is only forwarded when a truthy value was given on the command
  # line; otherwise populate() is called without that keyword.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000749
750
def _git_stdout(*git_args):
  """Run git with |git_args| in the cwd and return its stripped text output.

  universal_newlines=True makes check_output return text instead of bytes on
  Python 3, so the result can be compared and formatted with str values.
  Surrounding whitespace (notably the trailing newline) is removed.

  Raises:
    subprocess.CalledProcessError: if git exits with a non-zero status.
  """
  return subprocess.check_output(
      [Mirror.git_exe] + list(git_args), universal_newlines=True).strip()


@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    # Validate with parser.error (like the sibling commands) rather than an
    # assert, which would be stripped when running under -O.
    if args:
      parser.error('fetch --all does not take a repository argument')
    remotes = _git_stdout('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = _git_stdout('rev-parse', '--abbrev-ref', 'HEAD')
    # 'HEAD' is what rev-parse prints when no branch is checked out.
    if current_branch != 'HEAD':
      try:
        upstream = _git_stdout('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError as e:
        # `git config` exits with status 1 when the key is not set; treat
        # that as "no upstream" so the 'origin' fallback below applies.
        # Any other failure is a real error and is propagated.
        if e.returncode != 1:
          raise
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  git_dir = os.path.abspath(_git_stdout('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The cwd is itself inside the cache: just refresh that mirror.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = _git_stdout('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: update the mirror first, with the
      # mirror's own print output silenced, then fetch from it.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
802
803
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    # os.listdir() yields bare entry names, so each one must be joined with
    # cachepath *before* the isfile() check; checking the bare name would
    # test it against the current working directory instead.
    candidates = [os.path.join(cachepath, name)
                  for name in os.listdir(cachepath)
                  if name.endswith('.lock')]
    lockfiles = [path for path in candidates if os.path.isfile(path)]
    # The explicit '+' matters: adjacent string literals are concatenated
    # before the method call, so without it the whole message would become
    # the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
835
836
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Create the parser with prog 'git cache' and register global options.

    Positional and keyword arguments are forwarded to
    optparse.OptionParser.__init__.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                        'Path to the directory containing the caches. Normally '
                        'deduced from git config cache.cachepath or '
                        '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse arguments, then apply the global logging and cache-dir options.

    Returns:
      The (options, args) pair produced by optparse.OptionParser.parse_args,
      with options.verbose forced to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # Map the verbosity count onto a logging level, clamping at DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    # NOTE(review): GetCachePath presumably raises RuntimeError when no cache
    # path is configured; that case is treated as "no global setting".
    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warning is used because logging.warn is a deprecated alias
        # that newer Python releases no longer provide.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
874
875
def main(argv):
  """Run the subcommand dispatcher for this module on |argv|.

  Returns whatever the dispatcher's execute() call returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
879
880
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 and a
  # short note on stderr if the user interrupts the run.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)