blob: 4a2a23741196a7331d8ba0471e579e2de051ead5 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000016import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000017import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000018import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000019import subprocess
20import sys
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com776a2c32014-04-25 07:54:25 +000027import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000028
hinoka@google.com563559c2014-04-02 00:36:24 +000029from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import gclient_utils
31import subcommand
32
szager@chromium.org301a7c32014-06-16 17:13:50 +000033# Analogous to gc.autopacklimit git config.
34GC_AUTOPACKLIMIT = 50
Takuto Ikuta9fce2132017-12-14 10:44:28 +090035
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +000036GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
37
# WindowsError only exists on Windows builds of Python. Everywhere else,
# substitute a private exception type so that "except WinErr" clauses are
# valid but never match a real error.
try:
  WinErr = WindowsError  # pylint: disable=undefined-variable
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired or released."""
47
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000050
dnj4625b5a2016-11-10 18:23:26 -080051
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through. If this is not positive, |fn| is never called and None
        is returned.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises: Whatever |fn| raised on its final attempt, or any exception raised
      by |fn| that is not covered by |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: "except ():" would catch nothing at all.
  excs = tuple(excs) or (Exception,)
  # range() rather than xrange() so this also runs on Python 3.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i+1) >= count:
        # Out of attempts: let the last failure propagate unchanged.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
85
86
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file |path| + ".lock"; it contains the pid of
  the process that created it.
  """

  # Bounds, in seconds, on a single sleep between two attempts in lock().
  _MIN_RETRY_SLEEP = 3
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Prepares (but does not acquire) the lock for |path|.

    Args:
      path (str): Path being protected; made absolute.
      timeout (int): Seconds lock() may keep retrying before giving up.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not hold an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    O_CREAT | O_EXCL makes creation fail with OSError (EEXIST) if the
    lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def _retry_delay(self, elapsed):
    """Returns how long lock() should sleep before its next attempt.

    The delay is the time remaining until the deadline, clamped to
    [_MIN_RETRY_SLEEP, _MAX_RETRY_SLEEP]. The original expression,
    max(10, min(3, remaining)), always evaluated to 10.
    """
    remaining = self.timeout - elapsed
    return min(self._MAX_RETRY_SLEEP, max(self._MIN_RETRY_SLEEP, remaining))

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile could not be created before the deadline.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          sleep_time = self._retry_delay(elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the lock is not held, or is held by another pid.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
195
196
class Mirror(object):
  """A local cache ("mirror") of one remote git repository.

  Each mirror lives in a directory, named by UrlToCacheDir(url), directly
  under the cache root returned by GetCachePath().
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level |cachepath| attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in |cachepath| when no cache path is configured.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle for the mirror of |url|; touches nothing on disk.

    Args:
      url (str): URL of the remote repository.
      refs (list): Optional extra fetch specs (see parse_fetch_spec).
      print_func (callable): Optional sink for progress messages; it is called
          with the message only. Defaults to the builtin print.
    """
    self.url = url
    self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-style kwargs."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that prints how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Google Storage bucket holding bootstrap zips, or None if unknown."""
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]

    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache root for every Mirror in this process."""
    with cls.cachepath_lock:
      cls.cachepath = cachepath

  @classmethod
  def GetCachePath(cls):
    """Returns the cache root directory, resolving and memoizing it if needed.

    Resolution order: a value set through SetCachePath, then the git config
    key cache.cachepath, then the GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: if none of the above is set.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          # universal_newlines=True yields text (not bytes) on Python 3, so
          # the result can be joined with other str path components.
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath'],
              universal_newlines=True).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        cls.cachepath = cachepath

      ret = cls.cachepath
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd (list): git arguments, without the leading executable.
      **kwargs: Passed to gclient_utils.CheckCallAndFilter. |cwd| defaults to
          the mirror directory, |print_stdout| to False and |filter_fn| to
          self.print.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Point the askpass helpers at "true" unless the caller already set them.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration the cache relies on into |cwd|.

    Args:
      cwd (str): Repository to configure; defaults to the mirror directory.
      reset_fetch_config (bool): Drop all existing remote.origin.fetch entries
          before writing the ones this Mirror knows about.

    Raises:
      ClobberNeeded: if even a trivial "git config" write fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  @staticmethod
  def _bootstrap_zip_sort_key(name):
    """Sort key for `gsutil ls` entries that look like gs://.../9999.zip.

    Entries named <number>.zip sort by that number, after every entry that
    does not match the format; the latter sort among themselves as strings.
    This replaces a cmp-style comparator, which Python 3's sorted() does not
    accept.
    """
    match = re.search(r'/(\d+)\.zip$', name)
    if match is not None:
      return (1, int(match.group(1)), name)
    return (0, 0, name)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Args:
      directory (str): Where the downloaded zip file is extracted.

    Returns True if a bootstrap zip was downloaded and extracted.
    """
    if not self.bootstrap_bucket:
      return False
    # Text-mode output so the substring test also works on Python 3.
    python_fallback = (
        (sys.platform.startswith('win') and
         not gclient_utils.FindExecutable('7z')) or
        (not gclient_utils.FindExecutable('unzip')) or
        ('ZIP64_SUPPORT' not in subprocess.check_output(
            ["unzip", "-v"], universal_newlines=True))
    )

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)

    ls_out_sorted = sorted(ls_out.splitlines(),
                           key=self._bootstrap_zip_sort_key)
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before the try block so the cleanup below never sees an unbound name.
    tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
    try:
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and has |revision| as a commit."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a git 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path, universal_newlines=True)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh repository directory when the mirror needs one.

    A fresh directory is prepared when |force| is set, when the mirror does
    not exist yet, or when it has more than GC_AUTOPACKLIMIT pack files.

    Returns the temporary directory holding the new repository (the caller
    moves it into place), or None if the existing mirror should be used.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures |rundir| and fetches every configured fetch spec into it.

    Raises:
      ClobberNeeded: if configuring fails or the refs/heads fetch fails.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir, universal_newlines=True).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates the mirror if needed and fetches the latest objects into it.

    Args:
      depth (int): Only fetch this many commits of history.
      shallow (bool): Shorthand for depth=10000 when |depth| is not given.
      bootstrap (bool): Allow bootstrapping from Google Storage.
      verbose (bool): Pass -v --progress to git fetch.
      ignore_lock (bool): Do not take (or release) the mirror's lockfile.
      lock_timeout (int): Seconds to wait for the lockfile.
      reset_fetch_config (bool): See config().
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      # |rundir| may have been |tempdir|; forget it so the cleanup below does
      # not try to rename a deleted directory if re-bootstrapping raises.
      tempdir = None
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      if tempdir:
        # Move the freshly built repository into place.
        if os.path.exists(self.mirror_path):
          gclient_utils.rmtree(self.mirror_path)
        self.Rename(tempdir, self.mirror_path)
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zips the mirror and uploads it to the bootstrap bucket.

    Args:
      prune (bool): Afterwards delete every other file in the same folder.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path,
        universal_newlines=True).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    tmp_fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    # mkstemp returns an open descriptor; close it so it is not leaked.
    os.close(tmp_fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes leftover temporary pack files under |path|/objects/pack."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir)
                  if f.startswith(('.tmp-', 'tmp_pack_'))]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Removes every lock found for the repository at |path|.

    Returns True if any lock was actually removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Breaks every lock on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repository under the cache root.

    Entries in the cache root whose name starts with '_cache_tmp' or 'tmp'
    are deleted along the way.

    Returns the list of repository directories that had a lock removed, or
    None if the cache path is empty.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # "<repo>.lock" next to the repo dir: strip the 5-char suffix.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000680
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    # No cache for this url: signal it through the exit code only.
    return 1
  # Print the location of the cache.
  print(mirror.mirror_path)
  return 0
693
694
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  on_windows = sys.platform.startswith('win')
  if on_windows:
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated. The populate step gets
  # its own copy of the arguments, with bootstrapping turned off.
  CMDpopulate(parser, args[:] + ['--no-bootstrap'])

  # Get the repo directory: the first positional argument is the repo url.
  options, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000717
718
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Options specific to this subcommand; the parser supplies the rest.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(positional[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # Only forward an explicit depth; otherwise populate() uses its default.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000754
755
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  def git_output(*cmd):
    """Runs git with |cmd|; returns its stdout as text, whitespace-stripped."""
    out = subprocess.check_output([Mirror.git_exe] + list(cmd))
    if not isinstance(out, str):
      # Python 3 returns bytes, which would break the string comparisons and
      # '%s' formatting below. Python 2 output is already str.
      out = out.decode('utf-8')
    return out.strip()

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    if args:
      # Reported through the parser instead of assert, which is stripped
      # when Python runs with -O.
      parser.error('fatal: fetch --all does not take a repository argument')
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
    if current_branch != 'HEAD':
      try:
        upstream = git_output('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError:
        # 'git config <key>' exits non-zero when the key is unset, i.e. the
        # branch has no configured remote; fall through to 'origin'.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # git_output() strips the trailing newline, so the path handed to
  # Mirror.FromPath() below is the real directory name.
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The current repository is itself a cache mirror: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0

  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: update the mirror first, with its
      # own output silenced, then fetch from it.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *_args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
807
808
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (not args and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    # Without --force nothing is changed: list the lock files found in the
    # cache directory and bail out through parser.error().
    cachepath = Mirror.GetCachePath()
    candidates = (os.path.join(cachepath, name)
                  for name in os.listdir(cachepath)
                  if name.endswith('.lock'))
    # isfile() must see the full path; a bare name would be resolved against
    # the current working directory instead of the cache directory.
    lockfiles = [path for path in candidates if os.path.isfile(path)]
    # The explicit '+' keeps ', ' as the join separator. Relying on implicit
    # literal concatenation would make the whole message the separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    # Tolerate a None result (returned when no cache path is configured).
    unlocked_repos.extend(Mirror.UnlockAll() or [])
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
840
841
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Creates the parser with prog 'git cache' and the global options.

    Positional and keyword arguments are forwarded to
    optparse.OptionParser.__init__.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parses args, then applies the global options.

    Configures the logging level from --verbose/--quiet and, when
    --cache-dir is given, installs it through Mirror.SetCachePath().

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # Verbosity indexes into this list; extra -v flags clamp at DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
879
880
def main(argv):
  """Runs this module's subcommand dispatcher on argv.

  Returns:
    Whatever the dispatcher's execute() returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
884
885
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 and a
  # short notice on stderr when interrupted with Ctrl-C.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)