#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

from __future__ import print_function

import contextlib
import errno
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import threading
import time
import zipfile

try:
  import urlparse
except ImportError:  # For Py3 compatibility
  import urllib.parse as urlparse

from download_from_google_storage import Gsutil
import gclient_utils
import subcommand

# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

# Printed when a mirror has to be wiped and re-created from scratch.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# WindowsError only exists on Windows. Elsewhere, define a placeholder
# exception type so that "except WinErr" clauses are valid on every platform.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""

class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired, released or removed."""

class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and re-created."""


def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises:
    Whatever |fn| raised on the final attempt, or any exception raised by
    |fn| that is not covered by |excs|.
  """
  printerr = printerr or logging.warning
  # Honor the documented contract: an empty |excs| means "retry on anything
  # derived from Exception". A bare "except ():" would catch nothing.
  excs = excs or (Exception,)
  # range() rather than xrange(): xrange does not exist on Python 3, and
  # |count| is small enough that the Python 2 list is irrelevant.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i+1) >= count:
        # Out of attempts; let the last failure propagate to the caller.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file "<path>.lock". It is created with
  O_CREAT | O_EXCL and contains the pid of the process that created it.
  """

  # Bounds, in seconds, for a single sleep between acquisition attempts.
  _MIN_RETRY_SLEEP = 3
  _MAX_RETRY_SLEEP = 10

  def __init__(self, path, timeout=0):
    """Prepares (but does not acquire) a lock for |path|.

    Args:
      path (str): Path to protect; made absolute.
      timeout (number): How long lock() keeps retrying, in seconds.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not hold an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (EEXIST) if the lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-statement closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%s\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  @classmethod
  def _retry_delay(cls, remaining):
    """Returns how long to sleep before the next lock attempt.

    The remaining time budget is clamped to
    [_MIN_RETRY_SLEEP, _MAX_RETRY_SLEEP]. The previous expression,
    max(10, min(3, remaining)), always evaluated to 10 regardless of the
    remaining time.
    """
    return min(cls._MAX_RETRY_SLEEP, max(cls._MIN_RETRY_SLEEP, remaining))

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile still cannot be created once the timeout
          has been used up.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          sleep_time = self._retry_delay(self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the file is not locked, or is locked by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()


class Mirror(object):
  """A local bare-git mirror of one remote repository inside the git cache.

  The mirror lives in <cache path>/<UrlToCacheDir(url)>. The cache path is
  shared by all instances and stored on the class (see GetCachePath).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level 'cachepath' attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored as 'cachepath' when no cache path is configured.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle for the mirror of |url|; touches no files.

    Args:
      url (str): URL of the remote repository.
      refs (list): Optional extra fetch specs (see parse_fetch_spec).
      print_func (callable): Optional single-argument function that receives
          every message instead of the builtin print.
    """
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print() keyword arguments."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that prints how many minutes the wrapped block took."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Name of the Google Storage bucket to bootstrap from, or None.

    $OVERRIDE_BOOTSTRAP_BUCKET, when set and non-empty, takes precedence over
    the bucket derived from the host of the URL.
    """
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @property
  def _gs_path(self):
    """Google Storage location holding the bootstrap data of this mirror."""
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

  @classmethod
  def FromPath(cls, path):
    """Creates a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]

    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Sets the cache root directory used by all Mirror instances."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache root directory.

    On first use the value is read from the 'cache.cachepath' git config and,
    if that lookup fails, from $GIT_CACHE_PATH; the result is remembered on
    the class.

    Raises:
      RuntimeError: if neither source provides a cache path.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          # check_output() returns bytes on Python 3; decode so the value can
          # be combined with str paths.
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).decode('utf-8', 'ignore').strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  @staticmethod
  def _GetMostRecentCacheDirectory(ls_out_set):
    """Picks the highest-numbered directory that has a matching .ready file.

    Args:
      ls_out_set (set): Listing entries; directories end with '/'.

    Returns:
      The '<path>/<number>' prefix with the largest number for which both
      '<path>/<number>.ready' and '<path>/<number>/' are listed, or None.
    """
    ready_file_pattern = re.compile(r'.*/(\d+).ready$')
    ready_dirs = []

    for name in ls_out_set:
      m = ready_file_pattern.match(name)
      # Given <path>/<number>.ready,
      # we are interested in <path>/<number> directory
      if m and (name[:-len('.ready')] + '/') in ls_out_set:
        ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

    if not ready_dirs:
      return None

    return max(ready_dirs)[1]

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Only set when the caller's environment does not already define them.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration of the mirror.

    Args:
      cwd (str): Repository to configure; defaults to the mirror path.
      reset_fetch_config (bool): Remove all existing remote.origin.fetch
          entries before writing the new ones.

    Raises:
      ClobberNeeded: if even a basic 'git config' call fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if |directory| was replaced with downloaded data, False
    otherwise. The temporary download directory is removed on every failure.
    """
    if not self.bootstrap_bucket:
      return False

    gsutil = Gsutil(self.gsutil_exe, boto_path=None)

    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
    ls_out_set = set(ls_out.strip().splitlines())
    latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

    if not latest_dir:
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False

    # tempdir stays None until mkdtemp() succeeds, so the cleanup below never
    # touches an unbound name.
    tempdir = None
    downloaded = False
    try:
      # create new temporary directory locally
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.RunGit(['init', '--bare'], cwd=tempdir)
      self.print('Downloading files in %s/* into %s.' %
                 (latest_dir, tempdir))
      with self.print_duration_of('download'):
        code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                           tempdir)
      downloaded = not code
    except Exception as e:
      self.print('Encountered error: %s' % str(e), file=sys.stderr)

    if not downloaded:
      # Covers both an exception and a non-zero gsutil exit code; the latter
      # used to leave the partially filled directory behind.
      if tempdir is not None:
        gclient_utils.rmtree(tempdir)
      return False

    # delete the old directory
    if os.path.exists(directory):
      gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and has |revision| as a commit."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if the repo is hosted on one of the known hosts."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      # Decode: check_output() returns bytes on Python 3, and
      # parse_fetch_spec() operates on text.
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path).decode('utf-8', 'ignore')
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Makes sure the mirror directory holds a usable bare repository.

    Args:
      depth (int): Requested shallow depth; a truthy value disables
          bootstrapping from Google Storage.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      force (bool): Bootstrap even if the mirror looks healthy.
    """
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []
    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)

    if not should_bootstrap:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
      return

    if self.exists():
      # Re-bootstrapping an existing mirror; preserve existing fetch spec.
      self._preserve_fetchspec()
    else:
      if os.path.exists(self.mirror_path):
        # If the mirror path exists but self.exists() returns false, we're
        # in an unexpected state. Nuke the previous mirror directory and
        # start fresh.
        gclient_utils.rmtree(self.mirror_path)
      os.mkdir(self.mirror_path)

    bootstrapped = (not depth and bootstrap and
                    self.bootstrap_repo(self.mirror_path))

    if not bootstrapped:
      if not self.exists() or not self.supported_project():
        # Bootstrap failed due to:
        # 1. No previous cache.
        # 2. Project doesn't have a bootstrap folder.
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=self.mirror_path)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures |rundir| and fetches every configured fetch spec.

    Raises:
      ClobberNeeded: if fetching '+refs/heads/*:refs/heads/*' fails, which is
          treated as a corrupted cache. Failures of other specs are logged.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    # Decode so that each spec is text on Python 3 as well; it is compared
    # against a str literal and formatted into messages below.
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).decode('utf-8', 'ignore').strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror, holding its lockfile meanwhile.

    If the mirror turns out to be corrupt (ClobberNeeded), it is deleted and
    rebuilt once.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    try:
      self._ensure_bootstrapped(depth, bootstrap)
      self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(self.mirror_path)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      self._ensure_bootstrapped(depth, bootstrap, force=True)
      self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
    finally:
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Uploads the mirror to Google Storage as a new bootstrap directory.

    Args:
      prune (bool): After uploading, delete every other entry under the same
          Google Storage path except the most recent previously complete one.
    """
    # The folder is <git number>
    # Decode: check_output() returns bytes on Python 3, which would otherwise
    # be formatted into the destination as "b'...'".
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'],
        cwd=self.mirror_path).decode('utf-8', 'ignore').strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)

    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)

    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
        dest_prefix + '.ready' in ls_out_set):
      print('Cache %s already exists.' % dest_prefix)
      return

    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])

    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)

    # Create .ready file and upload
    ready_fd, ready_file_name = tempfile.mkstemp(suffix='.ready')
    # mkstemp() hands back an open descriptor; only the (empty) file is
    # needed, so close it right away instead of leaking it.
    os.close(ready_fd)
    try:
      gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
      os.remove(ready_file_name)

    # remove all other directory/.ready files in the same gs_path
    # except for the directory/.ready file previously created
    # which can be used for bootstrapping while the current one is
    # being uploaded
    if not prune:
      return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
      return
    for path in ls_out_set:
      if (path == prev_dest_prefix + '/' or
          path == prev_dest_prefix + '.ready'):
        continue
      if path.endswith('.ready'):
        gsutil.call('rm', path)
        continue
      gsutil.call('-m', 'rm', '-r', path)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Deletes stale temporary pack files under <path>/objects/pack."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Removes lock and temporary files left behind for the repo at |path|.

    Returns True if a cache lockfile or a git 'config.lock' was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Breaks all locks held on this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repo in the cache and removes temp entries.

    Returns the list of repo directories for which a lock was removed; the
    list is empty when the cache path is empty.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      # Always return a list so callers can iterate the result.
      return []
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # "<repo>.lock" may exist without its repo directory; strip the
        # '.lock' suffix so that repo still gets its lock broken.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos

@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the mirror path and returns 0 if the mirror exists; returns 1
  # otherwise. Exactly one positional argument (the repo url) is required.
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if mirror.exists():
    print(mirror.mirror_path)
    return 0
  return 1


@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Returns 1 on Windows (unsupported) and 0 after the upload step has run.
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached bundles of the same repo.')

  # First, we need to ensure the cache is populated.
  # CMDpopulate gets its own copy of the argument list, so the original list
  # is still intact when it is parsed again below.
  populate_args = args[:]
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune)
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help="Don't bootstrap from Google Storage")
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help="Don't try to lock repository")
  parser.add_option('--reset-fetch-config', action='store_true',
                    default=False,
                    help='Reset the fetch config before populating the cache.')

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(positional[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # Only forward --depth when it was given (and non-zero), so populate()
  # otherwise keeps its own default.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000751
752
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  def git_output(*git_args):
    """Runs git with |git_args| and returns its stripped stdout as text."""
    # universal_newlines=True makes check_output return text rather than
    # bytes on Python 3, so the result can be compared with and formatted
    # into str values. Stripping removes the trailing newline git prints.
    return subprocess.check_output(
        [Mirror.git_exe] + list(git_args), universal_newlines=True).strip()

  # Figure out which remotes to fetch. This mimics the behavior of regular
  # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    # Report bad usage through the parser; an assert would be stripped when
    # running under -O.
    if args:
      parser.error('fetch --all does not take a repository argument')
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
    if current_branch != 'HEAD':
      try:
        upstream = git_output('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError:
        # 'git config' exits non-zero when the key is not set, i.e. the
        # branch has no configured remote; fall through to the default.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The current directory is itself a cache mirror: just update it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0

  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: refresh the mirror before fetching
      # from it, with the mirror's own output silenced.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *_unused: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
804
805
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (not args and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for name in os.listdir(cachepath):
      # listdir() yields bare names; they must be joined with the cache path
      # before being tested, otherwise they are resolved against the cwd.
      candidate = os.path.join(cachepath, name)
      if name.endswith('.lock') and os.path.isfile(candidate):
        lockfiles.append(candidate)
    # The explicit '+' matters: adjacent string literals are concatenated
    # before the method call is applied, which would turn the whole message
    # into the join() separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
837
838
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Creates the parser and registers the options shared by all commands.

    Positional and keyword arguments are forwarded to optparse.OptionParser;
    the program name is always 'git cache'.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                        'Path to the directory containing the caches. Normally '
                        'deduced from git config cache.cachepath or '
                        '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parses |args|, then applies the global options.

    Sets up logging from -v/-q and, when --cache-dir is given, installs it as
    the cache path used by Mirror.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # Map the verbosity count onto a log level, clamping at DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warning, not the deprecated logging.warn alias (removed in
        # Python 3.13).
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
876
877
def main(argv):
  """Runs this module's subcommand dispatcher over |argv|.

  Returns:
    Whatever the dispatcher's execute() call returns.
  """
  command_dispatcher = subcommand.CommandDispatcher(__name__)
  option_parser = OptionParser()
  return command_dispatcher.execute(option_parser, argv)
881
882
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 and a
  # short notice when the user interrupts the run.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)