blob: 44e59a39d6e2e9ed7b70765dc79976af9c016ebe [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
John Budorick47ec0692019-05-01 15:04:28 +000016import subprocess
17import sys
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000019import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000020import time
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com776a2c32014-04-25 07:54:25 +000027import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000028
hinoka@google.com563559c2014-04-02 00:36:24 +000029from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import gclient_utils
31import subcommand
32
# Analogous to gc.autopacklimit git config: once a mirror holds more than this
# many .pack files, Mirror._ensure_bootstrapped() tries to re-bootstrap it.
GC_AUTOPACKLIMIT = 50

# Printed by Mirror.populate() when the mirror has to be wiped and rebuilt.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
37
# WindowsError only exists as a builtin on Windows. Everywhere else, fall back
# to a private placeholder type so "except WinErr:" clauses stay valid.
try:
  WinErr = WindowsError  # pylint: disable=undefined-variable
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
class LockError(Exception):
  """Raised when a cache lockfile cannot be created, released or removed."""
47
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000050
dnj4625b5a2016-11-10 18:23:26 -080051
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn. If |count| is not positive,
      |fn| is never called and None is returned.

  Raises:
    Whatever |fn| raised on its final attempt, or immediately if the exception
    is not one of |excs|.
  """
  printerr = printerr or logging.warning
  if not excs:
    # Honor the documented contract: an empty |excs| means "retry on anything".
    excs = (Exception,)
  # range() rather than xrange(): the latter does not exist on Python 3.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i + 1) >= count:
        # Out of attempts; let the caller see the last failure.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i + 1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
85
86
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file "<abspath(path)>.lock". It is created with
  O_CREAT|O_EXCL and holds the pid of the process that created it.
  """

  def __init__(self, path, timeout=0):
    """Sets up (but does not acquire) a lock for |path|.

    Args:
      path (str): Path being guarded; the lockfile lives at path + '.lock'.
      timeout (int): Approximate number of seconds lock() keeps retrying
          before raising LockError.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not contain an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (errno EEXIST) if the lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  @staticmethod
  def _sleep_interval(remaining):
    """Returns how long to sleep before retrying, given |remaining| seconds.

    The remaining time is clamped into [3, 10] seconds. The previous
    expression, max(10, min(3, remaining)), always evaluated to 10.
    """
    return min(10, max(3, remaining))

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: the lockfile could not be created before the deadline.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          sleep_time = self._sleep_interval(self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: the path is not locked, or is locked by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
195
196
class Mirror(object):
  """A local bare-repository cache ("mirror") of one remote git repository.

  Each mirror lives in a directory directly under the shared cache path (see
  GetCachePath); the directory name is derived from the remote url by
  UrlToCacheDir.
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level 'cachepath' attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in 'cachepath' when no cache path is configured.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a handle for the mirror of |url|; performs no disk changes.

    Args:
      url (str): Url of the remote repository.
      refs (list): Optional extra refs/fetch specs to fetch besides
          refs/heads/*.
      print_func (callable): Optional function taking a single message; when
          given, all output of this object goes through it instead of print().
    """
    self.url = url
    self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print()-style kwargs."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long the wrapped block took."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Name of the Google Storage bucket to bootstrap from, or None."""
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @property
  def _gs_path(self):
    """Google Storage location under which this repo's caches are stored."""
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

  @classmethod
  def FromPath(cls, path):
    """Builds a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]

    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Overrides the cache root directory for all Mirror objects."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache root directory, resolving and memoizing it if needed.

    The value comes from the 'cache.cachepath' git config entry, falling back
    to the GIT_CACHE_PATH environment variable.

    Raises:
      RuntimeError: neither source provides a cache path.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          # check_output() returns bytes on Python 3; decode so the result
          # can be joined with str paths.
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).decode('utf-8', 'ignore').strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd (list): git arguments, without the git executable itself.
      **kwargs: Passed to gclient_utils.CheckCallAndFilter. 'cwd' defaults to
          the mirror directory, and GIT_ASKPASS/SSH_ASKPASS default to 'true'
          in the child environment.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration a mirror needs into the repo at |cwd|.

    Args:
      cwd (str): Repository to configure; defaults to the mirror directory.
      reset_fetch_config (bool): If True, drop every existing
          remote.origin.fetch entry before adding the wanted ones.

    Raises:
      ClobberNeeded: the repository cannot even be configured.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if |directory| was replaced by a downloaded copy, False
    otherwise (in which case |directory| is left untouched).
    """
    if not self.bootstrap_bucket:
      return False

    gsutil = Gsutil(self.gsutil_exe, boto_path=None)

    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)

    ready_file_pattern = re.compile(r'.*/(\d+).ready$')

    objects = set(ls_out.strip().splitlines())
    ready_dirs = []

    for name in objects:
      m = ready_file_pattern.match(name)
      # Given <path>/<number>.ready,
      # we are interested in <path>/<number> directory

      if m and (name[:-len('.ready')] + '/') in objects:
        ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

    if not ready_dirs:
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False
    latest_dir = max(ready_dirs)[1]

    # tempdir stays None until mkdtemp() succeeds, so the cleanup below never
    # touches an unbound name.
    tempdir = None
    downloaded = False
    try:
      # create new temporary directory locally
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.RunGit(['init', '--bare'], cwd=tempdir)
      self.print('Downloading files in %s/* into %s.' %
                 (latest_dir, tempdir))
      with self.print_duration_of('download'):
        code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                           tempdir)
      downloaded = not code
    except Exception as e:
      self.print('Encountered error: %s' % str(e), file=sys.stderr)

    if not downloaded:
      # Clean up on every failure path, including a non-zero gsutil exit code
      # (which previously left the temporary directory behind).
      if tempdir is not None:
        gclient_utils.rmtree(tempdir)
      return False

    # delete the old directory
    if os.path.exists(directory):
      gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and has |revision| as a commit."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      # Decode: check_output() returns bytes on Python 3, and
      # parse_fetch_spec() works on str.
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path).decode('utf-8', 'ignore')
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.'
          % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Makes sure the mirror directory holds a usable bare repository.

    Args:
      depth (int): Requested shallow-fetch depth, if any. A truthy depth
          disables bootstrapping from Google Storage.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      force (bool): Re-bootstrap even if the mirror looks healthy.
    """
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []
    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)

    if not should_bootstrap:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
      return

    if self.exists():
      # Re-bootstrapping an existing mirror; preserve existing fetch spec.
      self._preserve_fetchspec()
    else:
      if os.path.exists(self.mirror_path):
        # If the mirror path exists but self.exists() returns false, we're
        # in an unexpected state. Nuke the previous mirror directory and
        # start fresh.
        gclient_utils.rmtree(self.mirror_path)
      os.mkdir(self.mirror_path)

    bootstrapped = (not depth and bootstrap and
                    self.bootstrap_repo(self.mirror_path))

    if not bootstrapped:
      if not self.exists() or not self.supported_project():
        # Bootstrap failed due to:
        # 1. No previous cache.
        # 2. Project doesn't have a bootstrap folder.
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=self.mirror_path)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configures the repo at |rundir| and fetches every configured spec.

    Raises:
      ClobberNeeded: fetching '+refs/heads/*:refs/heads/*' failed, or the
          repository could not be configured.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    # Decode so specs are str on Python 3 too; otherwise the comparison with
    # the default spec below could never match.
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).decode('utf-8', 'ignore').strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror so it has the remote's current objects.

    Args:
      depth (int): Only fetch this many commits of history.
      shallow (bool): If True and |depth| is unset, use a depth of 10000.
      bootstrap (bool): Allow bootstrapping from Google Storage.
      verbose (bool): Pass '-v --progress' to git fetch.
      ignore_lock (bool): Do not take the per-mirror lockfile.
      lock_timeout (int): Seconds to wait for the lockfile.
      reset_fetch_config (bool): Reset remote.origin.fetch before fetching.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    try:
      self._ensure_bootstrapped(depth, bootstrap)
      self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(self.mirror_path)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      self._ensure_bootstrapped(depth, bootstrap, force=True)
      self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
    finally:
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Uploads the mirror to Google Storage unless that version is there.

    Args:
      prune (bool): Currently unused; pruning of old caches is still a TODO.
    """
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'],
        cwd=self.mirror_path).decode('utf-8', 'ignore').strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

    src_name = self.mirror_path
    dest_name = '%s/%s' % (self._gs_path, gen_number)

    # check to see if folder already exists in gs
    _, ls_out, ls_err = gsutil.check_call('ls', dest_name)
    _, ls_out_ready, ls_err_ready = (
        gsutil.check_call('ls', dest_name + '.ready'))

    # only printing out errors because the folder/ready file
    # might not exist yet, so it will error no matter what
    if ls_err:
      print('Failed to check GS:\n%s' % (ls_err))
    if ls_err_ready:
      print('Failed to check GS:\n%s' % (ls_err_ready))

    if not (ls_out == '' and ls_out_ready == ''):
      print('Cache %s already exists' % dest_name)
      return

    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])

    gsutil.call('-m', 'cp', '-r', src_name, dest_name)

    #TODO(karenqian): prune old caches

    # create .ready file and upload
    fd, ready_file_name = tempfile.mkstemp(suffix='.ready')
    # mkstemp() hands back an open descriptor; only the (empty) file itself is
    # needed, so close it right away instead of leaking it.
    os.close(fd)
    try:
      gsutil.call('cp', ready_file_name, '%s.ready' % (dest_name))
    finally:
      os.remove(ready_file_name)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Removes leftover temporary pack files from the repo at |path|."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Forcibly removes lock files for the repo at |path|.

    Returns True if any lock file was removed.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Forcibly removes the locks of this mirror; see BreakLocks."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Breaks the locks of every repo in the cache and removes temp entries.

    Returns the list of repo directories for which a lock was removed.
    """
    cachepath = cls.GetCachePath()
    if not cachepath:
      # Keep the return type consistent with the normal path.
      return []
    dirlist = os.listdir(cachepath)
    repo_dirs = {os.path.join(cachepath, path) for path in dirlist
                 if os.path.isdir(os.path.join(cachepath, path))}
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000662
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Returns 0 and prints the mirror directory when a cache exists for the
  # single url given on the command line; returns 1 otherwise.
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
675
676
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Bail out early: this command is not supported on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached bundles of the same repo.')

  # The cache must be populated before it can be uploaded. CMDpopulate gets
  # its own copy of the arguments so |args| can be parsed again below.
  CMDpopulate(parser, args[:])

  # Parse again to pick up --prune and the repo url, then upload.
  options, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000698
699
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(positional[0], refs=options.ref)

  # Translate the command-line flags into Mirror.populate keyword arguments.
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # 'depth' is forwarded only when a non-zero value was given; otherwise the
  # key is omitted entirely (presumably so populate() uses its own default).
  if options.depth:
    populate_kwargs['depth'] = options.depth

  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000735
736
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  # Returns 0 on success. A failing git invocation propagates as
  # subprocess.CalledProcessError, except for the "no upstream remote
  # configured" lookup, which falls back to 'origin'.
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  def git_output(*git_args):
    # Run git and return its stdout as stripped text. universal_newlines=True
    # makes check_output return str (not bytes) on Python 3 as well, so the
    # string comparisons and '%s' formatting below behave the same on both
    # major versions.
    return subprocess.check_output(
        [Mirror.git_exe] + list(git_args), universal_newlines=True).strip()

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    # Report bad usage through the parser, like the other subcommands do; an
    # assert would be stripped when running under `python -O`.
    if args:
      parser.error('fetch --all does not take a repository argument')
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
    # 'HEAD' means a detached HEAD: there is no branch to look a remote up for.
    if current_branch != 'HEAD':
      try:
        upstream = git_output('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError:
        # `git config <key>` exits non-zero when the key is unset; treat that
        # as "no upstream" so the 'origin' fallback below applies.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # The output is stripped by git_output(); without that the trailing newline
  # printed by git would become part of the path handed to Mirror.FromPath.
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The current directory is itself a cache mirror: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0

  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: update the mirror first, silencing
      # the mirror's own output in favour of a single status line.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *_unused: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
788
789
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # Without -f|--force nothing is unlocked: the command only reports the lock
  # files found in the cache directory and exits through parser.error().
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    # os.listdir() yields bare names, so the isfile() test must be made on the
    # path joined with the cache directory, not relative to the cwd.
    candidates = [os.path.join(cachepath, name)
                  for name in os.listdir(cachepath)
                  if name.endswith('.lock')]
    lockfiles = [path for path in candidates if os.path.isfile(path)]
    # The explicit '+' matters: adjacent string literals are concatenated
    # before the method call binds, which would turn the whole message into
    # the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
821
822
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Adds the options shared by every subcommand (-c/--cache-dir, -v/--verbose,
  -q/--quiet, --timeout) and, in parse_args(), applies their side effects:
  logging is configured and the cache path override is installed.
  """

  def __init__(self, *args, **kwargs):
    """Create the parser with prog fixed to 'git cache' and global options."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse args, then apply the global options.

    Returns:
      The (options, args) pair produced by optparse, with options.verbose
      forced to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # verbose is a count starting at 1: 0 (quiet) -> ERROR, 1 -> WARNING,
    # 2 -> INFO, 3 or more -> DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    # A RuntimeError from GetCachePath is treated as "no cache directory
    # configured globally".
    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling on both Python 2.7 and Python 3.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
860
861
def main(argv):
  """Dispatch argv to the matching subcommand and return its result."""
  # The dispatcher is built from this module's name and runs the selected
  # command with a fresh OptionParser carrying the global options.
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
865
866
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with status 1 and a
  # short note on stderr when interrupted with Ctrl-C.
  try:
    exit_status = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_status = 1
  sys.exit(exit_status)