blob: 7a6c907d474b37cc8d554609bd0e8c69fa10645b [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000016import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000017import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000018import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000019import subprocess
20import sys
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com776a2c32014-04-25 07:54:25 +000027import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000028
hinoka@google.com563559c2014-04-02 00:36:24 +000029from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import gclient_utils
31import subcommand
32
szager@chromium.org301a7c32014-06-16 17:13:50 +000033# Analogous to gc.autopacklimit git config.
34GC_AUTOPACKLIMIT = 50
Takuto Ikuta9fce2132017-12-14 10:44:28 +090035
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +000036GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
37
# WindowsError is not defined on every platform. Where it is missing, fall back
# to a private placeholder exception type so that `except WinErr:` clauses in
# this file stay valid.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
class LockError(Exception):
  """Raised when a cache lockfile cannot be created, verified or removed."""
47
class ClobberNeeded(Exception):
  """Raised when a mirror is unusable and must be deleted and rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000050
dnj4625b5a2016-11-10 18:23:26 -080051
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn. None if |count| is not
      positive, because |fn| is then never called.

  Raises: Whatever |fn| raised on the final attempt, or immediately for an
      exception type not listed in |excs|.
  """
  printerr = printerr or logging.warning
  # `except ():` catches nothing, so the documented "empty means Exception"
  # fallback has to be applied explicitly.
  if not excs:
    excs = (Exception,)
  # range() works on both Python 2 and 3; xrange() only exists on Python 2.
  for attempt in range(1, count + 1):
    try:
      return fn()
    except excs as e:
      if attempt >= count:
        # Out of attempts: let the last failure propagate to the caller.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, attempt, count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
85
86
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the sibling file "<path>.lock". It is created
  exclusively and contains the pid of the process that created it.
  """

  def __init__(self, path, timeout=0):
    """Prepares (but does not acquire) a lock for |path|.

    Args:
      path (str): Path being protected; made absolute.
      timeout (number): Seconds lock() may keep retrying before giving up.
    """
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns the pid as an int, or None if the file is unreadable or does not
    start with an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (errno EEXIST) if the lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The context manager closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lockfile still cannot be created once the deadline
          has passed.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Poll at most every 10 seconds, but never sleep past the deadline.
          sleep_time = min(10, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the file is not locked, or is locked by another pid.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
195
196
szager@chromium.org848fd492014-04-09 19:06:44 +0000197class Mirror(object):
198
199 git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
200 gsutil_exe = os.path.join(
hinoka@chromium.orgb091aa52014-12-20 01:47:31 +0000201 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
Vadim Shtayura08049e22017-10-11 00:14:52 +0000202 cachepath_lock = threading.Lock()
szager@chromium.org848fd492014-04-09 19:06:44 +0000203
Robert Iannuccia19649b2018-06-29 16:31:45 +0000204 UNSET_CACHEPATH = object()
205
206 # Used for tests
207 _GIT_CONFIG_LOCATION = []
208
szager@chromium.org66c8b852015-09-22 23:19:07 +0000209 @staticmethod
210 def parse_fetch_spec(spec):
211 """Parses and canonicalizes a fetch spec.
212
213 Returns (fetchspec, value_regex), where value_regex can be used
214 with 'git config --replace-all'.
215 """
216 parts = spec.split(':', 1)
217 src = parts[0].lstrip('+').rstrip('/')
218 if not src.startswith('refs/'):
219 src = 'refs/heads/%s' % src
220 dest = parts[1].rstrip('/') if len(parts) > 1 else src
221 regex = r'\+%s:.*' % src.replace('*', r'\*')
222 return ('+%s:%s' % (src, dest), regex)
223
szager@chromium.org848fd492014-04-09 19:06:44 +0000224 def __init__(self, url, refs=None, print_func=None):
225 self.url = url
szager@chromium.org66c8b852015-09-22 23:19:07 +0000226 self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
szager@chromium.org848fd492014-04-09 19:06:44 +0000227 self.basedir = self.UrlToCacheDir(url)
228 self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
loislo@chromium.org0fb693f2014-12-25 15:28:22 +0000229 if print_func:
230 self.print = self.print_without_file
231 self.print_func = print_func
232 else:
233 self.print = print
234
  def print_without_file(self, message, **_kwargs):
    """Sends |message| to the print_func stored on this instance.

    Keyword arguments (e.g. file=sys.stderr) are accepted so that this can be
    called like print(), but they are discarded.
    """
    self.print_func(message)
szager@chromium.org848fd492014-04-09 19:06:44 +0000237
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800238 @contextlib.contextmanager
239 def print_duration_of(self, what):
240 start = time.time()
241 try:
242 yield
243 finally:
244 self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))
245
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000246 @property
247 def bootstrap_bucket(self):
Andrii Shyshkalov4b79c382019-04-15 23:48:35 +0000248 b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
249 if b:
250 return b
Ryan Tseng3beabd02017-03-15 13:57:58 -0700251 u = urlparse.urlparse(self.url)
252 if u.netloc == 'chromium.googlesource.com':
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000253 return 'chromium-git-cache'
Andrii Shyshkalov4b79c382019-04-15 23:48:35 +0000254 # TODO(tandrii): delete once LUCI migration is completed.
255 # Only public hosts will be supported going forward.
Ryan Tseng3beabd02017-03-15 13:57:58 -0700256 elif u.netloc == 'chrome-internal.googlesource.com':
257 return 'chrome-git-cache'
258 # Not recognized.
259 return None
hinoka@chromium.orgf8fa23d2014-06-05 01:00:04 +0000260
  @property
  def gs_folder(self):
    """Google Storage URL of this mirror's folder under the "v2" prefix.

    NOTE(review): when bootstrap_bucket is None the URL contains the literal
    text "None"; presumably callers only use this for recognized hosts --
    confirm.
    """
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)
264
  @classmethod
  def FromPath(cls, path):
    """Alternate constructor: builds a Mirror from a cache directory path.

    The URL is reconstructed from the directory name with CacheDirToUrl();
    refs and print_func take their defaults.
    """
    return cls(cls.CacheDirToUrl(path))
268
szager@chromium.org848fd492014-04-09 19:06:44 +0000269 @staticmethod
270 def UrlToCacheDir(url):
271 """Convert a git url to a normalized form for the cache dir path."""
272 parsed = urlparse.urlparse(url)
273 norm_url = parsed.netloc + parsed.path
274 if norm_url.endswith('.git'):
275 norm_url = norm_url[:-len('.git')]
Dirk Prankedb589542019-04-12 21:07:01 +0000276
277 # Use the same dir for authenticated URLs and unauthenticated URLs.
278 norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')
279
szager@chromium.org848fd492014-04-09 19:06:44 +0000280 return norm_url.replace('-', '--').replace('/', '-').lower()
281
282 @staticmethod
szager@chromium.org174766f2014-05-13 21:27:46 +0000283 def CacheDirToUrl(path):
284 """Convert a cache dir path to its corresponding url."""
285 netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
286 return 'https://%s' % netpath
287
szager@chromium.org848fd492014-04-09 19:06:44 +0000288 @classmethod
289 def SetCachePath(cls, cachepath):
Vadim Shtayura08049e22017-10-11 00:14:52 +0000290 with cls.cachepath_lock:
291 setattr(cls, 'cachepath', cachepath)
szager@chromium.org848fd492014-04-09 19:06:44 +0000292
293 @classmethod
294 def GetCachePath(cls):
Vadim Shtayura08049e22017-10-11 00:14:52 +0000295 with cls.cachepath_lock:
296 if not hasattr(cls, 'cachepath'):
297 try:
298 cachepath = subprocess.check_output(
Robert Iannuccia19649b2018-06-29 16:31:45 +0000299 [cls.git_exe, 'config'] +
300 cls._GIT_CONFIG_LOCATION +
301 ['cache.cachepath']).strip()
Vadim Shtayura08049e22017-10-11 00:14:52 +0000302 except subprocess.CalledProcessError:
Robert Iannuccia19649b2018-06-29 16:31:45 +0000303 cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
Vadim Shtayura08049e22017-10-11 00:14:52 +0000304 setattr(cls, 'cachepath', cachepath)
Robert Iannuccia19649b2018-06-29 16:31:45 +0000305
306 ret = getattr(cls, 'cachepath')
307 if ret is cls.UNSET_CACHEPATH:
308 raise RuntimeError('No cache.cachepath git configuration or '
309 '$GIT_CACHE_PATH is set.')
310 return ret
szager@chromium.org848fd492014-04-09 19:06:44 +0000311
dnj4625b5a2016-11-10 18:23:26 -0800312 def Rename(self, src, dst):
313 # This is somehow racy on Windows.
314 # Catching OSError because WindowsError isn't portable and
315 # pylint complains.
316 exponential_backoff_retry(
317 lambda: os.rename(src, dst),
318 excs=(OSError,),
319 name='rename [%s] => [%s]' % (src, dst),
320 printerr=self.print)
321
szager@chromium.org848fd492014-04-09 19:06:44 +0000322 def RunGit(self, cmd, **kwargs):
323 """Run git in a subprocess."""
324 cwd = kwargs.setdefault('cwd', self.mirror_path)
325 kwargs.setdefault('print_stdout', False)
326 kwargs.setdefault('filter_fn', self.print)
327 env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
328 env.setdefault('GIT_ASKPASS', 'true')
329 env.setdefault('SSH_ASKPASS', 'true')
330 self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
331 gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)
332
  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the git configuration used for a mirror into the repo at |cwd|.

    Args:
      cwd (str): Repository directory to configure; defaults to the mirror
          path.
      reset_fetch_config (bool): If True, remove every existing
          remote.origin.fetch entry before the fetch specs are written.

    Raises:
      ClobberNeeded: if setting gc.autodetach fails.
      subprocess.CalledProcessError: if unsetting remote.origin.fetch fails
          with an exit code other than 5.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    # The trailing regex selects which existing value --replace-all
    # overwrites, so an earlier refs/heads/* entry is replaced.
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)
szager@chromium.org848fd492014-04-09 19:06:44 +0000371
372 def bootstrap_repo(self, directory):
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800373 """Bootstrap the repo from Google Storage if possible.
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000374
375 More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
376 """
Ryan Tseng3beabd02017-03-15 13:57:58 -0700377 if not self.bootstrap_bucket:
378 return False
Jeremy Apthorpd795ab82018-07-27 19:23:25 +0000379 python_fallback = (
380 (sys.platform.startswith('win') and
381 not gclient_utils.FindExecutable('7z')) or
382 (not gclient_utils.FindExecutable('unzip')) or
383 ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
384 )
szager@chromium.org848fd492014-04-09 19:06:44 +0000385
386 gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
hinoka@chromium.org199bc5f2014-12-17 02:17:14 +0000387 gsutil = Gsutil(self.gsutil_exe, boto_path=None)
szager@chromium.org848fd492014-04-09 19:06:44 +0000388 # Get the most recent version of the zipfile.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800389 _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
Yuwei Huanga1fbdff2019-02-01 21:51:15 +0000390
391 def compare_filenames(a, b):
392 # |a| and |b| look like gs://.../.../9999.zip. They both have the same
393 # gs://bootstrap_bucket/basedir/ prefix because they come from the same
394 # `gsutil ls`.
395 # This function only compares the numeral parts before .zip.
396 regex_pattern = r'/(\d+)\.zip$'
397 match_a = re.search(regex_pattern, a)
398 match_b = re.search(regex_pattern, b)
399 if (match_a is not None) and (match_b is not None):
400 num_a = int(match_a.group(1))
401 num_b = int(match_b.group(1))
402 return cmp(num_a, num_b)
403 # If it doesn't match the format, fallback to string comparison.
404 return cmp(a, b)
405
406 ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames)
szager@chromium.org848fd492014-04-09 19:06:44 +0000407 if not ls_out_sorted:
408 # This repo is not on Google Storage.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800409 self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
410 (self.mirror_path, self.bootstrap_bucket,
411 ' '.join((ls_err or '').splitlines(True))))
szager@chromium.org848fd492014-04-09 19:06:44 +0000412 return False
413 latest_checkout = ls_out_sorted[-1]
414
415 # Download zip file to a temporary directory.
416 try:
szager@chromium.org1cbf1042014-06-17 18:26:24 +0000417 tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
szager@chromium.org848fd492014-04-09 19:06:44 +0000418 self.print('Downloading %s' % latest_checkout)
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800419 with self.print_duration_of('download'):
420 code = gsutil.call('cp', latest_checkout, tempdir)
szager@chromium.org848fd492014-04-09 19:06:44 +0000421 if code:
szager@chromium.org848fd492014-04-09 19:06:44 +0000422 return False
423 filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
424
hinoka@google.com776a2c32014-04-25 07:54:25 +0000425 # Unpack the file with 7z on Windows, unzip on linux, or fallback.
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800426 with self.print_duration_of('unzip'):
427 if not python_fallback:
428 if sys.platform.startswith('win'):
429 cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
430 else:
431 cmd = ['unzip', filename, '-d', directory]
432 retcode = subprocess.call(cmd)
hinoka@google.com776a2c32014-04-25 07:54:25 +0000433 else:
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800434 try:
435 with zipfile.ZipFile(filename, 'r') as f:
436 f.printdir()
437 f.extractall(directory)
438 except Exception as e:
439 self.print('Encountered error: %s' % str(e), file=sys.stderr)
440 retcode = 1
441 else:
442 retcode = 0
szager@chromium.org848fd492014-04-09 19:06:44 +0000443 finally:
444 # Clean up the downloaded zipfile.
dnj4625b5a2016-11-10 18:23:26 -0800445 #
446 # This is somehow racy on Windows.
447 # Catching OSError because WindowsError isn't portable and
448 # pylint complains.
449 exponential_backoff_retry(
450 lambda: gclient_utils.rm_file_or_tree(tempdir),
451 excs=(OSError,),
452 name='rmtree [%s]' % (tempdir,),
453 printerr=self.print)
szager@chromium.org848fd492014-04-09 19:06:44 +0000454
455 if retcode:
456 self.print(
457 'Extracting bootstrap zipfile %s failed.\n'
458 'Resuming normal operations.' % filename)
459 return False
460 return True
461
Andrii Shyshkalov46a672b2017-11-24 18:04:43 -0800462 def contains_revision(self, revision):
463 if not self.exists():
464 return False
465
466 if sys.platform.startswith('win'):
467 # Windows .bat scripts use ^ as escape sequence, which means we have to
468 # escape it with itself for every .bat invocation.
469 needle = '%s^^^^{commit}' % revision
470 else:
471 needle = '%s^{commit}' % revision
472 try:
473 # cat-file exits with 0 on success, that is git object of given hash was
474 # found.
475 self.RunGit(['cat-file', '-e', needle])
476 return True
477 except subprocess.CalledProcessError:
478 return False
479
szager@chromium.org848fd492014-04-09 19:06:44 +0000480 def exists(self):
481 return os.path.isfile(os.path.join(self.mirror_path, 'config'))
482
Ryan Tseng3beabd02017-03-15 13:57:58 -0700483 def supported_project(self):
484 """Returns true if this repo is known to have a bootstrap zip file."""
485 u = urlparse.urlparse(self.url)
486 return u.netloc in [
487 'chromium.googlesource.com',
488 'chrome-internal.googlesource.com']
489
szager@chromium.org66c8b852015-09-22 23:19:07 +0000490 def _preserve_fetchspec(self):
491 """Read and preserve remote.origin.fetch from an existing mirror.
492
493 This modifies self.fetch_specs.
494 """
495 if not self.exists():
496 return
497 try:
498 config_fetchspecs = subprocess.check_output(
499 [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
500 cwd=self.mirror_path)
501 for fetchspec in config_fetchspecs.splitlines():
502 self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
503 except subprocess.CalledProcessError:
504 logging.warn('Tried and failed to preserve remote.origin.fetch from the '
505 'existing cache directory. You may need to manually edit '
506 '%s and "git cache fetch" again.'
507 % os.path.join(self.mirror_path, 'config'))
508
  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Prepares a fresh repository directory when the mirror needs one.

    A rebuild is attempted when |force| is set, when the mirror does not
    exist, or when it holds more than GC_AUTOPACKLIMIT .pack files.

    Args:
      depth: Requested shallow-fetch depth; a truthy value disables the
          Google Storage bootstrap.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      force (bool): Rebuild even if the existing mirror looks healthy.

    Returns:
      Path of a new temporary directory holding the bootstrapped or freshly
      initialized repository, or None when the existing mirror should be used
      in place.
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      # Short-circuits: bootstrap_repo() only runs for full-depth fetches
      # with bootstrapping enabled.
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        # Drop the unused scratch dir and signal "use the mirror in place".
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir
551
Edward Lemur579c9862018-07-13 23:17:51 +0000552 def _fetch(self, rundir, verbose, depth, reset_fetch_config):
553 self.config(rundir, reset_fetch_config)
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000554 v = []
555 d = []
556 if verbose:
557 v = ['-v', '--progress']
558 if depth:
559 d = ['--depth', str(depth)]
560 fetch_cmd = ['fetch'] + v + d + ['origin']
561 fetch_specs = subprocess.check_output(
562 [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
563 cwd=rundir).strip().splitlines()
564 for spec in fetch_specs:
565 try:
566 self.print('Fetching %s' % spec)
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -0800567 with self.print_duration_of('fetch %s' % spec):
568 self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000569 except subprocess.CalledProcessError:
570 if spec == '+refs/heads/*:refs/heads/*':
hinokadcd84042016-06-09 14:26:17 -0700571 raise ClobberNeeded() # Corrupted cache.
hinoka@chromium.orgaa1e1a42014-06-26 21:58:51 +0000572 logging.warn('Fetch of %s failed' % spec)
573
  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Creates or updates the mirror so it holds the repository's objects.

    Args:
      depth: Optional shallow-fetch depth.
      shallow (bool): If True and no |depth| is given, use a depth of 10000.
      bootstrap (bool): Allow bootstrapping from Google Storage.
      verbose (bool): Forwarded to _fetch().
      ignore_lock (bool): If True, neither acquire nor release the lockfile.
      lock_timeout: Timeout handed to the Lockfile.
      reset_fetch_config (bool): Forwarded to _fetch().
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      # Fetch into the fresh directory if one was created, else in place.
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      if tempdir:
        # Swap the freshly built directory into place of the old mirror.
        if os.path.exists(self.mirror_path):
          gclient_utils.rmtree(self.mirror_path)
        self.Rename(tempdir, self.mirror_path)
      if not ignore_lock:
        lockfile.unlock()
szager@chromium.org848fd492014-04-09 19:06:44 +0000605
hinoka@chromium.orgc8444f32014-06-18 23:18:17 +0000606 def update_bootstrap(self, prune=False):
Karen Qian74913992019-04-26 00:31:34 +0000607 # The folder is <git number>
szager@chromium.org848fd492014-04-09 19:06:44 +0000608 gen_number = subprocess.check_output(
609 [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
hinoka@chromium.org7b1cb6f2014-09-08 21:40:50 +0000610 # Run Garbage Collect to compress packfile.
611 self.RunGit(['gc', '--prune=all'])
szager@chromium.org848fd492014-04-09 19:06:44 +0000612
Karen Qian74913992019-04-26 00:31:34 +0000613 gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
614
615 src_name = self.mirror_path
616 dest_name = '%s/%s' % (self.gs_folder, gen_number)
617
618 # check to see if folder already exists in gs
619 _, ls_out, ls_err = gsutil.check_call('ls', dest_name)
620 _, ls_out_ready, ls_err_ready = (
621 gsutil.check_call('ls', dest_name + '.ready'))
622
623 if ls_err:
624 print('Failed to check GS:\n%s' % (ls_err))
625 return
626
627 if ls_err_ready:
628 print('Failed to check GS:\n%s' % (ls_err_ready))
629 return
630
631 if not (ls_out == '' and ls_out_ready == ''):
632 return
633
634 gsutil.call('-m', 'cp', '-r', src_name, dest_name)
635
636 #TODO(karenqian): prune old caches
637
638 # create .ready file and upload
639 _, ready_file_name = tempfile.mkstemp(suffix='.ready')
640 try:
641 gsutil.call('cp', ready_file_name, '%s.ready' % (dest_name))
642 finally:
643 os.remove(ready_file_name)
hinoka@chromium.orgc8444f32014-06-18 23:18:17 +0000644
szager@chromium.orgcdfcd7c2014-06-10 23:40:46 +0000645 @staticmethod
646 def DeleteTmpPackFiles(path):
647 pack_dir = os.path.join(path, 'objects', 'pack')
szager@chromium.org33418492014-06-18 19:03:39 +0000648 if not os.path.isdir(pack_dir):
649 return
szager@chromium.orgcdfcd7c2014-06-10 23:40:46 +0000650 pack_files = [f for f in os.listdir(pack_dir) if
651 f.startswith('.tmp-') or f.startswith('tmp_pack_')]
652 for f in pack_files:
653 f = os.path.join(pack_dir, f)
654 try:
655 os.remove(f)
656 logging.warn('Deleted stale temporary pack file %s' % f)
657 except OSError:
658 logging.warn('Unable to delete temporary pack file %s' % f)
szager@chromium.org174766f2014-05-13 21:27:46 +0000659
Vadim Shtayura08049e22017-10-11 00:14:52 +0000660 @classmethod
661 def BreakLocks(cls, path):
662 did_unlock = False
663 lf = Lockfile(path)
664 if lf.break_lock():
665 did_unlock = True
666 # Look for lock files that might have been left behind by an interrupted
667 # git process.
668 lf = os.path.join(path, 'config.lock')
669 if os.path.exists(lf):
670 os.remove(lf)
671 did_unlock = True
672 cls.DeleteTmpPackFiles(path)
673 return did_unlock
674
  def unlock(self):
    """Breaks any locks on this mirror; returns what BreakLocks() returns."""
    return self.BreakLocks(self.mirror_path)
677
678 @classmethod
679 def UnlockAll(cls):
680 cachepath = cls.GetCachePath()
681 if not cachepath:
682 return
683 dirlist = os.listdir(cachepath)
684 repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
685 if os.path.isdir(os.path.join(cachepath, path))])
686 for dirent in dirlist:
687 if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
688 gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
689 elif (dirent.endswith('.lock') and
690 os.path.isfile(os.path.join(cachepath, dirent))):
691 repo_dirs.add(os.path.join(cachepath, dirent[:-5]))
692
693 unlocked_repos = []
694 for repo_dir in repo_dirs:
695 if cls.BreakLocks(repo_dir):
696 unlocked_repos.append(repo_dir)
697
698 return unlocked_repos
szager@chromium.org848fd492014-04-09 19:06:44 +0000699
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(args[0])
  if not mirror.exists():
    return 1
  # Print the mirror location so callers can capture it from stdout.
  print(mirror.mirror_path)
  return 0
712
713
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated.
  # Populate from the remote only: --no-bootstrap is appended to a copy of
  # the arguments so the caller's list is left untouched.
  populate_args = args[:]
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  # NOTE(review): args[0] assumes at least one positional argument remains;
  # presumably CMDpopulate has already rejected a missing URL -- confirm.
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune)
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000736
737
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # (flags, add_option keyword arguments), registered in this order.
  option_table = (
      (('--depth',),
       {'type': 'int', 'help': 'Only cache DEPTH commits of history'}),
      (('--shallow', '-s'),
       {'action': 'store_true',
        'help': 'Only cache 10000 commits of history'}),
      (('--ref',),
       {'action': 'append',
        'help': 'Specify additional refs to be fetched'}),
      (('--no_bootstrap', '--no-bootstrap'),
       {'action': 'store_true',
        'help': 'Don\'t bootstrap from Google Storage'}),
      (('--ignore_locks', '--ignore-locks'),
       {'action': 'store_true',
        'help': 'Don\'t try to lock repository'}),
      (('--reset-fetch-config',),
       {'action': 'store_true', 'default': False,
        'help': 'Reset the fetch config before populating the cache.'}),
  )
  for flags, spec in option_table:
    parser.add_option(*flags, **spec)

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  # 'depth' is only forwarded when a non-zero value was given.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000773
774
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  def git_output(*cmd):
    """Run git with |cmd| and return its stdout as whitespace-stripped text."""
    # universal_newlines makes check_output return text (not bytes) on
    # Python 3, so the result can be compared with and formatted into str.
    return subprocess.check_output(
        [Mirror.git_exe] + list(cmd), universal_newlines=True).strip()

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    if args:
      # Report bad usage through the parser; an assert would be stripped
      # under -O and shows a traceback instead of a usage error.
      parser.error('fetch --all does not take a repository argument')
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
    if current_branch != 'HEAD':
      try:
        upstream = git_output('config', 'branch.%s.remote' % current_branch)
      except subprocess.CalledProcessError:
        # 'git config' exits non-zero when the key is unset; treat that as
        # "no upstream" so the 'origin' fallback below applies.
        upstream = ''
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # The output is stripped so no trailing newline ends up in the path.
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
  if git_dir.startswith(cachepath):
    # The current directory is itself a cache entry: just refresh it.
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote)
    if remote_url.startswith(cachepath):
      # The remote points into the cache: update the mirror first, with the
      # mirror's own print output silenced.
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *_args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
826
827
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    # Without --force nothing is unlocked; list the lock files that exist in
    # the cache directory and bail out through parser.error().
    cachepath = Mirror.GetCachePath()
    lockfiles = []
    for name in os.listdir(cachepath):
      candidate = os.path.join(cachepath, name)
      # Test the path inside the cache directory, not the bare name (which
      # would be resolved against the current working directory).
      if name.endswith('.lock') and os.path.isfile(candidate):
        lockfiles.append(candidate)
    # The explicit '+' keeps ', ' as the join separator; relying on implicit
    # literal concatenation would make the whole message the separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    # Tolerate a None result (no cache path configured).
    unlocked_repos.extend(Mirror.UnlockAll() or [])
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
859
860
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Create the parser and register the options shared by all commands.

    The program name is fixed to 'git cache'; all other arguments are passed
    through to optparse.OptionParser.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parse arguments, then apply the global logging and cache-dir options.

    Returns:
      The (options, args) pair produced by optparse, with options.verbose
      forced to 0 when --quiet was given.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # Map the verbosity count onto a logging level, clamping at DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias that newer Python versions no
        # longer provide; logging.warning is the supported spelling.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
898
899
def main(argv):
  """Run the git-cache subcommand selected by argv and return its result."""
  # The dispatcher is built for this module and executed with a fresh
  # OptionParser.
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
903
904
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with 1 after Ctrl-C.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)