blob: 8580be76a0dcf1aa06d298e82c725609491513ba [file] [log] [blame]
Edward Lesmes98eda3f2019-08-12 21:09:53 +00001#!/usr/bin/env python
agable@chromium.org5a306a22014-02-24 22:13:59 +00002# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
Raul Tambreb946b232019-03-26 14:48:46 +00009
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -080010import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +000011import errno
12import logging
13import optparse
14import os
szager@chromium.org174766f2014-05-13 21:27:46 +000015import re
John Budorick47ec0692019-05-01 15:04:28 +000016import subprocess
17import sys
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000019import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000020import time
Raul Tambreb946b232019-03-26 14:48:46 +000021
22try:
23 import urlparse
24except ImportError: # For Py3 compatibility
25 import urllib.parse as urlparse
26
hinoka@google.com563559c2014-04-02 00:36:24 +000027from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000028import gclient_utils
Josip Sokcevicd3affaa2020-05-20 20:42:50 +000029import lockfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000030import subcommand
31
# Analogous to gc.autopacklimit git config.
# Mirror._ensure_bootstrapped() re-bootstraps a mirror once it holds more than
# this many .pack files.
GC_AUTOPACKLIMIT = 50

# Printed by Mirror.populate() when a ClobberNeeded error forces the mirror to
# be deleted and rebuilt.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'
36
# WindowsError only exists as a builtin on Windows. Where the name is missing,
# define a stand-in exception class so that `WinErr` is always bound.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000043
class ClobberNeeded(Exception):
  """Raised when the mirror is unusable and must be deleted and rebuilt.

  Mirror.config() and Mirror._fetch() raise this; Mirror.populate() catches it,
  removes the mirror directory and forces a fresh bootstrap.
  """
  pass
agable@chromium.org5a306a22014-02-24 22:13:59 +000046
dnj4625b5a2016-11-10 18:23:26 -080047
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.
  """
  printerr = printerr or logging.warning
  # An empty tuple in an `except` clause matches nothing, which would silently
  # disable retries; honor the documented fallback instead.
  if not excs:
    excs = (Exception,)
  for i in range(count):
    try:
      return fn()
    except excs as e:
      # Out of attempts: let the last exception propagate to the caller.
      if (i+1) >= count:
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
81
82
class Mirror(object):
  """A bare git mirror of a remote repository, kept under a shared cache dir.

  The mirror for |url| lives at GetCachePath()/UrlToCacheDir(url). It can be
  bootstrapped from Google Storage and is refreshed with 'git fetch'.
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards reads and writes of the class-level 'cachepath' attribute.
  cachepath_lock = threading.Lock()

  # Sentinel stored in 'cachepath' when no cache path is configured.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    # Without an explicit destination, mirror the ref onto the same name.
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Creates a mirror object for |url|.

    Args:
      url (str): URL (or local path) of the repository to mirror.
      refs (list): Optional additional fetch specs, see parse_fetch_spec().
      print_func (callable): Optional replacement for print(); it is called
          with the message only.
    """
    self.url = url
    self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    """Forwards |message| to print_func, dropping print() keyword arguments."""
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long its body took, in minutes."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Name of the Google Storage bucket to bootstrap from, or None."""
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @property
  def _gs_path(self):
    """Google Storage prefix under which this mirror's snapshots are stored."""
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

  @classmethod
  def FromPath(cls, path):
    """Creates a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    if os.path.isdir(url):
      # Ignore the drive letter in Windows
      url = os.path.splitdrive(url)[1]
      return url.replace('-', '--').replace(os.sep, '-')

    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]

    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    # A lone '-' between word characters encodes '/'; '--' encodes a literal
    # '-' (the inverse of UrlToCacheDir).
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Sets the cache root directory shared by all Mirror instances."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Returns the cache root directory.

    On first use the value is read from the 'cache.cachepath' git config,
    falling back to $GIT_CACHE_PATH, and is then remembered on the class.

    Raises:
      RuntimeError: if neither source provides a cache path.
    """
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).decode('utf-8', 'ignore').strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  @staticmethod
  def _GetMostRecentCacheDirectory(ls_out_set):
    """Returns the highest-numbered ready snapshot directory, or None.

    Args:
      ls_out_set (set): Paths listed under the mirror's Google Storage prefix.
    """
    ready_file_pattern = re.compile(r'.*/(\d+).ready$')
    ready_dirs = []

    for name in ls_out_set:
      m = ready_file_pattern.match(name)
      # Given <path>/<number>.ready,
      # we are interested in <path>/<number> directory
      if m and (name[:-len('.ready')] + '/') in ls_out_set:
        ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

    if not ready_dirs:
      return None

    return max(ready_dirs)[1]

  def Rename(self, src, dst):
    """Renames |src| to |dst|, retrying with backoff on OSError."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Pre-set the askpass helpers to 'true' unless the caller already chose
    # them.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Writes the mirror's git configuration into the repository at |cwd|.

    Args:
      cwd (str): Repository directory; defaults to the mirror path.
      reset_fetch_config (bool): Drop all existing remote.origin.fetch entries
          before writing the new ones.

    Raises:
      ClobberNeeded: if even a basic 'git config' write fails.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

    Returns True if |directory| was replaced with a downloaded snapshot,
    False otherwise.
    """
    if not self.bootstrap_bucket:
      return False

    gsutil = Gsutil(self.gsutil_exe, boto_path=None)

    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
    ls_out_set = set(ls_out.strip().splitlines())
    latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

    if not latest_dir:
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False

    # |tempdir| stays None until mkdtemp succeeds so that the cleanup below
    # never touches an unbound name.
    tempdir = None
    downloaded = False
    try:
      # create new temporary directory locally
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.RunGit(['init', '--bare'], cwd=tempdir)
      self.print('Downloading files in %s/* into %s.' %
                 (latest_dir, tempdir))
      with self.print_duration_of('download'):
        code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                           tempdir)
      downloaded = not code
    except Exception as e:
      self.print('Encountered error: %s' % str(e), file=sys.stderr)

    if not downloaded:
      # Don't leave a partially populated temp dir behind in the cache.
      if tempdir is not None:
        gclient_utils.rmtree(tempdir)
      return False

    # delete the old directory
    if os.path.exists(directory):
      gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True

  def contains_revision(self, revision):
    """Returns True if the mirror exists and has a commit named |revision|."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    """Returns True if the mirror directory contains a git 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path).decode('utf-8', 'ignore')
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warning(
          'Tried and failed to preserve remote.origin.fetch from the '
          'existing cache directory.  You may need to manually edit '
          '%s and "git cache fetch" again.',
          os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(
      self, depth, bootstrap, reset_fetch_config, force=False):
    """Makes sure the mirror directory holds an initialized repository.

    Re-bootstraps when forced, when the mirror does not exist, or when it has
    no pack files or more than GC_AUTOPACKLIMIT of them.

    Args:
      depth (int): Requested shallow depth; a truthy value disables
          bootstrapping from Google Storage.
      bootstrap (bool): Whether bootstrapping from Google Storage is allowed.
      reset_fetch_config (bool): If False, fetch specs of an existing mirror
          are preserved across the re-bootstrap.
      force (bool): Bootstrap regardless of the mirror's current state.
    """
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []
    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d or ==0' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT or
                        len(pack_files) == 0)

    if not should_bootstrap:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
      return

    if not self.exists():
      if os.path.exists(self.mirror_path):
        # If the mirror path exists but self.exists() returns false, we're
        # in an unexpected state. Nuke the previous mirror directory and
        # start fresh.
        gclient_utils.rmtree(self.mirror_path)
      os.mkdir(self.mirror_path)
    elif not reset_fetch_config:
      # Re-bootstrapping an existing mirror; preserve existing fetch spec.
      self._preserve_fetchspec()

    bootstrapped = (not depth and bootstrap and
                    self.bootstrap_repo(self.mirror_path))

    if not bootstrapped:
      if not self.exists() or not self.supported_project():
        # Bootstrap failed due to:
        # 1. No previous cache.
        # 2. Project doesn't have a bootstrap folder.
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=self.mirror_path)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warning(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.',
            len(pack_files))

  def _fetch(self,
             rundir,
             verbose,
             depth,
             no_fetch_tags,
             reset_fetch_config,
             prune=True):
    """Configures |rundir| and fetches every configured fetch spec into it.

    Raises:
      ClobberNeeded: if fetching the main refs/heads/* spec fails. Failures of
          other specs are only logged.
    """
    self.config(rundir, reset_fetch_config)

    fetch_cmd = ['fetch']
    if verbose:
      fetch_cmd.extend(['-v', '--progress'])
    if depth:
      fetch_cmd.extend(['--depth', str(depth)])
    if no_fetch_tags:
      fetch_cmd.append('--no-tags')
    if prune:
      fetch_cmd.append('--prune')
    fetch_cmd.append('origin')

    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).decode('utf-8', 'ignore').strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warning('Fetch of %s failed', spec)

  def populate(self,
               depth=None,
               no_fetch_tags=False,
               shallow=False,
               bootstrap=False,
               verbose=False,
               lock_timeout=0,
               reset_fetch_config=False):
    """Bootstraps the mirror if needed and fetches into it, under a lock.

    If the first attempt raises ClobberNeeded, the mirror directory is deleted
    and the bootstrap and fetch are repeated once with force=True.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    with lockfile.lock(self.mirror_path, lock_timeout):
      try:
        self._ensure_bootstrapped(depth, bootstrap, reset_fetch_config)
        self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                    reset_fetch_config)
      except ClobberNeeded:
        # This is a major failure, we need to clean and force a bootstrap.
        gclient_utils.rmtree(self.mirror_path)
        self.print(GIT_CACHE_CORRUPT_MESSAGE)
        self._ensure_bootstrapped(depth,
                                  bootstrap,
                                  reset_fetch_config,
                                  force=True)
        self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                    reset_fetch_config)

  def update_bootstrap(self, prune=False, gc_aggressive=False):
    """Uploads the mirror to Google Storage as a new bootstrap snapshot.

    Args:
      prune (bool): After uploading, remove the other entries listed under the
          same Google Storage prefix, keeping only the most recent snapshot
          that was ready before this upload.
      gc_aggressive (bool): Pass --aggressive to 'git gc' before uploading.
    """
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'],
        cwd=self.mirror_path).decode('utf-8', 'ignore').strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)

    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)

    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
        dest_prefix + '.ready' in ls_out_set):
      print('Cache %s already exists.' % dest_prefix)
      return

    # Run Garbage Collect to compress packfile.
    gc_args = ['gc', '--prune=all']
    if gc_aggressive:
      gc_args.append('--aggressive')
    self.RunGit(gc_args)

    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)

    # Create .ready file and upload
    ready_fd, ready_file_name = tempfile.mkstemp(suffix='.ready')
    # mkstemp hands back an open descriptor; only the (empty) file itself is
    # needed, so close it right away instead of leaking it.
    os.close(ready_fd)
    try:
      gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
      os.remove(ready_file_name)

    # remove all other directory/.ready files in the same gs_path
    # except for the directory/.ready file previously created
    # which can be used for bootstrapping while the current one is
    # being uploaded
    if not prune:
      return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
      return
    for path in ls_out_set:
      if (path == prev_dest_prefix + '/' or
          path == prev_dest_prefix + '.ready'):
        continue
      if path.endswith('.ready'):
        gsutil.call('rm', path)
        continue
      gsutil.call('-m', 'rm', '-r', path)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Removes leftover temporary pack files from the repository at |path|."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warning('Deleted stale temporary pack file %s', f)
      except OSError:
        logging.warning('Unable to delete temporary pack file %s', f)
szager@chromium.org174766f2014-05-13 21:27:46 +0000550
szager@chromium.org848fd492014-04-09 19:06:44 +0000551
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  # Exit status 1 signals "not cached"; nothing is printed in that case.
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
563 return 1
564
565
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--skip-populate', action='store_true',
                    help='Skips "populate" step if mirror already exists.')
  parser.add_option('--gc-aggressive', action='store_true',
                    help='Run aggressive repacking of the repo.')
  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached bundles of the same repo.')

  # Keep an untouched copy of the command line: CMDpopulate re-parses it.
  populate_args = args[:]
  options, args = parser.parse_args(args)
  # Fail with a usage error instead of an IndexError when the url is missing.
  if len(args) != 1:
    parser.error(
        'git cache update-bootstrap only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if not options.skip_populate or not mirror.exists():
    CMDpopulate(parser, populate_args)
  else:
    print('Skipped populate step.')

  # Get the repo directory.
  _, args2 = parser.parse_args(args)
  url = args2[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune, options.gc_aggressive)
  return 0
szager@chromium.org848fd492014-04-09 19:06:44 +0000595 return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000596
597
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks',
                    '--ignore-locks',
                    action='store_true',
                    help='NOOP. This flag will be removed in the future.')
  parser.add_option('--break-locks',
                    action='store_true',
                    help='Break any existing lock instead of just ignoring it')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, args = parser.parse_args(args)
  if not len(args) == 1:
    parser.error('git cache populate only takes exactly one repo url.')
  # optparse derives the attribute name from the first long option string, so
  # the value lives in 'ignore_locks'; reading 'ignore_lock' would raise
  # AttributeError.
  if options.ignore_locks:
    print('ignore_locks is no longer used. Please remove its usage.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  # options.verbose and options.timeout are not registered here; presumably
  # the parser passed in by the caller defines them -- confirm against its
  # definition.
  kwargs = {
      'no_fetch_tags': options.no_fetch_tags,
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
      'lock_timeout': options.timeout,
      'reset_fetch_config': options.reset_fetch_config,
  }
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000644
645
@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  # NOTE(review): the docstring above is deliberately left untouched; it is
  # presumably surfaced by the subcommand dispatcher as help text -- confirm
  # before rewording it.
  #
  # Args:
  #   parser: option parser to extend with fetch-specific flags. It must also
  #       provide the global --timeout option (this file's OptionParser does).
  #   args: command-line arguments following the subcommand name; any
  #       positional arguments are taken as the remotes to fetch.
  #
  # Returns:
  #   0 once the cache has been populated and the fetches have completed.
  #
  # Raises:
  #   subprocess.CalledProcessError: if any of the git invocations fails.
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  options, args = parser.parse_args(args)

  def git_output(*git_args):
    # Runs git in the current directory and returns its stdout as text.
    # Callers are responsible for stripping the trailing newline.
    raw = subprocess.check_output([Mirror.git_exe] + list(git_args))
    return raw.decode('utf-8', 'ignore')

  def populate(mirror):
    # Applies the same command-line driven settings to every mirror update.
    mirror.populate(
        bootstrap=not options.no_bootstrap,
        no_fetch_tags=options.no_fetch_tags,
        lock_timeout=options.timeout)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = git_output('remote').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD').strip()
    # 'HEAD' is what rev-parse prints for a detached head, i.e. there is no
    # branch whose configured remote could be looked up.
    if current_branch != 'HEAD':
      upstream = git_output(
          'config', 'branch.%s.remote' % current_branch).strip()
      # A remote of '.' means the branch tracks another local branch.
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  # Fixed: git terminates its output with a newline.  It used to be left in
  # place, so it became part of the final path component passed to
  # Mirror.FromPath() below.  (The former second abspath() call was a no-op
  # and has been dropped.)
  git_dir = os.path.abspath(git_output('rev-parse', '--git-dir').strip())
  if git_dir.startswith(cachepath):
    # The current directory is itself a cache entry: updating the mirror is
    # all there is to do.
    populate(Mirror.FromPath(git_dir))
    return 0
  for remote in remotes:
    remote_url = git_output('config', 'remote.%s.url' % remote).strip()
    if remote_url.startswith(cachepath):
      # The remote points into the cache, so refresh that mirror first.
      mirror = Mirror.FromPath(remote_url)
      # Silence the mirror's own output; a single summary line is printed
      # instead.
      mirror.print = lambda *_: None
      print('Updating git cache...')
      populate(mirror)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0
709
710
@subcommand.usage('do not use - it is a noop.')
def CMDunlock(parser, args):
  """This command does nothing."""
  # Kept only so that existing invocations of the subcommand keep working;
  # neither |parser| nor |args| is consulted.
  deprecation_notice = (
      'This command does nothing and will be removed in the future.')
  print(deprecation_notice)
Vadim Shtayura08049e22017-10-11 00:14:52 +0000715
716
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Registers the options shared by every subcommand (-c/--cache-dir,
  -v/--verbose, -q/--quiet, --timeout) and acts on them as soon as the
  command line is parsed.
  """

  def __init__(self, *args, **kwargs):
    """Creates the parser with prog set to 'git cache'.

    All arguments are forwarded unchanged to optparse.OptionParser.
    """
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    """Parses the command line and applies the global options.

    Besides the regular optparse parsing this configures the logging level
    from -v/-q and, when --cache-dir is given, overrides the cache path used
    by Mirror.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # verbose is a counter starting at 1, so the default level is WARNING;
    # each -v moves one step towards DEBUG and -q selects ERROR.  Counts past
    # the end of the list are clamped to DEBUG.
    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # Treated as "no cache directory configured globally".
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
754
755
def main(argv):
  """Dispatches |argv| to the matching CMD* function of this module.

  Returns whatever the command dispatcher's execute() returns.
  """
  command_runner = subcommand.CommandDispatcher(__name__)
  cli_parser = OptionParser()
  exit_status = command_runner.execute(cli_parser, argv)
  return exit_status
759
760
if __name__ == '__main__':
  # Script entry point: exit with main()'s result, or with 1 after a short
  # notice when the user interrupts the run.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)