blob: a15b3f939d348428abefba644986ec8187221eae [file] [log] [blame]
Josip Sokcevic4de5dea2022-03-23 21:15:14 +00001#!/usr/bin/env python3
agable@chromium.org5a306a22014-02-24 22:13:59 +00002# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
agable@chromium.org5a306a22014-02-24 22:13:59 +00005"""A git command for managing a local cache of git repositories."""
6
Andrii Shyshkalov4f56f232017-11-23 02:19:25 -08007import contextlib
agable@chromium.org5a306a22014-02-24 22:13:59 +00008import logging
9import optparse
10import os
szager@chromium.org174766f2014-05-13 21:27:46 +000011import re
John Budorick47ec0692019-05-01 15:04:28 +000012import subprocess
13import sys
agable@chromium.org5a306a22014-02-24 22:13:59 +000014import tempfile
szager@chromium.org1132f5f2014-08-23 01:57:59 +000015import threading
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000016import time
Gavin Makcc976552023-08-28 17:01:52 +000017import urllib.parse
Raul Tambreb946b232019-03-26 14:48:46 +000018
hinoka@google.com563559c2014-04-02 00:36:24 +000019from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000020import gclient_utils
Josip Sokcevic14a83ae2020-05-21 01:36:34 +000021import lockfile
Edward Lesmescb047442021-05-06 20:18:49 +000022import metrics
agable@chromium.org5a306a22014-02-24 22:13:59 +000023import subcommand
24
# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

# Message printed when a corrupt cache is detected and must be clobbered.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# gsutil creates many processes and threads. Creating too many gsutil cp
# processes may result in running out of resources, and may perform worse due to
# context switching. This limits how many concurrent gsutil cp processes
# git_cache runs.
GSUTIL_CP_SEMAPHORE = threading.Semaphore(2)
35
# WindowsError only exists on Windows; everywhere else fall back to a
# placeholder exception type so that |WinErr| is always a valid name.
try:
    WinErr = WindowsError  # pylint: disable=undefined-variable
except NameError:

    class WinErr(Exception):
        """Stand-in for WindowsError on platforms that do not define it."""
43
agable@chromium.org5a306a22014-02-24 22:13:59 +000044
class ClobberNeeded(Exception):
    """Raised when the cache is unusable and must be deleted and rebuilt."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000047
dnj4625b5a2016-11-10 18:23:26 -080048
def exponential_backoff_retry(fn,
                              excs=(Exception, ),
                              name=None,
                              count=10,
                              sleep_time=0.25,
                              printerr=None):
    """Executes |fn| up to |count| times, backing off exponentially.

    Args:
        fn (callable): The function to execute. If this raises a handled
            exception, the function will retry with exponential backoff.
        excs (tuple): A tuple of Exception types to handle. If one of these is
            raised by |fn|, a retry will be attempted. If |fn| raises an
            Exception that is not in this list, it will immediately pass
            through. If |excs| is empty, the Exception base class will be used.
        name (str): Optional operation name to print in the retry string.
        count (int): The number of times to try before allowing the exception
            to pass through.
        sleep_time (float): The initial number of seconds to sleep in between
            retries. This will be doubled each retry.
        printerr (callable): Function that will be called with the error string
            upon failures. If None, |logging.warning| will be used.

    Returns: The return value of the successful fn.
    """
    printerr = printerr or logging.warning
    # An empty tuple in an `except` clause catches nothing, so honor the
    # documented fallback explicitly.
    excs = excs or (Exception, )
    for i in range(count):
        try:
            return fn()
        except excs as e:
            # Out of attempts: let the last exception propagate unchanged.
            if (i + 1) >= count:
                raise

            printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' %
                     ((name or 'operation'), sleep_time, (i + 1), count, e))
            time.sleep(sleep_time)
            sleep_time *= 2
dnj4625b5a2016-11-10 18:23:26 -080086
87
class Mirror(object):
    """A bare git mirror of one remote repository inside the local git cache.

    The mirror lives at <cache path>/<UrlToCacheDir(url)>. It can be
    bootstrapped from a Google Storage bucket and is kept up to date with
    `git fetch`.
    """

    # Name of the git executable to invoke ('git.bat' on Windows).
    git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
    # gsutil.py is expected to live next to this file.
    gsutil_exe = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'gsutil.py')
    # Guards reads and writes of the class-level |cachepath| attribute.
    cachepath_lock = threading.Lock()

    # Sentinel stored in |cachepath| when no cache path is configured.
    UNSET_CACHEPATH = object()

    # Used for tests
    _GIT_CONFIG_LOCATION = []

    # Matches the default-branch line of `git remote show` output.
    _HEAD_BRANCH_RE = re.compile(r'HEAD branch: (.*)$', re.MULTILINE)

    @staticmethod
    def parse_fetch_spec(spec):
        """Parses and canonicalizes a fetch spec.

        Returns (fetchspec, value_regex), where value_regex can be used
        with 'git config --replace-all'.
        """
        parts = spec.split(':', 1)
        src = parts[0].lstrip('+').rstrip('/')
        if not src.startswith('refs/'):
            src = 'refs/heads/%s' % src
        dest = parts[1].rstrip('/') if len(parts) > 1 else src
        regex = r'\+%s:.*' % src.replace('*', r'\*')
        return ('+%s:%s' % (src, dest), regex)

    def __init__(self, url, refs=None, commits=None, print_func=None):
        """Sets up paths and fetch configuration for the mirror of |url|.

        Args:
            url (str): URL (or local directory) of the repository to mirror.
            refs (iterable): Additional fetch specs to configure and fetch.
            commits (iterable): Additional commits to fetch explicitly.
            print_func (callable): If given, called with each message instead
                of the builtin print.
        """
        self.url = url
        self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
        self.fetch_commits = set(commits or [])
        self.basedir = self.UrlToCacheDir(url)
        self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
        if print_func:
            self.print = self.print_without_file
            self.print_func = print_func
        else:
            self.print = print

    def print_without_file(self, message, **_kwargs):
        """Forwards |message| to |print_func|, dropping print() kwargs."""
        self.print_func(message)

    @contextlib.contextmanager
    def print_duration_of(self, what):
        """Context manager that prints how long the wrapped block took."""
        start = time.time()
        try:
            yield
        finally:
            self.print('%s took %.1f minutes' % (what,
                                                 (time.time() - start) / 60.0))

    @property
    def bootstrap_bucket(self):
        """Name of the GS bucket to bootstrap from, or None if unknown."""
        b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
        if b:
            return b
        u = urllib.parse.urlparse(self.url)
        if u.netloc == 'chromium.googlesource.com':
            return 'chromium-git-cache'
        # Not recognized.
        return None

    @property
    def _gs_path(self):
        """gs:// prefix under which this repo's bootstrap data is stored."""
        return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

    @classmethod
    def FromPath(cls, path):
        """Creates a Mirror from an existing cache directory path."""
        return cls(cls.CacheDirToUrl(path))

    @staticmethod
    def UrlToCacheDir(url):
        """Convert a git url to a normalized form for the cache dir path."""
        if os.path.isdir(url):
            # Ignore the drive letter in Windows
            url = os.path.splitdrive(url)[1]
            return url.replace('-', '--').replace(os.sep, '-')

        parsed = urllib.parse.urlparse(url)
        norm_url = parsed.netloc + parsed.path
        if norm_url.endswith('.git'):
            norm_url = norm_url[:-len('.git')]

        # Use the same dir for authenticated URLs and unauthenticated URLs.
        norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

        return norm_url.replace('-', '--').replace('/', '-').lower()

    @staticmethod
    def CacheDirToUrl(path):
        """Convert a cache dir path to its corresponding url."""
        # A single '-' between word characters encodes '/', while '--'
        # encodes a literal '-'.
        netpath = re.sub(r'\b-\b', '/',
                         os.path.basename(path)).replace('--', '-')
        return 'https://%s' % netpath

    @classmethod
    def SetCachePath(cls, cachepath):
        """Overrides the cache path for every Mirror."""
        with cls.cachepath_lock:
            cls.cachepath = cachepath

    @classmethod
    def GetCachePath(cls):
        """Returns the cache path, resolving and memoizing it on first use.

        The path comes from the cache.cachepath git config, falling back to
        $GIT_CACHE_PATH.

        Raises:
            RuntimeError: If neither source provides a path.
        """
        with cls.cachepath_lock:
            if not hasattr(cls, 'cachepath'):
                try:
                    cachepath = subprocess.check_output(
                        [cls.git_exe, 'config'] + cls._GIT_CONFIG_LOCATION +
                        ['cache.cachepath']).decode('utf-8', 'ignore').strip()
                except subprocess.CalledProcessError:
                    cachepath = os.environ.get('GIT_CACHE_PATH',
                                               cls.UNSET_CACHEPATH)
                cls.cachepath = cachepath

            ret = cls.cachepath
            if ret is cls.UNSET_CACHEPATH:
                raise RuntimeError('No cache.cachepath git configuration or '
                                   '$GIT_CACHE_PATH is set.')
            return ret

    @staticmethod
    def _GetMostRecentCacheDirectory(ls_out_set):
        """Returns the highest-numbered GS directory that has a .ready file.

        Args:
            ls_out_set (set): Paths printed by `gsutil ls`.

        Returns: The <path>/<number> prefix (no trailing slash), or None if no
            directory has a matching .ready marker.
        """
        ready_file_pattern = re.compile(r'.*/(\d+)\.ready$')
        ready_dirs = []

        for name in ls_out_set:
            m = ready_file_pattern.match(name)
            # Given <path>/<number>.ready,
            # we are interested in <path>/<number> directory
            if m and (name[:-len('.ready')] + '/') in ls_out_set:
                ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

        if not ready_dirs:
            return None

        return max(ready_dirs)[1]

    @classmethod
    def _parse_default_branch(cls, remote_info):
        """Extracts the HEAD branch name from `git remote show` output.

        Returns: The branch name, or None if no 'HEAD branch:' line exists.
        """
        m = cls._HEAD_BRANCH_RE.search(remote_info)
        return m.group(1).strip() if m else None

    def Rename(self, src, dst):
        """Renames |src| to |dst|, retrying with backoff on OSError."""
        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        exponential_backoff_retry(lambda: os.rename(src, dst),
                                  excs=(OSError, ),
                                  name='rename [%s] => [%s]' % (src, dst),
                                  printerr=self.print)

    def RunGit(self, cmd, print_stdout=True, **kwargs):
        """Run git in a subprocess."""
        cwd = kwargs.setdefault('cwd', self.mirror_path)
        if "--git-dir" not in cmd:
            cmd = ['--git-dir', os.path.abspath(cwd)] + cmd

        kwargs.setdefault('print_stdout', False)
        if print_stdout:
            kwargs.setdefault('filter_fn', self.print)
        env = kwargs.get('env')
        if env is None:
            # Covers both a missing 'env' and an explicit env=None.
            env = kwargs['env'] = os.environ.copy()
        # Never block on an interactive credential prompt.
        env.setdefault('GIT_ASKPASS', 'true')
        env.setdefault('SSH_ASKPASS', 'true')
        self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
        gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

    def config(self, reset_fetch_config=False):
        """Writes the git configuration the mirror relies on.

        Raises:
            ClobberNeeded: If even a basic `git config` write fails.
        """
        if reset_fetch_config:
            try:
                self.RunGit(['config', '--unset-all', 'remote.origin.fetch'])
            except subprocess.CalledProcessError as e:
                # If exit code was 5, it means we attempted to unset a config
                # that didn't exist. Ignore it.
                if e.returncode != 5:
                    raise

        # Don't run git-gc in a daemon. Bad things can happen if it gets
        # killed.
        try:
            self.RunGit(['config', 'gc.autodetach', '0'])
        except subprocess.CalledProcessError:
            # Hard error, need to clobber.
            raise ClobberNeeded()

        # Don't combine pack files into one big pack file. It's really slow for
        # repositories, and there's no way to track progress and make sure it's
        # not stuck.
        if self.supported_project():
            self.RunGit(['config', 'gc.autopacklimit', '0'])

        # Allocate more RAM for cache-ing delta chains, for better performance
        # of "Resolving deltas".
        self.RunGit([
            'config', 'core.deltaBaseCacheLimit',
            gclient_utils.DefaultDeltaBaseCacheLimit()
        ])

        self.RunGit(['config', 'remote.origin.url', self.url])
        self.RunGit([
            'config', '--replace-all', 'remote.origin.fetch',
            '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'
        ])
        for spec, value_regex in self.fetch_specs:
            self.RunGit([
                'config', '--replace-all', 'remote.origin.fetch', spec,
                value_regex
            ])

    def bootstrap_repo(self, directory):
        """Bootstrap the repo from Google Storage if possible.

        More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().

        Returns: True if |directory| was replaced with a downloaded copy,
            False otherwise.
        """
        if not self.bootstrap_bucket:
            return False

        gsutil = Gsutil(self.gsutil_exe, boto_path=None)

        # Get the most recent version of the directory.
        # This is determined from the most recent version of a .ready file.
        # The .ready file is only uploaded when an entire directory has been
        # uploaded to GS.
        _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
        ls_out_set = set(ls_out.strip().splitlines())
        latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

        if not latest_dir:
            self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                       (self.mirror_path, self.bootstrap_bucket, '  '.join(
                           (ls_err or '').splitlines(True))))
            return False

        # |tempdir| stays None until mkdtemp succeeds, so the cleanup below
        # never touches an unbound name.
        tempdir = None
        downloaded = False
        try:
            # create new temporary directory locally
            tempdir = tempfile.mkdtemp(prefix='_cache_tmp',
                                       dir=self.GetCachePath())
            self.RunGit(['init', '-b', 'main', '--bare'], cwd=tempdir)
            self.print('Downloading files in %s/* into %s.' %
                       (latest_dir, tempdir))
            with self.print_duration_of('download'):
                with GSUTIL_CP_SEMAPHORE:
                    code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                                       tempdir)
            if not code:
                # A quick validation that all references are valid.
                self.RunGit(['for-each-ref'], print_stdout=False, cwd=tempdir)
                downloaded = True
        except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)

        if not downloaded:
            # Don't leave a partial download behind in the cache directory.
            if tempdir:
                gclient_utils.rmtree(tempdir)
            return False

        # delete the old directory
        if os.path.exists(directory):
            gclient_utils.rmtree(directory)
        self.Rename(tempdir, directory)
        return True

    def contains_revision(self, revision):
        """Returns True if the mirror has a commit object for |revision|."""
        if not self.exists():
            return False

        if sys.platform.startswith('win'):
            # Windows .bat scripts use ^ as escape sequence, which means we have
            # to escape it with itself for every .bat invocation.
            needle = '%s^^^^{commit}' % revision
        else:
            needle = '%s^{commit}' % revision
        try:
            # cat-file exits with 0 on success, that is git object of given hash
            # was found.
            self.RunGit(['cat-file', '-e', needle])
            return True
        except subprocess.CalledProcessError:
            self.print('Commit with hash "%s" not found' % revision,
                       file=sys.stderr)
            return False

    def exists(self):
        """Returns True if the mirror directory contains a git config file."""
        return os.path.isfile(os.path.join(self.mirror_path, 'config'))

    def supported_project(self):
        """Returns true if this repo is known to have a bootstrap zip file."""
        u = urllib.parse.urlparse(self.url)
        return u.netloc in [
            'chromium.googlesource.com', 'chrome-internal.googlesource.com'
        ]

    def _preserve_fetchspec(self):
        """Read and preserve remote.origin.fetch from an existing mirror.

        This modifies self.fetch_specs.
        """
        if not self.exists():
            return
        try:
            config_fetchspecs = subprocess.check_output(
                [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
                cwd=self.mirror_path).decode('utf-8', 'ignore')
            for fetchspec in config_fetchspecs.splitlines():
                self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
        except subprocess.CalledProcessError:
            logging.warning(
                'Tried and failed to preserve remote.origin.fetch from the '
                'existing cache directory.  You may need to manually edit '
                '%s and "git cache fetch" again.',
                os.path.join(self.mirror_path, 'config'))

    def _ensure_bootstrapped(self,
                             depth,
                             bootstrap,
                             reset_fetch_config,
                             force=False):
        """Makes sure a usable bare repo exists at |mirror_path|.

        Re-bootstraps from Google Storage when the mirror is missing, has no
        pack files, has more than GC_AUTOPACKLIMIT pack files, or |force| is
        set. If bootstrapping is not possible, falls back to `git init --bare`
        or to the existing cache.
        """
        pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
        pack_files = []
        if os.path.isdir(pack_dir):
            pack_files = [
                f for f in os.listdir(pack_dir) if f.endswith('.pack')
            ]
            self.print('%s has %d .pack files, re-bootstrapping if >%d or ==0' %
                       (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

        # master->main branch migration left the cache in some builders to have
        # its HEAD still pointing to refs/heads/master. This causes bot_update
        # to fail. If in this state, delete the cache and force bootstrap.
        try:
            with open(os.path.join(self.mirror_path, 'HEAD')) as f:
                head_ref = f.read()
        except FileNotFoundError:
            head_ref = ''

        # Check only when HEAD points to master.
        if 'master' in head_ref:
            # Some repos could still have master so verify if the ref exists
            # first.
            show_ref_master_cmd = subprocess.run(
                [Mirror.git_exe, 'show-ref', '--verify', 'refs/heads/master'],
                cwd=self.mirror_path)

            if show_ref_master_cmd.returncode != 0:
                # Remove mirror
                gclient_utils.rmtree(self.mirror_path)

                # force bootstrap
                force = True

        should_bootstrap = (force or not self.exists()
                            or len(pack_files) > GC_AUTOPACKLIMIT
                            or len(pack_files) == 0)

        if not should_bootstrap:
            if depth and os.path.exists(
                    os.path.join(self.mirror_path, 'shallow')):
                logging.warning(
                    'Shallow fetch requested, but repo cache already exists.')
            return

        if not self.exists():
            if os.path.exists(self.mirror_path):
                # If the mirror path exists but self.exists() returns false,
                # we're in an unexpected state. Nuke the previous mirror
                # directory and start fresh.
                gclient_utils.rmtree(self.mirror_path)
            os.mkdir(self.mirror_path)
        elif not reset_fetch_config:
            # Re-bootstrapping an existing mirror; preserve existing fetch spec.
            self._preserve_fetchspec()

        bootstrapped = (not depth and bootstrap
                        and self.bootstrap_repo(self.mirror_path))

        if not bootstrapped:
            if not self.exists() or not self.supported_project():
                # Bootstrap failed due to:
                # 1. No previous cache.
                # 2. Project doesn't have a bootstrap folder.
                # Start with a bare git dir.
                self.RunGit(['init', '--bare'])
                # Set appropriate symbolic-ref
                remote_info = exponential_backoff_retry(
                    lambda: subprocess.check_output(
                        [
                            self.git_exe, '--git-dir',
                            os.path.abspath(self.mirror_path), 'remote', 'show',
                            self.url
                        ],
                        cwd=self.mirror_path).decode('utf-8', 'ignore').strip())
                default_branch = self._parse_default_branch(remote_info)
                if default_branch:
                    self.RunGit(
                        ['symbolic-ref', 'HEAD', 'refs/heads/' + default_branch])
            else:
                # Bootstrap failed, previous cache exists; warn and continue.
                logging.warning(
                    'Git cache has a lot of pack files (%d). Tried to '
                    're-bootstrap but failed. Continuing with non-optimized '
                    'repository.', len(pack_files))

    def _fetch(self,
               verbose,
               depth,
               no_fetch_tags,
               reset_fetch_config,
               prune=True):
        """Configures the mirror, then fetches every fetch spec and commit.

        Raises:
            ClobberNeeded: If fetching the main refs/heads/* spec fails.
        """
        self.config(reset_fetch_config)

        fetch_cmd = ['fetch']
        if verbose:
            fetch_cmd.extend(['-v', '--progress'])
        if depth:
            fetch_cmd.extend(['--depth', str(depth)])
        if no_fetch_tags:
            fetch_cmd.append('--no-tags')
        if prune:
            fetch_cmd.append('--prune')
        fetch_cmd.append('origin')

        fetch_specs = subprocess.check_output(
            [
                self.git_exe, '--git-dir',
                os.path.abspath(self.mirror_path), 'config', '--get-all',
                'remote.origin.fetch'
            ],
            cwd=self.mirror_path).decode('utf-8',
                                         'ignore').strip().splitlines()
        for spec in fetch_specs:
            try:
                self.print('Fetching %s' % spec)
                with self.print_duration_of('fetch %s' % spec):
                    self.RunGit(fetch_cmd + [spec], retry=True)
            except subprocess.CalledProcessError:
                if spec == '+refs/heads/*:refs/heads/*':
                    raise ClobberNeeded()  # Corrupted cache.
                logging.warning('Fetch of %s failed', spec)
        for commit in self.fetch_commits:
            self.print('Fetching %s' % commit)
            try:
                with self.print_duration_of('fetch %s' % commit):
                    self.RunGit(['fetch', 'origin', commit], retry=True)
            except subprocess.CalledProcessError:
                logging.warning('Fetch of %s failed', commit)

    def populate(self,
                 depth=None,
                 no_fetch_tags=False,
                 shallow=False,
                 bootstrap=False,
                 verbose=False,
                 lock_timeout=0,
                 reset_fetch_config=False):
        """Bootstraps (if needed) and fetches the mirror while holding its lock.

        If the cache turns out to be corrupt (ClobberNeeded), the mirror is
        deleted and rebuilt once from scratch.
        """
        assert self.GetCachePath()
        if shallow and not depth:
            depth = 10000
        gclient_utils.safe_makedirs(self.GetCachePath())

        with lockfile.lock(self.mirror_path, lock_timeout):
            try:
                self._ensure_bootstrapped(depth, bootstrap, reset_fetch_config)
                self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config)
            except ClobberNeeded:
                # This is a major failure, we need to clean and force a
                # bootstrap.
                gclient_utils.rmtree(self.mirror_path)
                self.print(GIT_CACHE_CORRUPT_MESSAGE)
                self._ensure_bootstrapped(depth,
                                          bootstrap,
                                          reset_fetch_config,
                                          force=True)
                self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config)

    def update_bootstrap(self, prune=False, gc_aggressive=False):
        """Repacks the mirror and uploads it to Google Storage.

        Args:
            prune (bool): Also delete older uploaded generations, keeping the
                most recent previously-ready one.
            gc_aggressive (bool): Repack with 'gc --aggressive'-like settings.
        """
        # NOTE: There have been cases where repos were being recursively
        # uploaded to google storage. E.g.
        # `<host_url>-<repo>/<gen_number>/<host_url>-<repo>/` in GS and
        # <host_url>-<repo>/<host_url>-<repo>/ on the bot. Check for recursed
        # files on the bot here and remove them if found before we upload to GS.
        # See crbug.com/1370443; keep this check until root cause is found.
        recursed_dir = os.path.join(self.mirror_path,
                                    self.mirror_path.split(os.path.sep)[-1])
        if os.path.exists(recursed_dir):
            self.print('Deleting unexpected directory: %s' % recursed_dir)
            gclient_utils.rmtree(recursed_dir)

        # The folder is <git number>
        gen_number = subprocess.check_output([self.git_exe, 'number'],
                                             cwd=self.mirror_path).decode(
                                                 'utf-8', 'ignore').strip()
        gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

        dest_prefix = '%s/%s' % (self._gs_path, gen_number)

        # ls_out lists contents in the format: gs://blah/blah/123...
        self.print('running "gsutil ls %s":' % self._gs_path)
        ls_code, ls_out, ls_error = gsutil.check_call_with_retries(
            'ls', self._gs_path)
        if ls_code != 0:
            self.print(ls_error)
        else:
            self.print(ls_out)

        # Check to see if folder already exists in gs
        ls_out_set = set(ls_out.strip().splitlines())
        if (dest_prefix + '/' in ls_out_set
                and dest_prefix + '.ready' in ls_out_set):
            self.print('Cache %s already exists.' % dest_prefix)
            return

        # Reduce the number of individual files to download & write on disk.
        self.RunGit(['pack-refs', '--all'])

        # Run Garbage Collect to compress packfile.
        gc_args = ['gc', '--prune=all']
        if gc_aggressive:
            # The default "gc --aggressive" is often too aggressive for some
            # machines, since it attempts to create as many threads as there are
            # CPU cores, while not limiting per-thread memory usage, which puts
            # too much pressure on RAM on high-core machines, causing them to
            # thrash. Using lower-level commands gives more control over those
            # settings.

            # This might not be strictly necessary, but it's fast and is
            # normally run by 'gc --aggressive', so it shouldn't hurt.
            self.RunGit(['reflog', 'expire', '--all'])

            # These are the default repack settings for 'gc --aggressive'.
            gc_args = [
                'repack', '-d', '-l', '-f', '--depth=50', '--window=250', '-A',
                '--unpack-unreachable=all'
            ]
            # A 1G memory limit seems to provide comparable pack results as the
            # default, even for our largest repos, while preventing runaway
            # memory (at least on current Chromium builders which have about 4G
            # RAM per core).
            gc_args.append('--window-memory=1g')
            # NOTE: It might also be possible to avoid thrashing with a larger
            # window (e.g. "--window-memory=2g") by limiting the number of
            # threads created (e.g. "--threads=[cores/2]"). Some limited testing
            # didn't show much difference in outcomes on our current repos, but
            # it might be worth trying if the repos grow much larger and the
            # packs don't seem to be getting compressed enough.
        self.RunGit(gc_args)

        self.print('running "gsutil -m rsync -r -d %s %s"' %
                   (self.mirror_path, dest_prefix))
        gsutil.call('-m', 'rsync', '-r', '-d', self.mirror_path, dest_prefix)

        # Create .ready file and upload
        ready_fd, ready_file_name = tempfile.mkstemp(suffix='.ready')
        # Only the (empty) file on disk is needed; close the descriptor that
        # mkstemp opened so it is not leaked.
        os.close(ready_fd)
        try:
            self.print('running "gsutil cp %s %s.ready"' %
                       (ready_file_name, dest_prefix))
            gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
        finally:
            os.remove(ready_file_name)

        # remove all other directory/.ready files in the same gs_path
        # except for the directory/.ready file previously created
        # which can be used for bootstrapping while the current one is
        # being uploaded
        if not prune:
            return
        prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
        if not prev_dest_prefix:
            return
        for path in ls_out_set:
            if path in (prev_dest_prefix + '/', prev_dest_prefix + '.ready'):
                continue
            if path.endswith('.ready'):
                gsutil.call('rm', path)
                continue
            gsutil.call('-m', 'rm', '-r', path)

    @staticmethod
    def DeleteTmpPackFiles(path):
        """Deletes leftover temporary pack files under |path|/objects/pack."""
        pack_dir = os.path.join(path, 'objects', 'pack')
        if not os.path.isdir(pack_dir):
            return
        pack_files = [
            f for f in os.listdir(pack_dir)
            if f.startswith(('.tmp-', 'tmp_pack_'))
        ]
        for f in pack_files:
            f = os.path.join(pack_dir, f)
            try:
                os.remove(f)
                logging.warning('Deleted stale temporary pack file %s', f)
            except OSError:
                logging.warning('Unable to delete temporary pack file %s', f)
szager@chromium.org174766f2014-05-13 21:27:46 +0000670
szager@chromium.org848fd492014-04-09 19:06:44 +0000671
@subcommand.usage('[url of repo to check for caching]')
@metrics.collector.collect_metrics('git cache exists')
def CMDexists(parser, args):
    """Check to see if there already is a cache of the given repo."""
    # Exactly one positional argument (the repo url) is accepted.
    _, positional = parser.parse_args(args)
    if len(positional) != 1:
        parser.error('git cache exists only takes exactly one repo url.')

    mirror = Mirror(positional[0])
    if not mirror.exists():
        return 1

    # Print the cache location so callers can capture it from stdout.
    print(mirror.mirror_path)
    return 0
agable@chromium.org5a306a22014-02-24 22:13:59 +0000685
686
@subcommand.usage('[url of repo to create a bootstrap zip file]')
@metrics.collector.collect_metrics('git cache update-bootstrap')
def CMDupdate_bootstrap(parser, args):
    """Create and uploads a bootstrap tarball."""
    # Lets just assert we can't do this on Windows.
    if sys.platform.startswith('win'):
        print('Sorry, update bootstrap will not work on Windows.',
              file=sys.stderr)
        return 1

    parser.add_option('--skip-populate',
                      action='store_true',
                      help='Skips "populate" step if mirror already exists.')
    parser.add_option('--gc-aggressive',
                      action='store_true',
                      help='Run aggressive repacking of the repo.')
    parser.add_option('--prune',
                      action='store_true',
                      help='Prune all other cached bundles of the same repo.')

    # CMDpopulate re-parses the full command line, so hand it an untouched
    # copy of the original arguments.
    populate_args = args[:]
    options, args = parser.parse_args(args)
    if not args:
        # Report a usage error instead of an IndexError on args[0].
        parser.error('git cache update-bootstrap requires a repo url.')
    url = args[0]
    mirror = Mirror(url)
    if not options.skip_populate or not mirror.exists():
        CMDpopulate(parser, populate_args)
    else:
        print('Skipped populate step.')

    # Build a fresh Mirror for the upload step now that populate has run.
    mirror = Mirror(url)
    mirror.update_bootstrap(options.prune, options.gc_aggressive)
    return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000722
723
@subcommand.usage('[url of repo to add to or update in cache]')
@metrics.collector.collect_metrics('git cache populate')
def CMDpopulate(parser, args):
    """Ensure that the cache has all up-to-date objects for the given repo.

    Registers the populate-specific options on `parser`, parses `args`
    (exactly one repo url is required) and calls Mirror.populate() with the
    resulting settings.
    """
    # (flag spellings, optparse keyword arguments), in registration order.
    option_specs = [
        (['--depth'], {
            'type': 'int',
            'help': 'Only cache DEPTH commits of history',
        }),
        (['--no-fetch-tags'], {
            'action': 'store_true',
            'help': ('Don\'t fetch tags from the server. This can speed up '
                     'fetch considerably when there are many tags.'),
        }),
        (['--shallow', '-s'], {
            'action': 'store_true',
            'help': 'Only cache 10000 commits of history',
        }),
        (['--ref'], {
            'action': 'append',
            'help': 'Specify additional refs to be fetched',
        }),
        (['--commit'], {
            'action': 'append',
            'help': 'Specify additional commits to be fetched',
        }),
        (['--no_bootstrap', '--no-bootstrap'], {
            'action': 'store_true',
            'help': 'Don\'t bootstrap from Google Storage',
        }),
        (['--ignore_locks', '--ignore-locks'], {
            'action': 'store_true',
            'help': 'NOOP. This flag will be removed in the future.',
        }),
        (['--break-locks'], {
            'action': 'store_true',
            'help': 'Break any existing lock instead of just ignoring it',
        }),
        (['--reset-fetch-config'], {
            'action': 'store_true',
            'default': False,
            'help': 'Reset the fetch config before populating the cache.',
        }),
    ]
    for flags, spec in option_specs:
        parser.add_option(*flags, **spec)

    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache populate only takes exactly one repo url.')

    # Both lock flags are still accepted but only trigger a notice.
    deprecation_notices = (
        (options.ignore_locks,
         'ignore_locks is no longer used. Please remove its usage.'),
        (options.break_locks,
         'break_locks is no longer used. Please remove its usage.'),
    )
    for was_passed, notice in deprecation_notices:
        if was_passed:
            print(notice)

    url = args[0]
    mirror = Mirror(url, refs=options.ref, commits=options.commit)

    populate_kwargs = dict(
        no_fetch_tags=options.no_fetch_tags,
        verbose=options.verbose,
        shallow=options.shallow,
        bootstrap=not options.no_bootstrap,
        lock_timeout=options.timeout,
        reset_fetch_config=options.reset_fetch_config,
    )
    # Only forward a depth when one was actually requested.
    if options.depth:
        populate_kwargs['depth'] = options.depth
    mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000785
786
@subcommand.usage('Fetch new commits into cache and current checkout')
@metrics.collector.collect_metrics('git cache fetch')
def CMDfetch(parser, args):
    """Update mirror, and fetch in cwd.

    When the current git dir lives under the cache path, only that mirror is
    populated. Otherwise each selected remote whose url points into the
    cache has its mirror populated, and `git fetch <remote>` is then run for
    every selected remote.

    Args:
        parser: Option parser the command-specific options are added to.
        args: Command-line arguments; optional remote names to fetch.

    Returns:
        0 on success.

    Raises:
        subprocess.CalledProcessError: If one of the required git
            invocations fails.
    """
    parser.add_option('--all', action='store_true', help='Fetch all remotes')
    parser.add_option('--no_bootstrap',
                      '--no-bootstrap',
                      action='store_true',
                      help='Don\'t (re)bootstrap from Google Storage')
    parser.add_option(
        '--no-fetch-tags',
        action='store_true',
        help=('Don\'t fetch tags from the server. This can speed up '
              'fetch considerably when there are many tags.'))
    options, args = parser.parse_args(args)

    def git_output(*git_args):
        """Run git with git_args; return decoded stdout, whitespace-stripped."""
        raw = subprocess.check_output([Mirror.git_exe, *git_args])
        return raw.decode('utf-8', 'ignore').strip()

    # Figure out which remotes to fetch. This mimics the behavior of regular
    # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
    # this will NOT try to traverse up the branching structure to find the
    # ultimate remote to update.
    remotes = []
    if options.all:
        # Explicit check rather than `assert`, which is stripped under -O.
        if args:
            parser.error(
                'fatal: fetch --all does not take repository argument')
        remotes = git_output('remote').splitlines()
    elif args:
        remotes = args
    else:
        current_branch = git_output('rev-parse', '--abbrev-ref', 'HEAD')
        if current_branch != 'HEAD':
            try:
                upstream = git_output('config',
                                      'branch.%s.remote' % current_branch)
            except subprocess.CalledProcessError:
                # `git config` exits non-zero when the key is unset; treat
                # that as "no upstream" so the 'origin' fallback applies.
                upstream = ''
            if upstream and upstream != '.':
                remotes = [upstream]
    if not remotes:
        remotes = ['origin']

    cachepath = Mirror.GetCachePath()
    # Stripped by git_output(): the raw output ends with a newline that must
    # not become part of the path.
    git_dir = os.path.abspath(git_output('rev-parse', '--git-dir'))
    if git_dir.startswith(cachepath):
        # The current git dir is itself inside the cache: just update it.
        mirror = Mirror.FromPath(git_dir)
        mirror.populate(bootstrap=not options.no_bootstrap,
                        no_fetch_tags=options.no_fetch_tags,
                        lock_timeout=options.timeout)
        return 0

    for remote in remotes:
        remote_url = git_output('config', 'remote.%s.url' % remote)
        if remote_url.startswith(cachepath):
            mirror = Mirror.FromPath(remote_url)
            # Silence the mirror's own output; a single notice is printed
            # here instead.
            mirror.print = lambda *_unused: None
            print('Updating git cache...')
            mirror.populate(bootstrap=not options.no_bootstrap,
                            no_fetch_tags=options.no_fetch_tags,
                            lock_timeout=options.timeout)
        subprocess.check_call([Mirror.git_exe, 'fetch', remote])
    return 0
szager@chromium.orgf3145112014-08-07 21:02:36 +0000853
854
@subcommand.usage('do not use - it is a noop.')
@metrics.collector.collect_metrics('git cache unlock')
def CMDunlock(parser, args):
    """This command does nothing.

    Neither `parser` nor `args` is used; only a notice is printed.
    """
    notice = 'This command does nothing and will be removed in the future.'
    print(notice)
Vadim Shtayura08049e22017-10-11 00:14:52 +0000860
861
class OptionParser(optparse.OptionParser):
    """Wrapper class for OptionParser to handle global options.

    Adds the options shared by every subcommand (--cache-dir, --verbose,
    --quiet, --timeout) and applies them when the command line is parsed.
    """
    def __init__(self, *args, **kwargs):
        """Create the parser and register the global options.

        Args:
            *args: Forwarded to optparse.OptionParser.
            **kwargs: Forwarded to optparse.OptionParser. `prog` defaults to
                'git cache' when the caller does not supply one.
        """
        # Use a default rather than a hard-coded keyword so that a caller
        # passing prog= does not hit a duplicate-keyword TypeError.
        kwargs.setdefault('prog', 'git cache')
        optparse.OptionParser.__init__(self, *args, **kwargs)
        self.add_option(
            '-c',
            '--cache-dir',
            help=('Path to the directory containing the caches. Normally '
                  'deduced from git config cache.cachepath or '
                  '$GIT_CACHE_PATH.'))
        self.add_option(
            '-v',
            '--verbose',
            action='count',
            default=1,
            help='Increase verbosity (can be passed multiple times)')
        self.add_option('-q',
                        '--quiet',
                        action='store_true',
                        help='Suppress all extraneous output')
        self.add_option('--timeout',
                        type='int',
                        default=0,
                        help='Timeout for acquiring cache lock, in seconds')

    def parse_args(self, args=None, values=None):
        """Parse the command line and apply the global options.

        Besides parsing, this reports the names of the explicitly passed
        options to metrics, configures logging from --verbose/--quiet and
        applies --cache-dir through Mirror.SetCachePath().

        Args:
            args: Argument list to parse; forwarded to optparse.
            values: Accepted for signature compatibility with
                optparse.OptionParser.parse_args; not used.

        Returns:
            An (options, args) tuple: option values with defaults filled in,
            and the remaining positional arguments.
        """
        # Create an optparse.Values object that will store only the actual
        # passed options, without the defaults.
        actual_options = optparse.Values()
        _, args = optparse.OptionParser.parse_args(self, args, actual_options)
        # Create an optparse.Values object with the default options.
        options = optparse.Values(self.get_default_values().__dict__)
        # Update it with the options passed by the user.
        options._update_careful(actual_options.__dict__)
        # Report which options the user passed. Only the keys are reported,
        # not the values, since the values can contain arbitrary
        # information, which might be PII.
        metrics.collector.add('arguments', list(actual_options.__dict__.keys()))

        if options.quiet:
            options.verbose = 0

        # Map the verbosity count onto a log level, clamping at DEBUG.
        levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
        logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

        try:
            global_cache_dir = Mirror.GetCachePath()
        except RuntimeError:
            global_cache_dir = None
        if options.cache_dir:
            # Warn only when --cache-dir really differs from the configured
            # cache directory.
            if global_cache_dir and (os.path.abspath(options.cache_dir) !=
                                     os.path.abspath(global_cache_dir)):
                logging.warning(
                    'Overriding globally-configured cache directory.')
            Mirror.SetCachePath(options.cache_dir)

        return options, args
agable@chromium.org5a306a22014-02-24 22:13:59 +0000919
920
def main(argv):
    """Run the subcommand dispatcher for this module on argv.

    Returns whatever the dispatcher's execute() call returns.
    """
    return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000924
925
if __name__ == '__main__':
    # Script entry point: exit with main()'s result, or with 1 on Ctrl-C.
    try:
        with metrics.collector.print_notice_and_exit():
            exit_code = main(sys.argv[1:])
            sys.exit(exit_code)
    except KeyboardInterrupt:
        sys.stderr.write('interrupted\n')
        sys.exit(1)