blob: 957ae9529c83454a78566fd54d031abf20392b66 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
agable@chromium.org5a306a22014-02-24 22:13:59 +00009import errno
10import logging
11import optparse
12import os
szager@chromium.org174766f2014-05-13 21:27:46 +000013import re
agable@chromium.org5a306a22014-02-24 22:13:59 +000014import tempfile
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000015import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000016import subprocess
17import sys
18import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000019import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000020
hinoka@google.com563559c2014-04-02 00:36:24 +000021from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000022import gclient_utils
23import subcommand
24
# WindowsError only exists as a builtin on Windows.  Everywhere else the name
# lookup raises NameError, so a private placeholder class is defined instead;
# that keeps `except WinErr:` clauses valid on every platform.
try:
  WinErr = WindowsError  # pylint: disable=E0602
except NameError:
  class WinErr(Exception):
    """Placeholder for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000031
class LockError(Exception):
  """Raised when a lockfile cannot be created, released or removed."""
34
35
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for <path> is the file "<path>.lock"; it contains the pid of the
  process that created it.  Instances can be used as context managers.
  """

  def __init__(self, path):
    """Describe the lock guarding |path|; nothing is created on disk yet."""
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns the pid as an int, or None if the lockfile cannot be read or does
    not start with an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    O_EXCL makes the creation fail with OSError (errno.EEXIST) when the
    lockfile already exists.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the file even when writing the pid fails.
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.

    Raises:
      LockError: on Windows, when `del` failed three times in a row.
      OSError: on other platforms, when os.remove() fails.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)
      # Up to three attempts, pausing three seconds after each failure.
      for _ in range(3):
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode == 0:
          return
        time.sleep(3)
      raise LockError('Failed to remove lock: %s' % lockfile)
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lockfile already exists or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another pid.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if the removal failed with
    ENOENT (there was no lockfile).  Other OSErrors propagate.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    self.lock()
    return self

  def __exit__(self, *_exc):
    # Windows is unreliable when it comes to file locking.  YMMV.
    # Only WinErr is swallowed; LockError and other errors still propagate.
    try:
      self.unlock()
    except WinErr:
      pass
agable@chromium.org5a306a22014-02-24 22:13:59 +0000138
139
class Mirror(object):
  """A bare git mirror of a single remote repository in the local cache.

  The mirror lives in <cache path>/<basedir>, where basedir is the remote url
  normalized by UrlToCacheDir().  The cache path is either set explicitly with
  SetCachePath() or read from the global `cache.cachepath` git config.
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)),
      'third_party', 'gsutil', 'gsutil')
  bootstrap_bucket = 'chromium-git-cache'

  def __init__(self, url, refs=None, print_func=None):
    """Describe the mirror for |url|; nothing is fetched or created here.

    Args:
      url: Url of the remote repository.
      refs: Optional list of extra refs to fetch besides refs/heads/*.
      print_func: Optional callable used instead of print for output.

    Raises:
      RuntimeError: if no cache path has been set or configured.
    """
    self.url = url
    self.refs = refs or []
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    self.print = print_func or print

  @classmethod
  def FromPath(cls, path):
    """Build a Mirror from the path of an existing cache directory."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Literal dashes are doubled so that they stay distinguishable from the
    # single dashes that replace path separators.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    # A dash with a word character on each side stood for '/'; doubled dashes
    # stood for a literal '-'.
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @staticmethod
  def FindExecutable(executable):
    """This mimics the "which" utility.

    Returns the absolute path of the first match on PATH, or None if there is
    no match.  On Windows the .bat, .cmd and .exe suffixes are tried as well.
    """
    # An unset PATH is treated like an empty one instead of crashing on None.
    path_folders = os.environ.get('PATH', '').split(os.pathsep)

    for path_folder in path_folders:
      target = os.path.join(path_folder, executable)
      # Just in case we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      if os.path.isfile(target) and os.access(target, os.X_OK):
        return target
      if sys.platform.startswith('win'):
        for suffix in ('.bat', '.cmd', '.exe'):
          alt_target = target + suffix
          if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK):
            return alt_target
    return None

  @classmethod
  def SetCachePath(cls, cachepath):
    """Set the cache directory used by every Mirror."""
    setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Return the cache directory, reading it from git config on first use.

    Raises:
      RuntimeError: if no path was set and `git config --global
          cache.cachepath` fails or prints nothing.
    """
    if not hasattr(cls, 'cachepath'):
      try:
        cachepath = subprocess.check_output(
            [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
      except subprocess.CalledProcessError:
        cachepath = None
      if not cachepath:
        raise RuntimeError('No global cache.cachepath git configuration found.')
      setattr(cls, 'cachepath', cachepath)
    return getattr(cls, 'cachepath')

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd: List of git arguments, without the git executable itself.
      **kwargs: Passed on to gclient_utils.CheckCallAndFilter.  Defaults are
          filled in for cwd (the mirror path), print_stdout, filter_fn and
          env.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Unless the caller configured them, both askpass helpers are set to the
    # `true` command.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    """Write the remote url and fetch refspecs into the repo at |cwd|.

    Args:
      cwd: Repository directory to configure; defaults to the mirror path.
    """
    if cwd is None:
      cwd = self.mirror_path
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)
    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*'], cwd=cwd)
    for ref in self.refs:
      ref = ref.lstrip('+').rstrip('/')
      if ref.startswith('refs/'):
        refspec = '+%s:%s' % (ref, ref)
      else:
        refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      self.RunGit(['config', '--add', 'remote.origin.fetch', refspec], cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    Downloads the zip file that sorts last under gs://<bucket>/<basedir> and
    unpacks it into |directory|.

    Returns:
      True if an archive was downloaded and extracted, False otherwise.
    """
    # Pick the extractor: 7z on Windows, unzip elsewhere, and Python's zipfile
    # module whenever the preferred tool is unavailable.
    if sys.platform.startswith('win'):
      python_fallback = not self.FindExecutable('7z')
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    else:
      python_fallback = not self.FindExecutable('unzip')

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(
        self.gsutil_exe, boto_path=os.devnull, bypass_prodaccess=True)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.  The directory is created
    # before the try block so that the cleanup below never sees an unbound
    # name if mkdtemp itself fails.
    tempdir = tempfile.mkdtemp()
    try:
      self.print('Downloading %s' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        self.print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      if not python_fallback:
        if sys.platform.startswith('win'):
          cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
        else:
          cmd = ['unzip', filename, '-d', directory]
        retcode = subprocess.call(cmd)
      else:
        try:
          with zipfile.ZipFile(filename, 'r') as f:
            f.printdir()
            f.extractall(directory)
        except Exception as e:
          self.print('Encountered error: %s' % str(e), file=sys.stderr)
          retcode = 1
        else:
          retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      gclient_utils.rmtree(tempdir)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def exists(self):
    """Return True if the mirror directory contains a git `config` file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False):
    """Create the mirror if needed and fetch every configured refspec.

    Args:
      depth: Only fetch this many commits of history.
      shallow: When set and no depth is given, use a depth of 10000.
      bootstrap: Try to seed a new mirror from Google Storage first; only
          attempted when no depth is in effect.
      verbose: Pass -v --progress to git fetch.
    """
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    v = []
    if verbose:
      v = ['-v', '--progress']

    d = []
    if depth:
      d = ['--depth', str(depth)]

    with Lockfile(self.mirror_path):
      # Setup from scratch if the repo is new or is in a bad state.
      tempdir = None
      if not os.path.exists(os.path.join(self.mirror_path, 'config')):
        gclient_utils.rmtree(self.mirror_path)
        # The new repo is built in a sibling temp dir and only renamed into
        # place after the fetches have run.
        tempdir = tempfile.mkdtemp(
            suffix=self.basedir, dir=self.GetCachePath())
        bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
        if not bootstrapped:
          self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
          logging.warn(
              'Shallow fetch requested, but repo cache already exists.')
        # NOTE(review): the depth reset is applied to every existing cache,
        # not only when the warning fires -- confirm against the upstream
        # indentation.
        d = []

      rundir = tempdir or self.mirror_path
      self.config(rundir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      fetch_specs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=rundir).strip().splitlines()
      for spec in fetch_specs:
        try:
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
        except subprocess.CalledProcessError:
          # A failing refspec is logged and the remaining ones still run.
          logging.warn('Fetch of %s failed' % spec)
      if tempdir:
        os.rename(tempdir, self.mirror_path)

  def update_bootstrap(self):
    """Zip the mirror and upload it to the bootstrap bucket."""
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp returns an open descriptor; close it so it is not leaked.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (
        self.bootstrap_bucket, self.basedir, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

  @staticmethod
  def BreakLocks(path):
    """Remove the lockfile for |path| and any stale git config.lock in it.

    Returns True if at least one lock file was removed.
    """
    did_unlock = False
    if Lockfile(path).break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    git_config_lock = os.path.join(path, 'config.lock')
    if os.path.exists(git_config_lock):
      os.remove(git_config_lock)
      did_unlock = True
    return did_unlock

  def unlock(self):
    """Break the locks of this mirror; returns True if any were removed."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Break the locks of every repo in the cache directory.

    Candidates are all sub-directories of the cache path plus the repo path
    of every stray "<repo>.lock" file found there.

    Returns the list of repo paths for which a lock was removed.
    """
    cachepath = cls.GetCachePath()
    repo_dirs = set()
    for dirent in os.listdir(cachepath):
      full_path = os.path.join(cachepath, dirent)
      if os.path.isdir(full_path):
        repo_dirs.add(full_path)
      elif dirent.endswith('.lock') and os.path.isfile(full_path):
        # Strip the ".lock" suffix to get the repo path the lock belongs to.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    return [repo_dir for repo_dir in repo_dirs if cls.BreakLocks(repo_dir)]
szager@chromium.org848fd492014-04-09 19:06:44 +0000395
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  # Exit status 1 signals "no cache"; otherwise the cache path is printed.
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
408
409
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  # First, we need to ensure the cache is populated.  The archive being
  # rebuilt must not be used as the seed, hence --no_bootstrap.
  CMDpopulate(parser, args + ['--no_bootstrap'])

  # Get the repo directory.  CMDpopulate has registered its options on the
  # parser by now, so the original arguments parse cleanly.
  _, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap()
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000429
430
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help="Don't bootstrap from Google Storage")

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(args[0], refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap)
  # depth is only forwarded when given, so populate() keeps its own default.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000457
458
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    # Without --force nothing is removed; the lockfiles found are only
    # reported through the parser error.
    cachepath = Mirror.GetCachePath()
    # Directory entries are relative to cachepath, so they must be joined
    # before being tested; the bare name would be resolved against the cwd.
    candidates = [os.path.join(cachepath, entry)
                  for entry in os.listdir(cachepath)
                  if entry.endswith('.lock')]
    lockfiles = [path for path in candidates if os.path.isfile(path)]
    # The explicit '+' matters: adjacent string literals are merged before
    # .join() is applied, which would turn the whole message into the
    # separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
        unlocked_repos))
agable@chromium.org5a306a22014-02-24 22:13:59 +0000490
491
class OptionParser(optparse.OptionParser):
  """optparse.OptionParser preloaded with the options every command shares."""

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')

  def parse_args(self, args=None, values=None):
    """Parse the arguments, then apply the global options.

    Configures the logging level from -v/-q and, when --cache-dir is given,
    makes it the cache path used by Mirror.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    # verbose counts up from its default of 1; values past the end of the
    # table are clamped to DEBUG.
    levels = (logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)
    level_index = min(options.verbose, len(levels) - 1)
    logging.basicConfig(level=levels[level_index])

    try:
      configured_dir = Mirror.GetCachePath()
    except RuntimeError:
      configured_dir = None

    requested_dir = options.cache_dir
    if requested_dir:
      overrides_config = configured_dir and (
          os.path.abspath(requested_dir) != os.path.abspath(configured_dir))
      if overrides_config:
        logging.warn('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(requested_dir)

    return options, args
524
525
def main(argv):
  """Dispatch |argv| to the matching CMD* function of this module.

  Returns whatever the dispatcher's execute() call returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)


if __name__ == '__main__':
  # The value returned by main() becomes the process exit status.
  sys.exit(main(sys.argv[1:]))