blob: 52e42c59274781f580491428b67afa4d6bf66280 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
agable@chromium.org5a306a22014-02-24 22:13:59 +00009import errno
10import logging
11import optparse
12import os
13import tempfile
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000014import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import subprocess
16import sys
17import urlparse
hinoka@google.com776a2c32014-04-25 07:54:25 +000018import zipfile
agable@chromium.org5a306a22014-02-24 22:13:59 +000019
hinoka@google.com563559c2014-04-02 00:36:24 +000020from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000021import gclient_utils
22import subcommand
23
# WindowsError is only defined as a builtin on Windows. On every other
# platform the lookup raises NameError, so define a private placeholder
# exception there; this keeps `except WinErr:` clauses valid everywhere.
try:
  WinErr = WindowsError  # pylint: disable=E0602
except NameError:
  class WinErr(Exception):
    """Placeholder used where the WindowsError builtin does not exist."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000030
class LockError(Exception):
  """Raised when a lockfile cannot be acquired, released, or removed."""
33
34
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock guarding `path` is a sibling file named `path + ".lock"` whose
  first line is the pid of the process that created it.
  """

  def __init__(self, path):
    """Set up (but do not acquire) the lock guarding |path|."""
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns:
      The pid as an int, or None if the lockfile cannot be read or its first
      line is not an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        return int(f.readline().strip())
    except (IOError, ValueError):
      return None

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises:
      OSError: if the lockfile cannot be created; errno is EEXIST when it
          already exists.
    """
    # O_EXCL makes creation atomic: exactly one process can win the race.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the descriptor even if the write fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.

    Raises:
      LockError: on win32, if `del` still fails after three attempts.
      OSError: on other platforms, if os.remove() fails (e.g. ENOENT).
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)
      for _ in range(3):
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode == 0:
          return
        time.sleep(3)
      raise LockError('Failed to remove lock: %s' % lockfile)
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lockfile already exists or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if there is no lockfile, or if the pid recorded in it is not
          this process's pid.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns:
      True if a lockfile was removed, False if os.remove() reported that
      there was none (ENOENT). Any other OSError propagates.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    """Acquire the lock on entry to a `with` block; see lock()."""
    self.lock()
    return self

  def __exit__(self, *_exc):
    """Release the lock on exit; exceptions from the body are not suppressed."""
    # Windows is unreliable when it comes to file locking.  YMMV.
    try:
      self.unlock()
    except WinErr:
      pass
agable@chromium.org5a306a22014-02-24 22:13:59 +0000137
138
class Mirror(object):
  """A bare git mirror of one remote repository inside the local cache.

  The mirror for a url lives at <cache path>/<UrlToCacheDir(url)>. The cache
  path is shared by all instances and is stored on the class (see
  SetCachePath / GetCachePath).
  """

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)),
      'third_party', 'gsutil', 'gsutil')
  bootstrap_bucket = 'chromium-git-cache'

  def __init__(self, url, refs=None, print_func=None):
    """Describe the mirror of |url|; nothing is created on disk.

    Args:
      url: url of the remote repository.
      refs: optional list of extra refs to fetch (see config()).
      print_func: optional callable used for progress output; defaults to the
          print function.

    Raises:
      RuntimeError: if no cache path has been set or configured.
    """
    self.url = url
    self.refs = refs or []
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    self.print = print_func or print

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path.

    Host and path are joined, a trailing '.git' is dropped, '-' is doubled and
    '/' becomes '-', and the result is lower-cased. For example
    'https://host.com/a-b/c.git' becomes 'host.com-a--b-c'.
    """
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def FindExecutable(executable):
    """This mimics the "which" utility.

    Returns:
      The absolute path of the first match for |executable| on PATH, or None
      if there is none (including when PATH is not set at all).
    """
    # A missing PATH is treated like an empty one instead of crashing.
    path_folders = os.environ.get('PATH', '').split(os.pathsep)

    for path_folder in path_folders:
      target = os.path.join(path_folder, executable)
      # Just in case we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      if os.path.isfile(target) and os.access(target, os.X_OK):
        return target
      if sys.platform.startswith('win'):
        for suffix in ('.bat', '.cmd', '.exe'):
          alt_target = target + suffix
          if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK):
            return alt_target
    return None

  @classmethod
  def SetCachePath(cls, cachepath):
    """Set the cache directory used by this class."""
    cls.cachepath = cachepath

  @classmethod
  def GetCachePath(cls):
    """Return the cache directory, reading it from git config on first use.

    If no path has been set, `git config --global cache.cachepath` is
    consulted and the answer is remembered on the class.

    Raises:
      RuntimeError: if that git setting is missing or empty.
    """
    if not hasattr(cls, 'cachepath'):
      try:
        cachepath = subprocess.check_output(
            [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
      except subprocess.CalledProcessError:
        cachepath = None
      if not cachepath:
        raise RuntimeError('No global cache.cachepath git configuration found.')
      cls.cachepath = cachepath
    return cls.cachepath

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd: list of git arguments, without the git executable itself.
      **kwargs: forwarded to gclient_utils.CheckCallAndFilter. Unless given,
          cwd is the mirror path, print_stdout is False, filter_fn is
          self.print and env is a copy of os.environ.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Presumably keeps git/ssh from blocking on an interactive credential
    # prompt: the askpass helper is the no-op `true` unless already set.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    """Write the mirror's git configuration into the repo at |cwd|.

    Sets core.deltaBaseCacheLimit and remote.origin.url, resets
    remote.origin.fetch to all heads, then adds one fetch refspec per entry of
    self.refs.

    Args:
      cwd: repository directory to configure; defaults to the mirror path.
    """
    if cwd is None:
      cwd = self.mirror_path
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)
    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*'], cwd=cwd)
    for ref in self.refs:
      ref = ref.lstrip('+').rstrip('/')
      if ref.startswith('refs/'):
        # A fully qualified ref is mirrored one-to-one.
        refspec = '+%s:%s' % (ref, ref)
      else:
        # A bare name is treated as a namespace under refs/.
        refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      self.RunGit(['config', '--add', 'remote.origin.fetch', refspec], cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    Downloads the last (in sorted order) zip file listed under
    gs://<bootstrap_bucket>/<basedir> and unpacks it into |directory|.

    Returns:
      True if an archive was downloaded and extracted, False otherwise.
    """
    # Pick the extractor: 7z on Windows, unzip elsewhere, and the zipfile
    # module whenever the native tool is missing or unsuitable.
    if sys.platform.startswith('win'):
      python_fallback = not self.FindExecutable('7z')
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    else:
      python_fallback = not self.FindExecutable('unzip')

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(
        self.gsutil_exe, boto_path=os.devnull, bypass_prodaccess=True)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory. The directory is created
    # before the try block so the cleanup below never sees an unbound name.
    tempdir = tempfile.mkdtemp()
    try:
      self.print('Downloading %s' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        self.print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      if not python_fallback:
        if sys.platform.startswith('win'):
          cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
        else:
          cmd = ['unzip', filename, '-d', directory]
        retcode = subprocess.call(cmd)
      else:
        try:
          with zipfile.ZipFile(filename, 'r') as f:
            f.printdir()
            f.extractall(directory)
        except Exception as e:
          self.print('Encountered error: %s' % str(e), file=sys.stderr)
          retcode = 1
        else:
          retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      gclient_utils.rmtree(tempdir)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def exists(self):
    """Return True if the mirror directory contains a git 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False):
    """Create the mirror if needed and fetch every configured refspec.

    A new mirror is assembled in a temporary directory inside the cache path
    and renamed into place after fetching. The whole operation runs under the
    mirror's Lockfile.

    Args:
      depth: if set, passed to `git fetch --depth` for a new mirror.
      shallow: if true and |depth| is unset, use a depth of 10000.
      bootstrap: if true (and no depth is in effect), try bootstrap_repo()
          before falling back to `git init --bare`.
      verbose: if true, pass -v --progress to git fetch.
    """
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    v = []
    if verbose:
      v = ['-v', '--progress']

    d = []
    if depth:
      d = ['--depth', str(depth)]

    with Lockfile(self.mirror_path):
      # Setup from scratch if the repo is new or is in a bad state.
      tempdir = None
      if not os.path.exists(os.path.join(self.mirror_path, 'config')):
        gclient_utils.rmtree(self.mirror_path)
        tempdir = tempfile.mkdtemp(
            suffix=self.basedir, dir=self.GetCachePath())
        bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
        if not bootstrapped:
          self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
          logging.warning(
              'Shallow fetch requested, but repo cache already exists.')
        # NOTE(review): --depth is dropped for every pre-existing mirror, not
        # only when the warning above fires -- confirm this is intended.
        d = []

      rundir = tempdir or self.mirror_path
      self.config(rundir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      fetch_specs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=rundir).strip().splitlines()
      # Fetch the refspecs one at a time so a failure of one is only logged
      # and the remaining ones are still fetched.
      for spec in fetch_specs:
        try:
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
        except subprocess.CalledProcessError:
          logging.warning('Fetch of %s failed', spec)
      if tempdir:
        os.rename(tempdir, self.mirror_path)

  def update_bootstrap(self):
    """Zip the mirror and upload it to the bootstrap bucket.

    The archive is uploaded as
    gs://<bootstrap_bucket>/<basedir>/<output of `git number master`>.zip.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    # mkstemp also returns an open descriptor, which must be closed here or
    # it leaks.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.close(fd)
    os.remove(tmp_zipfile)
    try:
      subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
      gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
      dest_name = 'gs://%s/%s/%s.zip' % (
          self.bootstrap_bucket, self.basedir, gen_number)
      gsutil.call('cp', tmp_zipfile, dest_name)
    finally:
      # Do not leave the archive behind, even if the upload raised.
      if os.path.exists(tmp_zipfile):
        os.remove(tmp_zipfile)

  def unlock(self):
    """Remove git's config.lock (if present) and break the mirror's lock."""
    lf = Lockfile(self.mirror_path)
    config_lock = os.path.join(self.mirror_path, 'config.lock')
    if os.path.exists(config_lock):
      os.remove(config_lock)
    lf.break_lock()
355
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the mirror path and returns 0 when the cache exists; returns 1
  # (printing nothing) when it does not.
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
368
369
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  # First, we need to ensure the cache is populated. Bootstrapping is
  # disabled for this step so the mirror is built by fetching from the
  # remote rather than from a previously uploaded archive.
  populate_args = args[:]
  populate_args.append('--no_bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory. The original (unmodified) arguments are parsed
  # again; CMDpopulate has already rejected anything but exactly one url.
  _, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap()
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000389
390
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Register the options specific to this subcommand.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help="Don't bootstrap from Google Storage")

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap)
  # Only forward a depth that was actually requested, so that populate()
  # applies its own default otherwise.
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000417
418
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  lock_suffix = '.lock'
  if not options.all:
    repo_dirs = [Mirror(args[0]).mirror_path]
  else:
    cachepath = Mirror.GetCachePath()
    entries = os.listdir(cachepath)
    # Every directory in the cache is a candidate repo...
    repo_dirs = [os.path.join(cachepath, entry)
                 for entry in entries
                 if os.path.isdir(os.path.join(cachepath, entry))]
    # ...and so is the target of any lock file whose repo directory is
    # missing. Only the trailing '.lock' is stripped, and the stripped path
    # (not the lock path) is what gets compared, so no repo is listed twice.
    known_dirs = set(repo_dirs)
    for entry in entries:
      if not entry.endswith(lock_suffix):
        continue
      if not os.path.isfile(os.path.join(cachepath, entry)):
        continue
      repo_dir = os.path.join(cachepath, entry[:-len(lock_suffix)])
      if repo_dir not in known_dirs:
        known_dirs.add(repo_dir)
        repo_dirs.append(repo_dir)

  lockfiles = [repo_dir + lock_suffix for repo_dir in repo_dirs
               if os.path.exists(repo_dir + lock_suffix)]

  if not options.force:
    # The explicit '+' matters: with implicit literal concatenation the whole
    # sentence would become the separator passed to join().
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  untouched_repos = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    unlocked = False
    # Remove git's own config.lock as well as our lockfile.
    if os.path.exists(config_lock):
      os.remove(config_lock)
      unlocked = True
    if lf.break_lock():
      unlocked = True

    if unlocked:
      unlocked_repos.append(repo_dir)
    else:
      untouched_repos.append(repo_dir)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
  if untouched_repos:
    logging.debug('Did not touch these caches:\n %s',
                  '\n '.join(untouched_repos))
agable@chromium.org5a306a22014-02-24 22:13:59 +0000478
479
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Create the parser and register the global -c and -v options."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    """Parse |args|, then apply the global options.

    Sets the logging level from the -v count (WARNING, INFO, then DEBUG for
    two or more) and, if --cache-dir was given, makes it the cache path used
    by Mirror.

    Returns:
      The (options, args) pair from optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)

    # Configure logging before anything is logged: the module-level logging
    # functions install a default root handler on first use, after which
    # basicConfig() does nothing and the requested verbosity would be lost.
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # No cache path is known yet; --cache-dir may still provide one.
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args
508
509
def main(argv):
  """Run the git-cache command line.

  Hands |argv| and a fresh OptionParser to a subcommand.CommandDispatcher
  built for this module and returns whatever its execute() returns.
  """
  parser = OptionParser()
  return subcommand.CommandDispatcher(__name__).execute(parser, argv)
513
514
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)