blob: 5c1f85a46eeaa454d280dfe89c1550f012894683 [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
szager@chromium.org848fd492014-04-09 19:06:44 +00008from __future__ import print_function
agable@chromium.org5a306a22014-02-24 22:13:59 +00009import errno
10import logging
11import optparse
12import os
13import tempfile
pgervais@chromium.orgf3726102014-04-17 17:24:15 +000014import time
agable@chromium.org5a306a22014-02-24 22:13:59 +000015import subprocess
16import sys
17import urlparse
18
hinoka@google.com563559c2014-04-02 00:36:24 +000019from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000020import gclient_utils
21import subcommand
22
# WindowsError is only defined as a builtin on Windows. On other platforms
# referencing it raises NameError, so define a stand-in exception type there;
# this keeps "except WinErr" clauses valid everywhere.
try:
  WinErr = WindowsError  # pylint: disable=E0602
except NameError:
  class WinErr(Exception):
    """Stand-in for WindowsError on platforms that do not define it."""
agable@chromium.org5a306a22014-02-24 22:13:59 +000029
class LockError(Exception):
  """Raised when a lockfile cannot be acquired, released, or removed."""
32
33
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for <path> is the sibling file "<path>.lock". Its first line holds
  the pid of the process that created it. Usable as a context manager:
  entering acquires the lock, leaving releases it.
  """

  def __init__(self, path):
    """Record the lock location for |path|; does not touch the filesystem."""
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.

    Returns:
      The pid as an int, or None if the lockfile cannot be read or its first
      line is not an integer.
    """
    try:
      with open(self.lockfile, 'r') as f:
        return int(f.readline().strip())
    except (IOError, ValueError):
      return None

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises:
      OSError: if the lockfile cannot be created; errno is EEXIST when it
        already exists.
    """
    # O_EXCL makes os.open fail instead of reusing an existing lockfile.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the file even if writing the pid fails.
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.

    Raises:
      LockError: on win32, if 'del' keeps failing after three attempts.
      OSError: elsewhere, if os.remove fails (e.g. the lockfile is missing).
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)
      # Retry a few times, pausing between attempts, before giving up.
      for _ in range(3):
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode == 0:
          return
        time.sleep(3)
      raise LockError('Failed to remove lock: %s' % lockfile)
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lockfile already exists or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the path is not locked, or is locked by another process.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns:
      True if the lockfile was removed, False if removal failed with ENOENT
      (there was no lockfile). Other OSErrors propagate.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    self.lock()
    return self

  def __exit__(self, *_exc):
    # Windows is unreliable when it comes to file locking.  YMMV.
    # Only WinErr is swallowed here; LockError from unlock() still propagates.
    try:
      self.unlock()
    except WinErr:
      pass
agable@chromium.org5a306a22014-02-24 22:13:59 +0000136
137
class Mirror(object):
  """A local bare-repository cache (mirror) of a single remote git repo.

  The mirror lives in a subdirectory of the global cache path; the
  subdirectory name is derived from the repo url (see UrlToCacheDir).
  """

  # Name of the git executable to invoke on this platform.
  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  # gsutil is looked up relative to this file, under third_party/.
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)),
      'third_party', 'gsutil', 'gsutil')
  # Google Storage bucket that holds the bootstrap zip files.
  bootstrap_bucket = 'chromium-git-cache'

  def __init__(self, url, refs=None, print_func=None):
    """Describe the mirror of |url|.

    Args:
      url: Url of the remote repository.
      refs: Optional list of additional refs to fetch (see config()).
      print_func: Optional callable used for progress output; defaults to
        the print function.

    Raises:
      RuntimeError: via GetCachePath() if no cache path is configured.
    """
    self.url = url
    self.refs = refs or []
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    self.print = print_func or print

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Double existing dashes first so that '/' -> '-' stays unambiguous.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def FindExecutable(executable):
    """This mimics the "which" utility.

    Returns:
      The absolute path of the first executable match found on PATH, or None
      if there is no match or PATH is not set.
    """
    path_env = os.environ.get('PATH')
    if path_env is None:
      # No PATH at all: nothing to search (avoids calling .split on None).
      return None

    for path_folder in path_env.split(os.pathsep):
      target = os.path.join(path_folder, executable)
      # Just incase we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      if os.path.isfile(target) and os.access(target, os.X_OK):
        return target
      if sys.platform.startswith('win'):
        for suffix in ('.bat', '.cmd', '.exe'):
          alt_target = target + suffix
          if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK):
            return alt_target
    return None

  @classmethod
  def SetCachePath(cls, cachepath):
    """Override the cache directory used by every Mirror."""
    setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Return the cache directory, reading it from git config on first use.

    Raises:
      RuntimeError: if no path was set and the global git config has no
        cache.cachepath value.
    """
    if not hasattr(cls, 'cachepath'):
      try:
        cachepath = subprocess.check_output(
            [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
      except subprocess.CalledProcessError:
        cachepath = None
      if not cachepath:
        raise RuntimeError('No global cache.cachepath git configuration found.')
      # Remember the value on the class so git is only queried once.
      setattr(cls, 'cachepath', cachepath)
    return getattr(cls, 'cachepath')

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd: List of git arguments (without the git executable itself).
      **kwargs: Forwarded to gclient_utils.CheckCallAndFilter. Defaults
        filled in here: cwd (the mirror path), print_stdout (False),
        filter_fn (self.print) and env (a copy of os.environ).
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Presumably keeps git/ssh from blocking on an interactive credential
    # prompt; values already present in env are left alone.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    """Write the origin url and fetch refspecs into the repo at |cwd|.

    Args:
      cwd: Repository directory to configure; defaults to the mirror path.
    """
    if cwd is None:
      cwd = self.mirror_path
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)
    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    # --replace-all resets the fetch list to just the heads refspec; extra
    # refs are then appended with --add below.
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*'], cwd=cwd)
    for ref in self.refs:
      ref = ref.lstrip('+').rstrip('/')
      if ref.startswith('refs/'):
        refspec = '+%s:%s' % (ref, ref)
      else:
        refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      self.RunGit(['config', '--add', 'remote.origin.fetch', refspec], cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    Requires 7z on Windows and Unzip on Linux/Mac.

    Args:
      directory: Directory the downloaded zip file is extracted into.

    Returns:
      True if a zip file was downloaded and extracted successfully, False
      otherwise.
    """
    if sys.platform.startswith('win'):
      if not self.FindExecutable('7z'):
        self.print('''
Cannot find 7z in the path. If you want git cache to be able to bootstrap from
Google Storage, please install 7z from:

http://www.7-zip.org/download.html
''')
        return False
    else:
      if not self.FindExecutable('unzip'):
        self.print('''
Cannot find unzip in the path. If you want git cache to be able to bootstrap
from Google Storage, please ensure unzip is present on your system.
''')
        return False

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(
        self.gsutil_exe, boto_path=os.devnull, bypass_prodaccess=True)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    # Create the directory before entering the try block so that the finally
    # clause never refers to an unbound name if mkdtemp itself fails.
    tempdir = tempfile.mkdtemp()
    try:
      self.print('Downloading %s' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        self.print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, or unzip everywhere else.
      if sys.platform.startswith('win'):
        cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
      else:
        cmd = ['unzip', filename, '-d', directory]
      retcode = subprocess.call(cmd)
    finally:
      # Clean up the downloaded zipfile.
      gclient_utils.rmtree(tempdir)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def exists(self):
    """Return True if the mirror directory contains a 'config' file."""
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False):
    """Create the mirror if needed, then fetch every configured refspec.

    The whole operation runs while holding the mirror's Lockfile.

    Args:
      depth: If set, passed to git fetch as --depth.
      shallow: If true and no depth is given, a depth of 10000 is used.
      bootstrap: If true (and no depth is in effect), try to seed a new
        mirror from Google Storage before fetching.
      verbose: If true, pass -v --progress to git fetch.
    """
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    v = []
    if verbose:
      v = ['-v', '--progress']

    d = []
    if depth:
      d = ['--depth', str(depth)]

    with Lockfile(self.mirror_path):
      # Setup from scratch if the repo is new or is in a bad state.
      tempdir = None
      if not os.path.exists(os.path.join(self.mirror_path, 'config')):
        gclient_utils.rmtree(self.mirror_path)
        # Build the new repo in a temporary sibling directory; it is renamed
        # into place only after the fetches below have run.
        tempdir = tempfile.mkdtemp(
            suffix=self.basedir, dir=self.GetCachePath())
        bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
        if not bootstrapped:
          self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
          logging.warning(
              'Shallow fetch requested, but repo cache already exists.')
        d = []

      rundir = tempdir or self.mirror_path
      self.config(rundir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      fetch_specs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=rundir).strip().splitlines()
      for spec in fetch_specs:
        # A failed fetch of one refspec is logged and does not stop the rest.
        try:
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
        except subprocess.CalledProcessError:
          logging.warning('Fetch of %s failed', spec)
      if tempdir:
        os.rename(tempdir, self.mirror_path)

  def update_bootstrap(self):
    """Zip up the mirror and upload it to the bootstrap bucket."""
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    # mkstemp returns an open OS-level descriptor; close it so it isn't leaked.
    os.close(fd)
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (
        self.bootstrap_bucket, self.basedir, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

  def unlock(self):
    """Remove the mirror's config.lock (if any) and break its Lockfile."""
    lf = Lockfile(self.mirror_path)
    config_lock = os.path.join(self.mirror_path, 'config.lock')
    if os.path.exists(config_lock):
      os.remove(config_lock)
    lf.break_lock()
353
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the mirror path and returns 0 when the repo is cached; returns 1
  # (printing nothing) when it is not.
  _, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(positional[0])
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
366
367
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Returns 1 on Windows (unsupported there), 0 once the upload step has run.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  # Make sure the cache is populated first. --no_bootstrap is appended to a
  # fresh list, so the caller's args are left unmodified.
  CMDpopulate(parser, args + ['--no_bootstrap'])

  # Re-parse the original arguments to recover the repo url, then zip and
  # upload that repo's mirror.
  _, positional = parser.parse_args(args)
  Mirror(positional[0]).update_bootstrap()
  return 0
hinoka@google.com563559c2014-04-02 00:36:24 +0000387
388
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help='Don\'t bootstrap from Google Storage')

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  populate_kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap)
  # Only forward depth when one was given, so populate() keeps its default.
  if options.depth:
    populate_kwargs['depth'] = options.depth

  Mirror(args[0], refs=options.ref).populate(**populate_kwargs)
agable@chromium.org5a306a22014-02-24 22:13:59 +0000415
416
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  repo_dirs = []
  if not options.all:
    url = args[0]
    repo_dirs.append(Mirror(url).mirror_path)
  else:
    cachepath = Mirror.GetCachePath()
    entries = os.listdir(cachepath)
    repo_dirs = [os.path.join(cachepath, name)
                 for name in entries
                 if os.path.isdir(os.path.join(cachepath, name))]
    # Also pick up "<repo>.lock" files whose repo directory is not listed
    # above. Only the trailing suffix is stripped, and the duplicate check
    # compares the repo path (not the lock path) against repo_dirs.
    lock_suffix = '.lock'
    for name in entries:
      if not name.endswith(lock_suffix):
        continue
      if not os.path.isfile(os.path.join(cachepath, name)):
        continue
      repo_dir = os.path.join(cachepath, name[:-len(lock_suffix)])
      if repo_dir not in repo_dirs:
        repo_dirs.append(repo_dir)

  lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
               if os.path.exists(repo_dir + '.lock')]

  if not options.force:
    # The explicit '+' matters: without it the adjacent literals merge into
    # one string that becomes the join() separator, and the message is lost.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  untouched_repos = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    unlocked = False
    # A repo counts as unlocked if either its config.lock or its own
    # lockfile was removed.
    if os.path.exists(config_lock):
      os.remove(config_lock)
      unlocked = True
    if lf.break_lock():
      unlocked = True

    if unlocked:
      unlocked_repos.append(repo_dir)
    else:
      untouched_repos.append(repo_dir)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n %s',
                 '\n '.join(unlocked_repos))
  if untouched_repos:
    logging.debug('Did not touch these caches:\n %s',
                  '\n '.join(untouched_repos))
agable@chromium.org5a306a22014-02-24 22:13:59 +0000476
477
class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    """Create the parser with prog 'git cache' and the global options."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    """Parse |args|, then apply the global options.

    Side effects: --cache-dir overrides the Mirror cache path, and the
    logging level is configured from the -v count.

    Returns:
      The (options, args) pair from optparse.OptionParser.parse_args.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # No cache path is configured; --cache-dir may still supply one.
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warning('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    # No -v: WARNING, -v: INFO, -vv or more: DEBUG.
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    return options, args
506
507
def main(argv):
  """Run the subcommand dispatcher for this module over |argv|.

  Returns whatever the dispatcher's execute() call returns.
  """
  parser = OptionParser()
  return subcommand.CommandDispatcher(__name__).execute(parser, argv)
511
512
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)