blob: be44ec50b038171cf2333cfde11b221e3f6aa18f [file] [log] [blame]
agable@chromium.org5a306a22014-02-24 22:13:59 +00001#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
8import errno
9import logging
10import optparse
11import os
12import tempfile
13import subprocess
14import sys
15import urlparse
16
hinoka@google.com563559c2014-04-02 00:36:24 +000017from download_from_google_storage import Gsutil
agable@chromium.org5a306a22014-02-24 22:13:59 +000018import gclient_utils
19import subcommand
20
21
# Name of the git launcher to spawn: 'git.bat' on Windows, 'git' elsewhere.
if sys.platform.startswith('win'):
  GIT_EXECUTABLE = 'git.bat'
else:
  GIT_EXECUTABLE = 'git'

# Google Storage bucket that holds the bootstrap zip files.
BOOTSTRAP_BUCKET = 'chromium-git-cache'

# Location of the gsutil script, resolved relative to this file.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
GSUTIL_DEFAULT_PATH = os.path.join(_THIS_DIR, 'third_party', 'gsutil', 'gsutil')
agable@chromium.org5a306a22014-02-24 22:13:59 +000027
28
def UrlToCacheDir(url):
  """Map a git url to the directory name used for it inside the cache.

  The scheme is dropped, a trailing '.git' is removed, every '-' is doubled,
  every '/' becomes a single '-', and the result is lower-cased.

  Args:
    url: the repository url.

  Returns:
    The normalized directory name (a string without path separators).
  """
  suffix = '.git'
  pieces = urlparse.urlparse(url)
  location = ''.join((pieces.netloc, pieces.path))
  if location.endswith(suffix):
    location = location[:len(location) - len(suffix)]
  # Double existing dashes first so that they stay distinguishable from the
  # dashes that replace the path separators.
  escaped = location.replace('-', '--')
  flattened = escaped.replace('/', '-')
  return flattened.lower()
36
37
def RunGit(cmd, **kwargs):
  """Run a git command through gclient_utils.CheckCallAndFilter.

  Args:
    cmd: list of git arguments, without the git executable itself.
    **kwargs: forwarded to gclient_utils.CheckCallAndFilter after the defaults
        below are filled in:
          cwd - defaults to the current working directory.
          filter_fn - when truthy it is wrapped in gclient_utils.GitFilter,
              'print_stdout' defaults to False and GIT_ASKPASS/SSH_ASKPASS
              default to 'true' in the child environment.  Otherwise
              'print_stdout' defaults to True.
          stdout - stream that receives the "running ..." banner; defaults to
              sys.stdout.
  """
  kwargs.setdefault('cwd', os.getcwd())

  line_filter = kwargs.get('filter_fn')
  if not line_filter:
    kwargs.setdefault('print_stdout', True)
  else:
    kwargs['filter_fn'] = gclient_utils.GitFilter(line_filter)
    kwargs.setdefault('print_stdout', False)
    # Reuse the caller's environment when one was given; otherwise work on a
    # copy of ours so the process-wide environment is not modified.
    env = kwargs.get('env')
    if not env:
      env = kwargs.setdefault('env', os.environ.copy())
    for askpass_var in ('GIT_ASKPASS', 'SSH_ASKPASS'):
      env.setdefault(askpass_var, 'true')

  banner_stream = kwargs.get('stdout', sys.stdout)
  banner = 'running "git %s" in "%s"' % (' '.join(cmd), kwargs['cwd'])
  print >> banner_stream, banner
  gclient_utils.CheckCallAndFilter([GIT_EXECUTABLE] + cmd, **kwargs)
52
53
class LockError(Exception):
  """Raised when a Lockfile cannot be acquired or released."""
56
57
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for `path` is the file `path + ".lock"`, which holds the pid of the
  process that created it.  Instances can be used as context managers: the
  lock is taken on entry and released on exit.
  """

  def __init__(self, path):
    """Prepare (but do not take) the lock guarding `path`."""
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns:
      The pid as an int, or None when the file cannot be read or does not
      start with an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        return int(f.readline().strip())
    except (IOError, ValueError):
      return None

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises:
      OSError: if the file cannot be created; errno is EEXIST when it already
          exists, because O_EXCL makes the creation exclusive.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the descriptor even when writing the pid fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist."""
    os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lockfile already exists or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if there is no lockfile, or if it records another pid.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns:
      True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    self.lock()
    return self

  def __exit__(self, *_exc):
    self.unlock()
142
143
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Returns 0 (after printing the cache directory) when the directory exists
  # and contains a 'config' file, and 1 otherwise.
  options, positional = parser.parse_args(args)
  if len(positional) != 1:
    parser.error('git cache exists only takes exactly one repo url.')

  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(positional[0]))
  # The presence of the 'config' file is used as the marker for a cache.
  marker = os.path.join(repo_dir, 'config')
  if not (os.path.isdir(repo_dir) and os.path.isfile(marker)):
    return 1
  print(repo_dir)
  return 0
157
158
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file."""
  # Steps: populate the cache for the url, run 'git gc' on it, zip the repo
  # directory and copy the zip to gs://BOOTSTRAP_BUCKET/<cache dir name>/.
  # Returns 1 on Windows or when zip fails, None otherwise.

  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    sys.stderr.write('Sorry, update bootstrap will not work on Windows.\n')
    return 1

  # First, we need to ensure the cache is populated.  A copy of args is
  # extended so that the parse below does not see the extra flag.
  populate_args = args[:]
  populate_args.append('--no_bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  options, args = parser.parse_args(args)
  url = args[0]
  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

  # The files are named <git number>.zip
  gen_number = subprocess.check_output(['git', 'number', 'master'],
                                       cwd=repo_dir).strip()
  RunGit(['gc'], cwd=repo_dir)  # Run Garbage Collect to compress packfile.

  # Creating a temp file and then deleting it ensures we can use this name.
  # mkstemp hands back an open descriptor, which must be closed explicitly.
  fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
  os.close(fd)
  os.remove(tmp_zipfile)
  try:
    retcode = subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=repo_dir)
    if retcode:
      # Without an archive there is nothing to upload.
      sys.stderr.write('Creating %s with zip failed (exit code %s).\n' %
                       (tmp_zipfile, retcode))
      return 1
    gsutil = Gsutil(path=GSUTIL_DEFAULT_PATH, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (BOOTSTRAP_BUCKET,
                                       UrlToCacheDir(url),
                                       gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
  finally:
    # Remove the archive whether or not the upload worked.
    if os.path.exists(tmp_zipfile):
      os.remove(tmp_zipfile)
191
192
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # A repo without a 'config' file is (re)created in a temporary directory,
  # optionally seeded from a bootstrap zip, fetched, and then renamed into
  # place.  An existing repo is reconfigured and fetched in place.  All of it
  # happens while holding the Lockfile of the repo directory.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help='Don\'t bootstrap from Google Storage')

  options, args = parser.parse_args(args)
  if options.shallow and not options.depth:
    options.depth = 10000
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  gclient_utils.safe_makedirs(options.cache_dir)
  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

  # Extra fetch arguments; unless verbose, '[up to date]' lines are dropped.
  v = []
  filter_fn = lambda l: '[up to date]' not in l
  if options.verbose:
    v = ['-v', '--progress']
    filter_fn = None

  d = []
  if options.depth:
    d = ['--depth', '%d' % options.depth]

  def _find(executable):
    """This mimics the "which" utility.

    Returns the absolute path of the first executable file called
    `executable` found on PATH, or False if there is none.
    """
    path_var = os.environ.get('PATH')
    if path_var is None:
      # No PATH at all: nothing can be found, and there is nothing to split.
      return False

    for path_folder in path_var.split(os.pathsep):
      target = os.path.join(path_folder, executable)
      # Just incase we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      if os.path.isfile(target) and os.access(target, os.X_OK):
        return target
    return False

  def _generation_key(gs_path):
    """Sort key that orders '<number>.zip' entries by their number.

    Entries whose name is not a number sort before all numbered ones.
    """
    stem = gs_path.rstrip('/').split('/')[-1]
    if stem.endswith('.zip'):
      stem = stem[:-len('.zip')]
    try:
      return (1, int(stem), gs_path)
    except ValueError:
      return (0, 0, gs_path)

  def _maybe_bootstrap_repo(directory):
    """Bootstrap the repo from Google Storage if possible.

    Requires 7z on Windows and Unzip on Linux/Mac.

    Returns True if a bootstrap zip was unpacked into `directory`, False
    otherwise.
    """
    if options.no_bootstrap:
      return False
    if sys.platform.startswith('win'):
      if not _find('7z'):
        print('Cannot find 7z in the path.')
        print('If you want git cache to be able to bootstrap from ')
        print('Google Storage, please install 7z from:')
        print('http://www.7-zip.org/download.html')
        return False
    else:
      if not _find('unzip'):
        print('Cannot find unzip in the path.')
        print('If you want git cache to be able to bootstrap from ')
        print('Google Storage, please ensure unzip is present on your system.')
        return False

    folder = UrlToCacheDir(url)
    gs_folder = 'gs://%s/%s' % (BOOTSTRAP_BUCKET, folder)
    gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=os.devnull,
                    bypass_prodaccess=True)
    # Get the most recent version of the zipfile.  The files are named after a
    # number, so they are compared numerically: a plain string sort would
    # rank '99999.zip' above '100000.zip'.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines(), key=_generation_key)
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    tempdir = tempfile.mkdtemp()
    try:
      print('Downloading %s...' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, or unzip everywhere else.
      if sys.platform.startswith('win'):
        cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
      else:
        cmd = ['unzip', filename, '-d', directory]
      retcode = subprocess.call(cmd)
    finally:
      # Clean up the download directory on every path, including failures.
      gclient_utils.rmtree(tempdir)

    if retcode:
      print('Extracting bootstrap zipfile %s failed.' % filename)
      print('Resuming normal operations')
      return False
    return True

  def _config(directory):
    """Write the origin url and fetch refspecs into the repo at `directory`."""
    RunGit(['config', 'core.deltaBaseCacheLimit',
            gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=directory)
    RunGit(['config', 'remote.origin.url', url],
           cwd=directory)
    # --replace-all resets the fetch refspecs; the rest are appended with --add.
    RunGit(['config', '--replace-all', 'remote.origin.fetch',
            '+refs/heads/*:refs/heads/*'],
           cwd=directory)
    RunGit(['config', '--add', 'remote.origin.fetch',
            '+refs/tags/*:refs/tags/*'],
           cwd=directory)
    for ref in options.ref or []:
      ref = ref.rstrip('/')
      refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      RunGit(['config', '--add', 'remote.origin.fetch', refspec],
             cwd=directory)

  with Lockfile(repo_dir):
    # Setup from scratch if the repo is new or is in a bad state.
    if not os.path.exists(os.path.join(repo_dir, 'config')):
      gclient_utils.rmtree(repo_dir)
      tempdir = tempfile.mkdtemp(suffix=UrlToCacheDir(url),
                                 dir=options.cache_dir)
      bootstrapped = _maybe_bootstrap_repo(tempdir)
      if not bootstrapped:
        RunGit(['init', '--bare'], cwd=tempdir)
      _config(tempdir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      RunGit(fetch_cmd, filter_fn=filter_fn, cwd=tempdir, retry=True)
      os.rename(tempdir, repo_dir)
    else:
      _config(repo_dir)
      if options.depth and os.path.exists(os.path.join(repo_dir, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
      # The depth arguments are not passed when updating an existing cache.
      fetch_cmd = ['fetch'] + v + ['origin']
      RunGit(fetch_cmd, filter_fn=filter_fn, cwd=repo_dir, retry=True)
330
331
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # For every selected repo directory this removes '<repo_dir>/config.lock'
  # and '<repo_dir>.lock' when present.  Nothing is removed unless --force is
  # given; without it the lockfiles found are reported through parser.error.
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  lock_suffix = '.lock'
  if not options.all:
    url = args[0]
    repo_dirs = [os.path.join(options.cache_dir, UrlToCacheDir(url))]
  else:
    names = os.listdir(options.cache_dir)
    repo_dirs = [os.path.join(options.cache_dir, name)
                 for name in names
                 if os.path.isdir(os.path.join(options.cache_dir, name))]
    # Also pick up lockfiles whose repo directory does not exist.  Only the
    # trailing '.lock' is stripped, and the stripped path is what gets
    # compared, so a repo that is already listed is not added a second time.
    for name in names:
      if not name.endswith(lock_suffix) or len(name) == len(lock_suffix):
        continue
      lock_path = os.path.join(options.cache_dir, name)
      if not os.path.isfile(lock_path):
        continue
      repo_dir = lock_path[:-len(lock_suffix)]
      if repo_dir not in repo_dirs:
        repo_dirs.append(repo_dir)

  lockfiles = [repo_dir + lock_suffix for repo_dir in repo_dirs
               if os.path.exists(repo_dir + lock_suffix)]

  if not options.force:
    # The explicit '+' matters: adjacent string literals are merged before
    # .join() is applied, which would turn the whole message into the
    # separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked = []
  untouched = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    # Per-repo flag, kept separate from the `unlocked` result list.
    broke_lock = False
    if os.path.exists(config_lock):
      os.remove(config_lock)
      broke_lock = True
    if lf.break_lock():
      broke_lock = True

    if broke_lock:
      unlocked.append(repo_dir)
    else:
      untouched.append(repo_dir)

  if unlocked:
    logging.info('Broke locks on these caches: %s' % unlocked)
  if untouched:
    logging.debug('Did not touch these caches: %s' % untouched)
387
388
class OptionParser(optparse.OptionParser):
  """optparse.OptionParser preloaded with the options shared by all commands.

  Adds -c/--cache-dir and -v/--verbose, and resolves the cache directory and
  the logging level every time arguments are parsed.
  """

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    """Parse args, then fill in options.cache_dir and configure logging.

    The cache directory comes from --cache-dir, falling back to the global
    git config key cache.cachepath; it is stored as an absolute path.  When
    neither is available the parser's error() is invoked.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)

    git_config_cmd = [GIT_EXECUTABLE, 'config', '--global', 'cache.cachepath']
    try:
      global_cache_dir = subprocess.check_output(git_config_cmd).strip()
    except subprocess.CalledProcessError:
      # The git config lookup failed, so only the command line can supply a
      # cache directory.
      if not options.cache_dir:
        self.error('No cache directory specified on command line '
                   'or in cache.cachepath.')
    else:
      if not options.cache_dir:
        options.cache_dir = global_cache_dir
      elif global_cache_dir:
        requested = os.path.abspath(options.cache_dir)
        if requested != os.path.abspath(global_cache_dir):
          logging.warn('Overriding globally-configured cache directory.')
    options.cache_dir = os.path.abspath(options.cache_dir)

    # Each -v moves one step down this list; extra -v's stay at DEBUG.
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    level_index = min(options.verbose, len(levels) - 1)
    logging.basicConfig(level=levels[level_index])

    return options, args
422
423
def main(argv):
  """Hand argv to a subcommand.CommandDispatcher and return its result.

  Args:
    argv: command line arguments, without the program name.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  parser = OptionParser()
  return dispatcher.execute(parser, argv)
427
428
if __name__ == '__main__':
  # Use whatever main() returns as the exit status of the process.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)