agable@chromium.org | 5a306a2 | 2014-02-24 22:13:59 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """A git command for managing a local cache of git repositories.""" |
| 7 | |
| 8 | import errno |
| 9 | import logging |
| 10 | import optparse |
| 11 | import os |
| 12 | import tempfile |
| 13 | import subprocess |
| 14 | import sys |
| 15 | import urlparse |
| 16 | |
hinoka@google.com | 563559c | 2014-04-02 00:36:24 +0000 | [diff] [blame^] | 17 | from download_from_google_storage import Gsutil |
agable@chromium.org | 5a306a2 | 2014-02-24 22:13:59 +0000 | [diff] [blame] | 18 | import gclient_utils |
| 19 | import subcommand |
| 20 | |
| 21 | |
# Name used to launch git: 'git.bat' on Windows, plain 'git' everywhere else.
if sys.platform.startswith('win'):
  GIT_EXECUTABLE = 'git.bat'
else:
  GIT_EXECUTABLE = 'git'

# Google Storage bucket that holds the bootstrap zip files.
BOOTSTRAP_BUCKET = 'chromium-git-cache'

# gsutil is looked up relative to the directory containing this script.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')
agable@chromium.org | 5a306a2 | 2014-02-24 22:13:59 +0000 | [diff] [blame] | 27 | |
| 28 | |
def UrlToCacheDir(url):
  """Map a git url onto the flat directory name used for it in the cache.

  The scheme is dropped (only host and path are kept), a trailing '.git' is
  removed, every '-' is doubled and every '/' becomes a single '-', and the
  result is lowercased.
  """
  pieces = urlparse.urlparse(url)
  name = ''.join((pieces.netloc, pieces.path))
  suffix = '.git'
  if name.endswith(suffix):
    name = name[:len(name) - len(suffix)]
  # Double the existing dashes first so they stay distinguishable from the
  # dashes that stand in for path separators.
  escaped = '--'.join(name.split('-'))
  return '-'.join(escaped.split('/')).lower()
| 36 | |
| 37 | |
def RunGit(cmd, **kwargs):
  """Run git with the argument list |cmd| through gclient_utils.

  |kwargs| are forwarded to gclient_utils.CheckCallAndFilter after a few
  defaults are filled in:
    cwd: the current working directory.
    print_stdout: False when a filter_fn is given, True otherwise.
  When a filter_fn is given it is wrapped in gclient_utils.GitFilter, and
  GIT_ASKPASS / SSH_ASKPASS are set to 'true' in the environment unless they
  are already present.  Note that a non-empty 'env' dict supplied by the
  caller is modified in place.
  """
  kwargs.setdefault('cwd', os.getcwd())
  line_filter = kwargs.get('filter_fn')
  if not line_filter:
    kwargs.setdefault('print_stdout', True)
  else:
    kwargs['filter_fn'] = gclient_utils.GitFilter(line_filter)
    kwargs.setdefault('print_stdout', False)
    git_env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    for askpass_var in ('GIT_ASKPASS', 'SSH_ASKPASS'):
      git_env.setdefault(askpass_var, 'true')
  out = kwargs.get('stdout', sys.stdout)
  if out is None:
    # Same as "print >> None": an explicit stdout=None means sys.stdout.
    out = sys.stdout
  out.write('running "git %s" in "%s"\n' % (' '.join(cmd), kwargs['cwd']))
  gclient_utils.CheckCallAndFilter([GIT_EXECUTABLE] + cmd, **kwargs)
| 52 | |
| 53 | |
class LockError(Exception):
  """Raised by Lockfile when a lock cannot be acquired or released."""
| 56 | |
| 57 | |
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for |path| is the file '<absolute path>.lock', which contains the
  pid of the process that created it.  Instances can be used as context
  managers: entering calls lock() and leaving calls unlock().
  """

  def __init__(self, path):
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None if the lockfile cannot be read or does not hold an integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    Raises OSError (errno.EEXIST) if the lockfile already exists.
    """
    # O_CREAT | O_EXCL makes the creation fail if the file is already there,
    # so two processes cannot both believe they created it.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the descriptor even if the write raises.
    with os.fdopen(fd, 'w') as f:
      f.write('%s\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist."""
    os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Raises LockError if the lockfile already exists or cannot be created.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises LockError if nothing is locked or the lock belongs to another pid.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    self.lock()
    return self

  def __exit__(self, *_exc):
    self.unlock()
| 142 | |
| 143 | |
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Returns 0 (after printing the cache directory) when the repo is cached,
  # and 1 when it is not.
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  cached_repo = os.path.join(options.cache_dir, UrlToCacheDir(args[0]))
  # The repo counts as cached only if its directory contains a 'config' file.
  has_config = os.path.isfile(os.path.join(cached_repo, 'config'))
  if not (os.path.isdir(cached_repo) and has_config):
    return 1
  print(cached_repo)
  return 0
| 157 | |
| 158 | |
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file."""
  # Returns 1 on Windows or when the zip step fails; otherwise falls through
  # and returns None after the upload.

  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    sys.stderr.write('Sorry, update bootstrap will not work on Windows.\n')
    return 1

  # First, we need to ensure the cache is populated.
  populate_args = args[:]
  populate_args.append('--no_bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.  CMDpopulate has already registered its options
  # on |parser|, so the original args can be parsed again here.
  options, args = parser.parse_args(args)
  url = args[0]
  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

  # The files are named <git number>.zip
  gen_number = subprocess.check_output(['git', 'number', 'master'],
                                       cwd=repo_dir).strip()
  RunGit(['gc'], cwd=repo_dir)  # Run Garbage Collect to compress packfile.
  # Creating a temp file and then deleting it ensures we can use this name.
  # mkstemp hands back an open descriptor; close it so it does not leak.
  fd, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
  os.close(fd)
  os.remove(tmp_zipfile)
  try:
    if subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=repo_dir):
      # Do not upload a missing or partial archive.
      sys.stderr.write('Failed to zip up %s.\n' % repo_dir)
      return 1
    gsutil = Gsutil(path=GSUTIL_DEFAULT_PATH, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (BOOTSTRAP_BUCKET,
                                       UrlToCacheDir(url),
                                       gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
  finally:
    # Remove the local archive whether or not the upload succeeded.
    if os.path.exists(tmp_zipfile):
      os.remove(tmp_zipfile)
| 191 | |
| 192 | |
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Registers its own options on |parser| before parsing |args|.  A repo that
  # has no 'config' file in the cache is rebuilt in a temporary directory
  # (optionally bootstrapped from Google Storage) and renamed into place;
  # an existing one is reconfigured and fetched in place.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help='Don\'t bootstrap from Google Storage')

  options, args = parser.parse_args(args)
  if options.shallow and not options.depth:
    options.depth = 10000
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  gclient_utils.safe_makedirs(options.cache_dir)
  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

  # Extra fetch flags and output filter; verbose mode shows everything.
  v = []
  filter_fn = lambda l: '[up to date]' not in l
  if options.verbose:
    v = ['-v', '--progress']
    filter_fn = None

  d = []
  if options.depth:
    d = ['--depth', '%d' % options.depth]

  def _find(executable):
    """This mimics the "which" utility.

    Returns the path of the first match on PATH, or False if there is none.
    """
    search_path = os.environ.get('PATH')
    if not search_path:
      return False
    # On Windows the file on disk carries an extension (e.g. '7z.exe'), so
    # the bare name alone would never match.
    suffixes = ['']
    if sys.platform.startswith('win'):
      suffixes += ['.exe', '.bat', '.cmd']

    for path_folder in search_path.split(os.pathsep):
      target = os.path.join(path_folder, executable)
      # Just incase we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      for suffix in suffixes:
        candidate = target + suffix
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
          return candidate
    return False

  def _generation(gs_path):
    """Sort key ordering bootstrap zips by their numeric <git number> name.

    Entries whose name is not a plain number sort before all numbered ones
    and keep their lexicographic order among themselves.
    """
    stem = os.path.splitext(gs_path.strip().rsplit('/', 1)[-1])[0]
    return (int(stem) if stem.isdigit() else -1, gs_path)

  def _maybe_bootstrap_repo(directory):
    """Bootstrap the repo from Google Storage if possible.

    Requires 7z on Windows and Unzip on Linux/Mac.

    Returns True if a bootstrap zip was unpacked into |directory|, False
    otherwise.
    """
    if options.no_bootstrap:
      return False
    if sys.platform.startswith('win'):
      if not _find('7z'):
        print('Cannot find 7z in the path.')
        print('If you want git cache to be able to bootstrap from ')
        print('Google Storage, please install 7z from:')
        print('http://www.7-zip.org/download.html')
        return False
    else:
      if not _find('unzip'):
        print('Cannot find unzip in the path.')
        print('If you want git cache to be able to bootstrap from ')
        print('Google Storage, please ensure unzip is present on your system.')
        return False

    folder = UrlToCacheDir(url)
    gs_folder = 'gs://%s/%s' % (BOOTSTRAP_BUCKET, folder)
    gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=os.devnull,
                    bypass_prodaccess=True)
    # Get the most recent version of the zipfile.  Compare the numbers
    # numerically: as plain strings '9.zip' would sort after '10.zip'.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines(), key=_generation)
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    tempdir = tempfile.mkdtemp()
    try:
      print('Downloading %s...' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, or unzip everywhere else.
      if sys.platform.startswith('win'):
        cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
      else:
        cmd = ['unzip', filename, '-d', directory]
      retcode = subprocess.call(cmd)
    finally:
      # Clean up the downloaded zipfile, also when the download failed.
      gclient_utils.rmtree(tempdir)
    if retcode:
      print('Extracting bootstrap zipfile %s failed.' % filename)
      print('Resuming normal operations')
      return False
    return True

  def _config(directory):
    """Write the cache's git settings and fetch refspecs into |directory|."""
    RunGit(['config', 'core.deltaBaseCacheLimit',
            gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=directory)
    RunGit(['config', 'remote.origin.url', url],
           cwd=directory)
    # --replace-all resets the refspec list; the rest are appended to it.
    RunGit(['config', '--replace-all', 'remote.origin.fetch',
            '+refs/heads/*:refs/heads/*'],
           cwd=directory)
    RunGit(['config', '--add', 'remote.origin.fetch',
            '+refs/tags/*:refs/tags/*'],
           cwd=directory)
    for ref in options.ref or []:
      ref = ref.rstrip('/')
      refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      RunGit(['config', '--add', 'remote.origin.fetch', refspec],
             cwd=directory)

  with Lockfile(repo_dir):
    # Setup from scratch if the repo is new or is in a bad state.
    if not os.path.exists(os.path.join(repo_dir, 'config')):
      gclient_utils.rmtree(repo_dir)
      tempdir = tempfile.mkdtemp(suffix=UrlToCacheDir(url),
                                 dir=options.cache_dir)
      bootstrapped = _maybe_bootstrap_repo(tempdir)
      if not bootstrapped:
        RunGit(['init', '--bare'], cwd=tempdir)
      _config(tempdir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      RunGit(fetch_cmd, filter_fn=filter_fn, cwd=tempdir, retry=True)
      # Only a fully fetched repo is moved to its final location.
      os.rename(tempdir, repo_dir)
    else:
      _config(repo_dir)
      if options.depth and os.path.exists(os.path.join(repo_dir, 'shallow')):
        logging.warning(
            'Shallow fetch requested, but repo cache already exists.')
      fetch_cmd = ['fetch'] + v + ['origin']
      RunGit(fetch_cmd, filter_fn=filter_fn, cwd=repo_dir, retry=True)
| 330 | |
| 331 | |
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # Removes both the '<repo>.lock' file next to each repo directory and a
  # 'config.lock' file inside it.  Nothing is removed without -f|--force.
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.all:
    url = args[0]
    repo_dirs = [os.path.join(options.cache_dir, UrlToCacheDir(url))]
  else:
    entries = os.listdir(options.cache_dir)
    repo_dirs = [os.path.join(options.cache_dir, entry)
                 for entry in entries
                 if os.path.isdir(os.path.join(options.cache_dir, entry))]
    # Also pick up lockfiles whose repo directory does not exist, without
    # listing a repo twice when both the directory and its lock are present.
    lock_suffix = '.lock'
    for entry in entries:
      if not entry.endswith(lock_suffix):
        continue
      if not os.path.isfile(os.path.join(options.cache_dir, entry)):
        continue
      repo_dir = os.path.join(options.cache_dir, entry[:-len(lock_suffix)])
      if repo_dir not in repo_dirs:
        repo_dirs.append(repo_dir)
  lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
               if os.path.exists(repo_dir + '.lock')]

  if not options.force:
    # The explicit '+' matters: adjacent string literals are concatenated
    # before '.join' is applied, which would turn the whole message into the
    # separator.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  untouched_repos = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    # Per-repo flag, kept separate from the result lists above.
    did_unlock = False
    if os.path.exists(config_lock):
      os.remove(config_lock)
      did_unlock = True
    if lf.break_lock():
      did_unlock = True

    if did_unlock:
      unlocked_repos.append(repo_dir)
    else:
      untouched_repos.append(repo_dir)

  if unlocked_repos:
    logging.info('Broke locks on these caches: %s' % unlocked_repos)
  if untouched_repos:
    logging.debug('Did not touch these caches: %s' % untouched_repos)
| 388 | |
class OptionParser(optparse.OptionParser):
  """optparse.OptionParser preloaded with the options all subcommands share.

  Adds -c/--cache-dir and -v/--verbose, and resolves the cache directory and
  logging level whenever arguments are parsed.
  """

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    """Parse |args|, then settle options.cache_dir and the logging level.

    The cache directory comes from --cache-dir if given, otherwise from the
    global git config key cache.cachepath; if neither is available the parser
    reports an error.  The final value is made absolute.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)

    try:
      configured_dir = subprocess.check_output(
          [GIT_EXECUTABLE, 'config', '--global', 'cache.cachepath']).strip()
    except subprocess.CalledProcessError:
      # The git config lookup failed; only the command line can supply it.
      if not options.cache_dir:
        self.error('No cache directory specified on command line '
                   'or in cache.cachepath.')
    else:
      if not options.cache_dir:
        options.cache_dir = configured_dir
      elif configured_dir and (os.path.abspath(options.cache_dir) !=
                               os.path.abspath(configured_dir)):
        logging.warn('Overriding globally-configured cache directory.')
    options.cache_dir = os.path.abspath(options.cache_dir)

    # Each -v moves one step down this list; extra -v's stay at DEBUG.
    verbosity_levels = (logging.WARNING, logging.INFO, logging.DEBUG)
    level_index = min(options.verbose, len(verbosity_levels) - 1)
    logging.basicConfig(level=verbosity_levels[level_index])

    return options, args
| 422 | |
| 423 | |
def main(argv):
  """Hand |argv| and a fresh OptionParser to subcommand.CommandDispatcher.

  Returns whatever the dispatcher's execute() returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
| 427 | |
| 428 | |
# Script entry point: run main() on the command-line arguments (without the
# program name) and use its return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))