Stop using compression for git cache.
Change git cache to download directly from the GS git directory.
Bug: 943696
Change-Id: Ibe473effbf18d5635736c3ca0ab0ef0bbf21be8b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1575003
Reviewed-by: Andrii Shyshkalov <tandrii@chromium.org>
Commit-Queue: Karen Qian <karenqian@google.com>
diff --git a/git_cache.py b/git_cache.py
index bd534d0..5bc15d7 100755
--- a/git_cache.py
+++ b/git_cache.py
@@ -376,87 +376,54 @@
"""
if not self.bootstrap_bucket:
return False
- python_fallback = (
- (sys.platform.startswith('win') and
- not gclient_utils.FindExecutable('7z')) or
- (not gclient_utils.FindExecutable('unzip')) or
- ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
- )
- gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
gsutil = Gsutil(self.gsutil_exe, boto_path=None)
- # Get the most recent version of the zipfile.
- _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
- def compare_filenames(a, b):
- # |a| and |b| look like gs://.../.../9999.zip. They both have the same
- # gs://bootstrap_bucket/basedir/ prefix because they come from the same
- # `gsutil ls`.
- # This function only compares the numeral parts before .zip.
- regex_pattern = r'/(\d+)\.zip$'
- match_a = re.search(regex_pattern, a)
- match_b = re.search(regex_pattern, b)
- if (match_a is not None) and (match_b is not None):
- num_a = int(match_a.group(1))
- num_b = int(match_b.group(1))
- return cmp(num_a, num_b)
- # If it doesn't match the format, fallback to string comparison.
- return cmp(a, b)
+ # Get the most recent version of the directory.
+ # This is determined from the most recent version of a .ready file.
+ # The .ready file is only uploaded when an entire directory has been
+ # uploaded to GS.
+ _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
- ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames)
- if not ls_out_sorted:
- # This repo is not on Google Storage.
+ ready_file_pattern = re.compile(r'.*/(\d+).ready$')
+
+ objects = set(ls_out.strip().splitlines())
+ ready_dirs = []
+
+ for name in objects:
+ m = ready_file_pattern.match(name)
+ # Given <path>/<number>.ready,
+ # we are interested in <path>/<number> directory
+
+ if m and (name[:-len('.ready')] + '/') in objects:
+ ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))
+
+ if not ready_dirs:
self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
(self.mirror_path, self.bootstrap_bucket,
- ' '.join((ls_err or '').splitlines(True))))
+ ' '.join((ls_err or '').splitlines(True))))
return False
- latest_checkout = ls_out_sorted[-1]
+ latest_dir = max(ready_dirs)[1]
- # Download zip file to a temporary directory.
try:
+ # create new temporary directory locally
tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
- self.print('Downloading %s' % latest_checkout)
+ self.RunGit(['init', '--bare'], cwd=tempdir)
+ self.print('Downloading files in %s/* into %s.' %
+ (latest_dir, tempdir))
with self.print_duration_of('download'):
- code = gsutil.call('cp', latest_checkout, tempdir)
+ code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
+ tempdir)
if code:
return False
- filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
-
- # Unpack the file with 7z on Windows, unzip on linux, or fallback.
- with self.print_duration_of('unzip'):
- if not python_fallback:
- if sys.platform.startswith('win'):
- cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
- else:
- cmd = ['unzip', filename, '-d', directory]
- retcode = subprocess.call(cmd)
- else:
- try:
- with zipfile.ZipFile(filename, 'r') as f:
- f.printdir()
- f.extractall(directory)
- except Exception as e:
- self.print('Encountered error: %s' % str(e), file=sys.stderr)
- retcode = 1
- else:
- retcode = 0
- finally:
- # Clean up the downloaded zipfile.
- #
- # This is somehow racy on Windows.
- # Catching OSError because WindowsError isn't portable and
- # pylint complains.
- exponential_backoff_retry(
- lambda: gclient_utils.rm_file_or_tree(tempdir),
- excs=(OSError,),
- name='rmtree [%s]' % (tempdir,),
- printerr=self.print)
-
- if retcode:
- self.print(
- 'Extracting bootstrap zipfile %s failed.\n'
- 'Resuming normal operations.' % filename)
+ except Exception as e:
+ self.print('Encountered error: %s' % str(e), file=sys.stderr)
+ gclient_utils.rmtree(tempdir)
return False
+ # delete the old directory
+ if os.path.exists(directory):
+ gclient_utils.rmtree(directory)
+ self.Rename(tempdir, directory)
return True
def contains_revision(self, revision):
@@ -507,47 +474,45 @@
% os.path.join(self.mirror_path, 'config'))
def _ensure_bootstrapped(self, depth, bootstrap, force=False):
- tempdir = None
pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
pack_files = []
-
if os.path.isdir(pack_dir):
pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
self.print('%s has %d .pack files, re-bootstrapping if >%d' %
- (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))
+ (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))
should_bootstrap = (force or
not self.exists() or
len(pack_files) > GC_AUTOPACKLIMIT)
- if should_bootstrap:
- if self.exists():
- # Re-bootstrapping an existing mirror; preserve existing fetch spec.
- self._preserve_fetchspec()
- tempdir = tempfile.mkdtemp(
- prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
- bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
- if bootstrapped:
- # Bootstrap succeeded; delete previous cache, if any.
- gclient_utils.rmtree(self.mirror_path)
- elif not self.exists() or not self.supported_project():
- # Bootstrap failed due to either
- # 1. No previous cache
- # 2. Project doesn't have a bootstrap zip file
+
+ if not should_bootstrap:
+ if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
+ logging.warn(
+ 'Shallow fetch requested, but repo cache already exists.')
+ return
+
+ if self.exists():
+ # Re-bootstrapping an existing mirror; preserve existing fetch spec.
+ self._preserve_fetchspec()
+ else:
+ os.mkdir(self.mirror_path)
+
+ bootstrapped = (not depth and bootstrap and
+ self.bootstrap_repo(self.mirror_path))
+
+ if not bootstrapped:
+ if not self.exists() or not self.supported_project():
+ # Bootstrap failed due to:
+ # 1. No previous cache.
+ # 2. Project doesn't have a bootstrap folder.
# Start with a bare git dir.
- self.RunGit(['init', '--bare'], cwd=tempdir)
+ self.RunGit(['init', '--bare'], cwd=self.mirror_path)
else:
# Bootstrap failed, previous cache exists; warn and continue.
logging.warn(
'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
'but failed. Continuing with non-optimized repository.'
% len(pack_files))
- gclient_utils.rmtree(tempdir)
- tempdir = None
- else:
- if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
- logging.warn(
- 'Shallow fetch requested, but repo cache already exists.')
- return tempdir
def _fetch(self, rundir, verbose, depth, reset_fetch_config):
self.config(rundir, reset_fetch_config)
@@ -583,23 +548,16 @@
if not ignore_lock:
lockfile.lock()
- tempdir = None
try:
- tempdir = self._ensure_bootstrapped(depth, bootstrap)
- rundir = tempdir or self.mirror_path
- self._fetch(rundir, verbose, depth, reset_fetch_config)
+ self._ensure_bootstrapped(depth, bootstrap)
+ self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
except ClobberNeeded:
# This is a major failure, we need to clean and force a bootstrap.
- gclient_utils.rmtree(rundir)
+ gclient_utils.rmtree(self.mirror_path)
self.print(GIT_CACHE_CORRUPT_MESSAGE)
- tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
- assert tempdir
- self._fetch(tempdir, verbose, depth, reset_fetch_config)
+ self._ensure_bootstrapped(depth, bootstrap, force=True)
+ self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
finally:
- if tempdir:
- if os.path.exists(self.mirror_path):
- gclient_utils.rmtree(self.mirror_path)
- self.Rename(tempdir, self.mirror_path)
if not ignore_lock:
lockfile.unlock()
@@ -906,4 +864,4 @@
sys.exit(main(sys.argv[1:]))
except KeyboardInterrupt:
sys.stderr.write('interrupted\n')
- sys.exit(1)
+ sys.exit(1)
\ No newline at end of file