Dirk Pranke | 1107857 | 2022-02-02 13:49:59 -0800 | [diff] [blame] | 1 | #!/usr/bin/env vpython3 |
| 2 | # Copyright 2022 Google LLC |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # https://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | |
| 16 | """Download all the LOBS in //site.""" |
| 17 | |
| 18 | import argparse |
| 19 | import hashlib |
| 20 | import io |
| 21 | import os |
| 22 | import sys |
| 23 | import time |
| 24 | import urllib3 |
| 25 | from urllib.error import HTTPError, URLError |
| 26 | |
| 27 | import common |
| 28 | |
# Shared urllib3.PoolManager used by _handle(). It stays None until the first
# _handle() call, which creates the pool and stores it here for later calls.
http = None
| 30 | |
def main():
    """Fetch every LOB under //site whose local copy is missing or stale.

    Each `<name>.sha1` file found by `common.walk(common.SITE_DIR)` holds the
    expected SHA-1 digest of `<name>`. A download of `<name>` is queued when
    `--force` is given, when `<name>` does not exist locally, or when the
    SHA-1 of the local file differs from the expected digest.

    Command-line flags:
        -f/--force: re-download everything, skipping the local hash check.
        -j/--jobs: job count handed to `common.JobQueue`.
        -m/--multiprocess: flag handed to `common.JobQueue`.

    Returns:
        0 if nothing had to be fetched or every fetch succeeded, 1 if any
        fetch reported an error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--force', action='store_true')
    parser.add_argument('-j', '--jobs', type=int, default=common.cpu_count())
    parser.add_argument('-m', '--multiprocess', action='store_true',
                        default=False)
    args = parser.parse_args()
    q = common.JobQueue(_handle, args.jobs, args.multiprocess)

    suffix = '.sha1'
    # Strip only the trailing suffix: str.replace() would also delete any
    # '.sha1' that appears earlier in the path and point at the wrong file.
    paths = [path[:-len(suffix)]
             for path in common.walk(common.SITE_DIR)
             if path.endswith(suffix)]

    for path in paths:
        full_path = os.path.join(common.SITE_DIR, path)
        with open(full_path + suffix, 'r') as fp:
            expected_sha1 = fp.read().strip()

        # Skip the download only when an up-to-date local copy exists and the
        # user did not ask for a forced refresh.
        if not args.force and os.path.exists(full_path):
            with open(full_path, 'rb') as fp:
                actual_sha1 = hashlib.sha1(fp.read()).hexdigest()
            if actual_sha1 == expected_sha1:
                continue
        q.request(path, (args, expected_sha1))

    if not q.all_tasks():
        return 0

    start = time.time()
    updated = 0
    failed = False
    total_bytes = 0
    for path, res, resp in q.results():
        did_update, num_bytes = resp
        if res:
            # A non-empty `res` is the error message returned by _handle().
            print('%s failed: %s' % (path, res))
            failed = True
        if did_update:
            updated += 1
        total_bytes += num_bytes
    elapsed = time.time() - start

    # A coarse clock can report zero elapsed time; avoid dividing by it.
    if elapsed > 0:
        mbps = total_bytes * 8 / elapsed / 1_000_000
    else:
        mbps = 0.0

    print('Fetched %d LOBs (%.1fMB) in %.3f seconds (%.1fMbps).' %
          (updated,
           (total_bytes / 1_000_000),
           elapsed,
           mbps))
    return 1 if failed else 0
| 81 | |
| 82 | |
| 83 | def _url(expected_sha1): |
| 84 | return 'https://storage.googleapis.com/%s/%s' % ( |
| 85 | 'chromium-website-lob-storage', expected_sha1) |
| 86 | |
| 87 | |
def _handle(path, obj):
    """Download one LOB, verify its SHA-1, and write it under the site dir.

    Args:
        path: Destination path; it is joined onto `common.SITE_DIR`.
        obj: The `(args, expected_sha1)` tuple queued by `main`. Only
            `expected_sha1` is used here.

    Returns:
        A `(error, (did_update, num_bytes))` tuple. `error` is '' on success
        and a message otherwise. `did_update` is True only when the file was
        written. `num_bytes` is the size of the downloaded payload, or 0 when
        nothing was downloaded.
    """
    _args, expected_sha1 = obj
    global http
    if http is None:
        http = urllib3.PoolManager()
    url = _url(expected_sha1)

    max_attempts = 4
    last_error = None
    for attempt in range(max_attempts):
        try:
            resp = http.request('GET', url)
            data = resp.data
            actual_sha1 = hashlib.sha1(data).hexdigest()
            if actual_sha1 != expected_sha1:
                return ('sha1 mismatch: expected %s, got %s' % (
                    expected_sha1, actual_sha1), (False, len(data)))
            common.write_binary_file(os.path.join(common.SITE_DIR, path),
                                     data)
            # Stop after the first good download instead of fetching and
            # rewriting the same object on every remaining iteration.
            return '', (True, len(data))
        except (HTTPError, URLError, TimeoutError,
                urllib3.exceptions.HTTPError) as e:
            # urllib3 reports transport failures through its own exception
            # hierarchy, so its base class has to be listed for the retry
            # path to trigger at all.
            last_error = e
            if attempt + 1 < max_attempts:
                time.sleep(1)
        except Exception as e:
            return str(e), (False, 0)

    # Every attempt failed with a retryable error; report the last one.
    return str(last_error), (False, 0)
| 114 | |
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
| 117 | |