blob: c10052e4c97f77e4f46801a78dceee27bfbd41d7 [file] [log] [blame]
Dirk Pranke11078572022-02-02 13:49:59 -08001#!/usr/bin/env vpython3
2# Copyright 2022 Google LLC
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# https://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Download all the LOBS in //site."""
17
18import argparse
19import hashlib
20import io
21import os
22import sys
23import time
24import urllib3
25from urllib.error import HTTPError, URLError
26
27import common
28
# Process-wide urllib3.PoolManager. It starts as None and is created by
# _handle() on its first call, then reused by later calls in this process.
http = None
30
def main():
    """Fetch every LOB under common.SITE_DIR that is missing or out of date.

    Each `<name>.sha1` file found under SITE_DIR holds the expected SHA-1 of
    the blob `<name>`. A blob is queued for download when `--force` is given,
    when the local file does not exist, or when the local file's SHA-1 does
    not match the expected one.

    Returns:
        0 when nothing needed fetching or every fetch succeeded, 1 when at
        least one fetch reported an error.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--force', action='store_true')
    parser.add_argument('-j', '--jobs', type=int, default=common.cpu_count())
    parser.add_argument('-m', '--multiprocess', action='store_true',
                        default=False)
    args = parser.parse_args()
    q = common.JobQueue(_handle, args.jobs, args.multiprocess)

    # Strip only the trailing '.sha1'; str.replace() would also mangle any
    # '.sha1' that happens to appear earlier in the path.
    suffix = '.sha1'
    paths = [path[:-len(suffix)]
             for path in common.walk(common.SITE_DIR)
             if path.endswith(suffix)]

    for path in paths:
        local_path = os.path.join(common.SITE_DIR, path)
        with open(local_path + suffix, 'r') as fp:
            expected_sha1 = fp.read().strip()

        needs_fetch = args.force or not os.path.exists(local_path)
        if not needs_fetch:
            # Only hash the existing file when it could let us skip the fetch.
            with open(local_path, 'rb') as fp:
                actual_sha1 = hashlib.sha1(fp.read()).hexdigest()
            needs_fetch = actual_sha1 != expected_sha1

        if needs_fetch:
            q.request(path, (args, expected_sha1))

    if not len(q.all_tasks()):
        return 0

    start = time.time()
    updated = 0
    failed = False
    total_bytes = 0
    for path, res, resp in q.results():
        did_update, num_bytes = resp
        if res:
            # A non-empty result string is the error message from _handle().
            print('%s failed: %s' % (path, res))
            failed = True
        if did_update:
            updated += 1
        total_bytes += num_bytes
    end = time.time()

    elapsed = end - start
    # Coarse clocks can report zero elapsed time; avoid dividing by zero.
    mbps = (total_bytes * 8 / elapsed / 1_000_000) if elapsed > 0 else 0.0

    print('Fetched %d LOBs (%.1fMB) in %.3f seconds (%.1fMbps).' %
          (updated,
           (total_bytes / 1_000_000),
           elapsed,
           mbps))
    return 1 if failed else 0
81
82
83def _url(expected_sha1):
84 return 'https://storage.googleapis.com/%s/%s' % (
85 'chromium-website-lob-storage', expected_sha1)
86
87
def _handle(path, obj):
    """Download one blob, verify its SHA-1, and write it under SITE_DIR.

    Args:
        path: Destination path, joined onto common.SITE_DIR.
        obj: An `(args, expected_sha1)` tuple as queued by main(); only the
            expected SHA-1 is used here.

    Returns:
        A tuple `(error, (did_update, num_bytes))`. `error` is '' on success
        and a message otherwise. `did_update` is True only when the file was
        written. `num_bytes` is the size of the downloaded payload, or 0 when
        the request itself failed.
    """
    _, expected_sha1 = obj
    global http
    if http is None:
        http = urllib3.PoolManager()
    url = _url(expected_sha1)

    max_attempts = 4
    error = ''
    for attempt in range(max_attempts):
        if attempt:
            # Brief pause before each retry (not before the first attempt).
            time.sleep(1)
        try:
            resp = http.request('GET', url)
            data = resp.data
            actual_sha1 = hashlib.sha1(data).hexdigest()
            if actual_sha1 != expected_sha1:
                return ('sha1 mismatch: expected %s, got %s' % (
                    expected_sha1, actual_sha1), (False, len(data)))
            common.write_binary_file(os.path.join(common.SITE_DIR, path),
                                     data)
            # Success: stop here instead of looping and downloading again.
            return '', (True, len(data))
        except (HTTPError, URLError, TimeoutError,
                urllib3.exceptions.HTTPError) as e:
            # Transport-level failure: remember it and retry. urllib3 raises
            # its own exception hierarchy, so that base class is listed too.
            error = str(e)
        except Exception as e:
            # Anything else (e.g. a failed write) is not retried.
            return str(e), (False, 0)

    # Every attempt hit a transport error; report the last one.
    return error, (False, 0)
114
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
117