Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 1 | # Copyright 2014 The Swarming Authors. All rights reserved. |
| 2 | # Use of this source code is governed under the Apache License, Version 2.0 that |
| 3 | # can be found in the LICENSE file. |
| 4 | |
| 5 | """Understands .isolated files and can do local operations on them.""" |
| 6 | |
| 7 | import hashlib |
| 8 | import re |
| 9 | |
| 10 | |
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Chunk size, in bytes, to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps each hashlib constructor back to its name.
# items() is used instead of iteritems() because the latter does not exist on
# Python 3; for a three-entry dict the difference is irrelevant on Python 2.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.items())
| 30 | |
| 31 | |
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for the given algorithm.

  |value| must consist of exactly 2 * digest_size hexadecimal characters
  (either case) and nothing else.

  |algo| is a hashlib-style constructor, e.g. hashlib.sha1.
  """
  # Each digest byte is rendered as two hex characters.
  size = 2 * algo().digest_size
  # Anchor with \Z rather than $: '$' also matches just before a trailing
  # newline, which would accept a digest followed by '\n' as valid.
  return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
| 36 | |
| 37 | |
def get_hash_algo(_namespace):
  """Returns the hashlib constructor to use for uploads to |_namespace|.

  The namespace is currently ignored and SHA-1 is always selected.
  """
  # TODO(vadimsh): Implement this at some point.
  selected_algo = hashlib.sha1
  return selected_algo
| 42 | |
| 43 | |
def hash_file(filepath, algo):
  """Returns the hex digest of the file at |filepath|.

  The file is read DISK_FILE_CHUNK bytes at a time so that it is never loaded
  in memory all at once.

  |algo| should be one of the hashlib hashing algorithms, e.g. hashlib.sha1.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    # Two-argument iter() keeps calling read() until it returns the empty
    # bytes sentinel, i.e. until end of file.
    for block in iter(lambda: stream.read(DISK_FILE_CHUNK), b''):
      hasher.update(block)
  return hasher.hexdigest()