Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 1 | # Copyright 2014 The Swarming Authors. All rights reserved. |
| 2 | # Use of this source code is governed under the Apache License, Version 2.0 that |
| 3 | # can be found in the LICENSE file. |
| 4 | |
| 5 | """Understands .isolated files and can do local operations on them.""" |
| 6 | |
| 7 | import hashlib |
| 8 | import re |
| 9 | |
| 10 | |
# Format version written to, and expected when reading, .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Number of bytes requested per read when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1024 ** 2


# Placeholder size used when the real file size is not known; generally used
# for .isolated files.
UNKNOWN_FILE_SIZE = None
| 22 | |
| 23 | |
# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
    'md5': hashlib.md5,
    'sha-1': hashlib.sha1,
    'sha-512': hashlib.sha512,
}


# Used for serialization: maps each hashlib constructor back to its name.
# .items() is used instead of .iteritems() so the module also imports on
# Python 3, where dict.iteritems() no longer exists.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.items())
| 35 | |
| 36 | |
class IsolatedError(ValueError):
    """Raised when a .isolated file cannot be loaded."""
| 40 | |
| 41 | |
class MappingError(OSError):
    """Raised when the tree could not be recreated."""
| 45 | |
| 46 | |
def is_valid_hash(value, algo):
    """Returns if the value is a valid hash for the corresponding algorithm.

    |value| is the candidate hex digest string. |algo| is a hashlib-style
    constructor; the expected length is twice the digest_size of the object
    it returns (two hex characters per digest byte).

    Returns True only when |value| consists of exactly that many hexadecimal
    characters (either case) and nothing else.
    """
    size = 2 * algo().digest_size
    # Anchor with \Z rather than $: '$' also matches just before a trailing
    # newline, which would let 'deadbeef...\n' pass as a valid hash.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
| 51 | |
| 52 | |
def get_hash_algo(_namespace):
    """Return hash algorithm class to use when uploading to given |namespace|.

    The namespace argument is currently ignored: hashlib.sha1 is returned
    unconditionally.
    """
    # TODO(vadimsh): Implement this at some point.
    algo = hashlib.sha1
    return algo
| 57 | |
| 58 | |
def hash_file(filepath, algo):
    """Computes the hex digest of a file, streaming it from disk in chunks.

    The file is opened in binary mode and fed to the hasher DISK_FILE_CHUNK
    bytes at a time, so it is never held in memory all at once.

    |algo| should be one of hashlib hashing algorithm.
    """
    hasher = algo()
    with open(filepath, 'rb') as stream:
        # read() returns an empty bytes object at end of file, which is the
        # sentinel that stops the iteration.
        for block in iter(lambda: stream.read(DISK_FILE_CHUNK), b''):
            hasher.update(block)
    return hasher.hexdigest()