blob: 719efe0769dd1186630f90e5d0a513840762fda8 [file] [log] [blame]
# Copyright 2014 The Swarming Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0 that
# can be found in the LICENSE file.
4
5"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
8import re
9
10
# Format version written to, and required of, .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Number of bytes requested per read when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1 << 20


# Placeholder size for when the correct file size is not known; generally
# used for .isolated files.
UNKNOWN_FILE_SIZE = None
22
23
# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps the algorithm name to the hashlib constructor.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps each hashlib constructor back to its name.
# dict.items() is used instead of iteritems() because iteritems() only exists
# on Python 2, while items() works on both Python 2 and Python 3.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.items())
35
36
class IsolatedError(ValueError):
  """Raised when a .isolated file cannot be loaded.

  Subclasses ValueError, so handlers written for ValueError also catch it.
  """
40
41
class MappingError(OSError):
  """Raised when the tree could not be recreated.

  Subclasses OSError, so handlers written for OSError also catch it.
  """
45
46
def is_valid_hash(value, algo):
  """Returns if the value is a valid hash for the corresponding algorithm.

  |value| is the candidate hex digest string. |algo| is a hashlib-style
  constructor; its digest_size determines the expected length.

  A valid hash consists of exactly 2 * digest_size hexadecimal characters
  (either case) and nothing else.
  """
  size = 2 * algo().digest_size
  # Anchor with \Z rather than $: '$' also matches just before a trailing
  # newline, which would let '<hex digest>\n' pass as a valid hash.
  return bool(re.match(r'[a-fA-F0-9]{%d}\Z' % size, value))
51
52
def get_hash_algo(_namespace):
  """Returns the hash algorithm class to use when uploading to a namespace.

  |_namespace| is currently ignored: hashlib.sha1 is returned for every
  namespace.
  """
  # TODO(vadimsh): Implement this at some point.
  algo = hashlib.sha1
  return algo
57
58
def hash_file(filepath, algo):
  """Returns the hex digest of a file's content, reading it piece by piece.

  The file is opened in binary mode and consumed DISK_FILE_CHUNK bytes at a
  time, so it is not read all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    # iter() calls the reader repeatedly and stops at the first empty read,
    # which is how read() signals end of file.
    for piece in iter(lambda: stream.read(DISK_FILE_CHUNK), b''):
      hasher.update(piece)
  return hasher.hexdigest()