blob: c419d9c0094e1847aa6a3f5efda6f252f7ecf5e5 [file] [log] [blame]
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001# Copyright 2014 The Swarming Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0 that
3# can be found in the LICENSE file.
4
5"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
8import re
9
10
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Chunk size, in bytes, to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
    'md5': hashlib.md5,
    'sha-1': hashlib.sha1,
    'sha-512': hashlib.sha512,
}


# Used for serialization: maps each hashlib constructor back to its name.
# dict.items() is used rather than the Python 2-only dict.iteritems() so that
# this module can be imported under both Python 2 and Python 3.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.items())
30
31
def is_valid_hash(value, algo):
    """Returns whether |value| is a valid hex digest for |algo|.

    |algo| is a hashlib-style constructor (e.g. hashlib.sha1). |value| is valid
    when it consists of exactly twice the algorithm's digest size in
    hexadecimal characters, in either case, and nothing else.
    """
    size = 2 * algo().digest_size
    # Anchor the end with \Z instead of $: '$' also matches right before a
    # trailing newline, which would wrongly accept a digest followed by '\n'.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
36
37
def get_hash_algo(_namespace):
    """Returns the hashlib constructor to use for uploads to |namespace|.

    The namespace argument is currently ignored: SHA-1 is always selected.
    """
    # TODO(vadimsh): Implement this at some point.
    algo = hashlib.sha1
    return algo
42
43
def hash_file(filepath, algo):
    """Returns the hex digest of a file's content, read one chunk at a time.

    The file is opened in binary mode and fed to the hasher in pieces of at
    most DISK_FILE_CHUNK bytes, so it is never loaded in memory all at once.

    |algo| should be one of hashlib hashing algorithm.
    """
    hasher = algo()
    with open(filepath, 'rb') as stream:
        # read() yields an empty result at end of file, which is the sentinel
        # that ends the iteration.
        for block in iter(lambda: stream.read(DISK_FILE_CHUNK), b''):
            hasher.update(block)
    return hasher.hexdigest()