blob: a67bad194cb4516ac4474327ad5a823a5193f090 [file] [log] [blame]
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001# Copyright 2014 The Swarming Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0 that
3# can be found in the LICENSE file.
4
5"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
15from utils import file_path
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040016from utils import threading_utils
17from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040018
19
# Version stored and expected in .isolated files. Bumping the major number
# breaks compatibility (see load_isolated()).
ISOLATED_FILE_VERSION = '1.4'


# Chunk size to use when doing disk I/O, e.g. in hash_file().
DISK_FILE_CHUNK = 1024 * 1024


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps wire-format algorithm name -> hashlib class.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization. Maps hashlib class -> wire-format algorithm name.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())
50
51
class IsolatedError(ValueError):
  """Raised when a .isolated file cannot be loaded or fails validation."""
55
56
class MappingError(OSError):
  """Raised when the dependency tree cannot be recreated on disk."""
60
61
def is_valid_hash(value, algo):
  """Checks that |value| is a well-formed hex digest for hash class |algo|.

  The expected length is twice the digest size in bytes (two hex characters
  per byte); both upper and lower case hex digits are accepted.
  """
  expected_length = algo().digest_size * 2
  pattern = r'^[a-fA-F0-9]{%d}$' % expected_length
  return re.match(pattern, value) is not None
66
67
def get_hash_algo(_namespace):
  """Returns the hash algorithm class used when uploading to |namespace|.

  Currently hard-coded to SHA-1 for every namespace.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
72
73
def is_namespace_with_compression(namespace):
  """Tells whether objects stored in |namespace| are compressed."""
  compressed_suffixes = ('-gzip', '-deflate')
  return namespace.endswith(compressed_suffixes)
77
78
def hash_file(filepath, algo):
  """Computes the hex digest of a file by streaming it from disk in chunks.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    chunk = stream.read(DISK_FILE_CHUNK)
    while chunk:
      hasher.update(chunk)
      chunk = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040092
93
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that retries tasks failing with IOError.

  Runs a preconfigured function, starting with a small number of threads and
  growing on demand up to MAX_WORKERS.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # How many times a task is retried on IOError before giving up.
  RETRIES = 5

  def __init__(self):
    retried_exceptions = [IOError]
    super(WorkerPool, self).__init__(
        retried_exceptions,
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
111
112
class LocalCache(object):
  """Abstract interface for a local cache of objects fetched via Storage.

  Implementations may be accessed concurrently from multiple threads, so they
  should protect their internal state with some lock.
  """
  # Directory holding the cached items; set by concrete implementations.
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never swallows exceptions."""
    return False

  def cached_set(self):
    """Returns a new set holding the digests of every cached item."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Verifies an item's integrity and refreshes its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Drops the item from the cache, if present."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns the full content of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Consumes the |content| generator and stores the data in the cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Makes |dest| a hard link with the same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
164
165
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw decoded .isolated data.
    self.data = {}
    # One IsolatedFile instance per digest in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(digest, self.algo) for digest in included]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
223
224
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments; taken from the first (left-most)
    # .isolated file in the tree that defines one.
    self.command = []
    # Maps relative file path -> properties dict, aggregated over the whole
    # .isolated include tree.
    self.files = {}
    # None until a .isolated file in the tree sets it; then 0, 1 or 2.
    self.read_only = None
    # Directory to run the command from, relative to the mapped root; forced
    # to '' at the end of load() if no .isolated file set it.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      fetch_queue: object exposing add()/wait() to retrieve items by digest and
                   a .cache attribute to read fetched content back.
      root_isolated_hash: digest of the root .isolated file.
      algo: hashlib algorithm class used to validate digests.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Queues one .isolated file for fetching, refusing include cycles.
      h = isolated_file.obj_hash
      if h in seen:
        raise IsolatedError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    while pending:
      # Block until any queued .isolated file is available, then parse it and
      # queue its own includes.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have had its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Walks the include tree depth-first, requesting files of fetchable nodes
    and marking at most one additional child fetchable per visit (preserving
    the strict left-to-right 'includes' ordering).
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Requests |node|'s files and folds its settings into this object.

    Left-most nodes win: a property already set here is never overwritten.
    """
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
319
320
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: root of the tree; used to decide whether a symlink target is inside
           the build tree.
    relfile: path relative to |indir| to resolve; a trailing os.path.sep marks
             a directory.

  Returns:
    Tuple (relfile, symlinks): the symlink-free path and the symlinks
    traversed, both relative to |indir|.

  Raises:
    MappingError when a symlink target is missing or self-referential.
  """
  # Remember whether the input denoted a directory so the trailing separator
  # can be restored on the result.
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the prefix already known symlink-free; |todo| remains to scan.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|: fix the path case and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Target is outside the build tree: treat the symlink itself as an
      # ordinary path component and keep scanning past it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Restore the trailing separator when the input denoted a directory. Note
  # that |is_directory| is a bool; multiplying the separator by it appends it
  # only for directories.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
391
392
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory; |relfile| must stay inside it.
    relfile: path relative to |indir|; a trailing os.path.sep denotes a
             directory to be expanded recursively.
    blacklist: optional callable returning True for relative paths to skip;
               note it is applied to directory entries only, never to
               |relfile| itself.
    follow_symlinks: if True, symlinks in |relfile| are resolved through
                     expand_symlinks().

  Returns:
    List of relative paths: the traversed symlinks plus the files found.

  Raises:
    MappingError on absolute paths, paths escaping |indir|, path case
    mismatches (except on OSX/Windows), or missing files.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Mark directories with a trailing separator so the recursive call
          # expands them too.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
471
472
def expand_directories_and_symlinks(
    indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
  """Expands each input through expand_directory_and_symlink().

  Applies the blacklist, recurses into directories, optionally follows
  symlinks and verifies files exist. When |ignore_broken_items| is True,
  broken entries are logged and skipped instead of raising.

  Files are specified in os native path separator.
  """
  results = []
  for item in infiles:
    try:
      expanded = expand_directory_and_symlink(
          indir, item, blacklist, follow_symlinks)
    except MappingError as e:
      if not ignore_broken_items:
        raise
      logging.info('warning: %s', e)
    else:
      results.extend(expanded)
  return results
491
492
def file_to_metadata(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calcultate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional; None is accepted and
              treated as an empty dictionary.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo: Hashing algorithm used.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file.

  Raises:
    MappingError if |filepath| does not exist.
  """
  if prevdict is None:
    # The docstring advertises |prevdict| as optional; normalize None so the
    # .get() calls below don't crash with AttributeError.
    prevdict = {}
  out = {}
  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    # lstat() so that symlinks themselves are described, not their targets.
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath) # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400572
573
def save_isolated(isolated, data):
  """Serializes |data| to the .isolated file |isolated|.

  Note: this reference implementation does not create child .isolated file so
  the returned list of child isolated files included by |isolated| is always
  empty.
  """
  # Round-trip the data through the validator so malformed content is rejected
  # before anything is written to disk.
  hashing_algo = SUPPORTED_ALGOS[data['algo']]
  load_isolated(json.dumps(data), hashing_algo)
  tools.write_json(isolated, data, True)
  return []
587
588
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server. If None, defaults to the
          algorithm named in the file itself, falling back to 'sha-1'.

  Returns:
    The decoded dict, with file paths and symlink targets converted to the
    native os.path.sep.

  Raises:
    IsolatedError if the content is not valid .isolated data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise IsolatedError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    # Bug fix: the algorithm *name* from the file must be looked up in
    # SUPPORTED_ALGOS (name -> class); SUPPORTED_ALGOS_REVERSE is keyed by
    # class and raised KeyError here. The rest of this function requires
    # |algo| to be a hashlib class (it calls algo() and indexes
    # SUPPORTED_ALGOS_REVERSE with it).
    algo = SUPPORTED_ALGOS[data.get('algo', 'sha-1')]

  # Validate each top-level key against its expected schema.
  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise IsolatedError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise IsolatedError('Expected dict, got %r' % subvalue)
        # Per-file properties: 'l' link target, 'm' mode, 'h' content hash,
        # 's' size.
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise IsolatedError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise IsolatedError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise IsolatedError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise IsolatedError('Expected int or long, got %r' % subsubvalue)
          else:
            raise IsolatedError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a file ('h' and 's' both set) or a symlink
        # ('l' set, no mode allowed); the combinations below enforce that.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise IsolatedError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise IsolatedError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise IsolatedError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if not value in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the the native path format, someone could want to download an .isolated
  # tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
731 return data