Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 1 | # Copyright 2014 The Swarming Authors. All rights reserved. |
| 2 | # Use of this source code is governed under the Apache License, Version 2.0 that |
| 3 | # can be found in the LICENSE file. |
| 4 | |
| 5 | """Understands .isolated files and can do local operations on them.""" |
| 6 | |
| 7 | import hashlib |
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame^] | 8 | import json |
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 9 | import logging |
| 10 | import os |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 11 | import re |
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 12 | import stat |
| 13 | import sys |
| 14 | |
| 15 | from utils import file_path |
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame^] | 16 | from utils import threading_utils |
| 17 | from utils import tools |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 18 | |
| 19 | |
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# Chunk size to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps canonical algorithm name -> hashlib class.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization. Inverse of SUPPORTED_ALGOS: maps hashlib class ->
# canonical algorithm name, e.g. hashlib.sha1 -> 'sha-1'.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())
| 50 | |
| 51 | |
class IsolatedError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
| 55 | |
| 56 | |
class MappingError(OSError):
  """Raised when the file tree cannot be recreated on disk."""
| 60 | |
| 61 | |
def is_valid_hash(value, algo):
  """Returns True if |value| is a well-formed hex digest for |algo|.

  Arguments:
    value: string to validate.
    algo: hashlib algorithm class (e.g. hashlib.sha1); only its digest size
        is used.
  """
  size = 2 * algo().digest_size
  # Anchor with \Z instead of $: '$' also matches just before a trailing
  # newline, so a digest followed by '\n' would incorrectly be accepted.
  return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
| 66 | |
| 67 | |
def get_hash_algo(_namespace):
  """Returns the hash algorithm class to use when uploading to |_namespace|.

  The namespace is currently ignored and sha-1 is always returned.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
| 72 | |
| 73 | |
def is_namespace_with_compression(namespace):
  """Returns True when objects stored in |namespace| are compressed."""
  compressed_suffixes = ('-gzip', '-deflate')
  return namespace.endswith(compressed_suffixes)
| 77 | |
| 78 | |
def hash_file(filepath, algo):
  """Returns the hex digest of the file at |filepath|.

  Reads the file in DISK_FILE_CHUNK sized chunks so arbitrarily large files
  never have to fit in memory. |algo| should be one of hashlib hashing
  algorithm.
  """
  digest = algo()
  with open(filepath, 'rb') as f:
    # iter() keeps calling f.read() until it returns the empty sentinel at EOF.
    for chunk in iter(lambda: f.read(DISK_FILE_CHUNK), b''):
      digest.update(chunk)
  return digest.hexdigest()
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 92 | |
| 93 | |
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that runs a preconfigured function and retries on IOError."""
  # The pool starts with INITIAL_WORKERS threads and grows up to MAX_WORKERS.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of retries per task before giving up.
  RETRIES = 5

  def __init__(self):
    super(WorkerPool, self).__init__(
        [IOError], self.RETRIES, self.INITIAL_WORKERS, self.MAX_WORKERS, 0,
        'remote')
| 111 | |
| 112 | |
class LocalCache(object):
  """Interface for a local store of objects fetched via Storage.

  Implementations may be accessed concurrently from multiple threads, so they
  should guard their internal state with some lock.
  """
  # Directory backing the cache, if any.
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface. Never swallows exceptions."""
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
| 164 | |
| 165 | |
class IsolatedFile(object):
  """A single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Becomes True once everything on the left-side of the tree was parsed.
    # 'Tree' here means the .isolate and all the .isolated files recursively
    # included by it with the 'includes' key. The order of the sha-1s in
    # 'includes' matters: later ones are not processed until the earlier ones
    # were retrieved and read.
    self.can_fetch = False

    # Raw decoded json data.
    self.data = {}
    # One IsolatedFile instance per entry in self.data['includes'].
    self.children = []

    # Set once load() has parsed the .isolated content.
    self._is_parsed = False
    # Set once fetch_files() has enqueued the files.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(child_hash, self.algo)
                     for child_hash in includes]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped; overridden
      # entries must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
| 223 | |
| 224 | |
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments, aggregated from the tree.
    self.command = []
    # Aggregated file mapping: relative path -> properties dict.
    self.files = {}
    # None until a node in the tree specifies it; then 0, 1 or 2.
    self.read_only = None
    # None until a node specifies it; defaulted to '' at the end of load().
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Schedules the fetch of one .isolated file at the highest priority.
      h = isolated_file.obj_hash
      if h in seen:
        raise IsolatedError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    # Process .isolated files as they arrive; each one may enqueue more
    # includes, so loop until nothing is in flight anymore.
    while pending:
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have had its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    # Depth-first walk that fetches files of already-fetchable nodes and marks
    # at most one additional child per node as fetchable, preserving the strict
    # left-to-right ordering of 'includes'.
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    # Enqueues this node's files and folds its properties into the aggregated
    # settings; for each property, the first node in the tree to set it wins.
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
| 319 | |
| 320 | |
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.
  """
  # Preserve whether the input denoted a directory (trailing separator).
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the symlink-free prefix already resolved; |todo| the remainder.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|: normalize the case and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Target is outside the build tree: treat the symlink as an ordinary
      # directory/file and keep scanning past it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  # Re-append the trailing separator if the input denoted a directory.
  relfile = os.path.relpath(done, indir)
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
| 391 | |
| 392 | |
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory all returned paths are relative to.
    blacklist: optional callable taking a relative path; truthy return value
        excludes that path (files listed explicitly are still included).
    follow_symlinks: if True, symlinks are resolved via expand_symlinks().

  Returns:
    List of relative paths (symlinks encountered plus files found).

  Raises:
    MappingError on any invalid or unmappable input.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  # Refuse paths escaping |indir| (e.g. via '..').
  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      #  ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  # A trailing separator means a directory: recurse into its entries.
  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Mark directories with a trailing separator for the recursive call.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
| 471 | |
| 472 | |
def expand_directories_and_symlinks(
    indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
  """Expands the directories and the symlinks, applies the blacklist and
  verifies files exist.

  Files are specified in os native path separator. When |ignore_broken_items|
  is True, entries that fail to map are logged and skipped instead of raising.
  """
  outfiles = []
  for relfile in infiles:
    try:
      expanded = expand_directory_and_symlink(
          indir, relfile, blacklist, follow_symlinks)
    except MappingError as e:
      if not ignore_broken_items:
        raise
      logging.info('warning: %s', e)
    else:
      outfiles.extend(expanded)
  return outfiles
| 491 | |
| 492 | |
def file_to_metadata(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo:      Hashing algorithm used.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file. Keys: 'm' (mode), 't' (mtime), 's' (size), 'h' (hash) for regular
    files; 't' and 'l' (link destination) for symlinks.
  """
  out = {}
  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    # lstat, not stat: a symlink must be described, not its target.
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame^] | 572 | |
| 573 | |
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  Note: this reference implementation does not create child .isolated file so it
  always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the validator to make sure it is well-formed
  # .isolated content before writing it out.
  load_isolated(json.dumps(data), SUPPORTED_ALGOS[data['algo']])
  tools.write_json(isolated, data, True)
  return []
| 587 | |
| 588 | |
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server. If None, it is read from the
          'algo' key in the data itself, defaulting to 'sha-1'.

  Returns:
    The decoded dict, with 'files' paths and 'l' (link) values converted to
    the native os.path.sep.

  Raises:
    IsolatedError if the content is not valid .isolated data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise IsolatedError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    # Bug fix: this must index SUPPORTED_ALGOS (name -> class), not
    # SUPPORTED_ALGOS_REVERSE (class -> name), which would raise KeyError when
    # given an algorithm name. See save_isolated() for the same lookup.
    algo = SUPPORTED_ALGOS[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise IsolatedError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise IsolatedError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise IsolatedError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise IsolatedError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise IsolatedError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise IsolatedError('Expected int or long, got %r' % subsubvalue)
          else:
            raise IsolatedError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a file ('h' + 's', optionally 'm') or a symlink
        # ('l' alone); the checks below enforce mutual exclusion.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise IsolatedError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise IsolatedError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise IsolatedError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if not value in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the the native path format, someone could want to download an .isolated
  # tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data