blob: 23b7f660e7edaa61ba5f76a04d95cbb3f77149e9 [file] [log] [blame]
# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
15from utils import file_path
maruel12e30012015-10-09 11:55:35 -070016from utils import fs
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040017from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040018
19
# Version written to, and expected from, .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes read per call when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1 << 20


# hashlib spells its algorithms 'shaX' while the serialized name is 'sha-X',
# so the mapping between the two has to be listed explicitly.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# Reverse lookup (hashlib constructor -> serialized name), used when
# serializing.
SUPPORTED_ALGOS_REVERSE = {
  constructor: name for name, constructor in SUPPORTED_ALGOS.items()
}


# File types accepted in a 'files' entry of an .isolated file.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tanselle4288c32016-07-28 09:45:40 -070042
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040043
class IsolatedError(ValueError):
  """Raised when a .isolated file can't be loaded or is not valid."""
47
48
class MappingError(OSError):
  """Raised when the file tree can't be recreated."""
52
53
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for the algorithm |algo|.

  Arguments:
    value: candidate digest. Anything that is not a string is rejected.
    algo: hashlib algorithm constructor, e.g. hashlib.sha1.

  Returns:
    True only if |value| consists of exactly 2 * digest_size hexadecimal
    characters (upper or lower case) and nothing else.
  """
  size = 2 * algo().digest_size
  try:
    # Use \Z rather than $: '$' also matches just before a trailing newline,
    # which would let '<digest>\n' through as a valid hash.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # Not a string at all (e.g. a number or None decoded from JSON).
    return False
58
59
# Set to True after the first get_hash_algo() call so that the choice of
# algorithm is only logged once per process.
get_hash_algo_has_logged = False


def get_hash_algo(namespace):
  """Return hash algorithm class to use when uploading to given |namespace|.

  The algorithm is selected from SUPPORTED_ALGOS when |namespace| starts with
  '<algo name>-' (e.g. 'sha-256-...'). When no supported algorithm name
  prefixes the namespace, hashlib.sha1 is returned.

  The outcome is logged on the first call only.
  """
  global get_hash_algo_has_logged
  chosen = None
  for name, algo in SUPPORTED_ALGOS.items():
    if namespace.startswith(name + '-'):
      chosen = algo
      break

  if not get_hash_algo_has_logged:
    get_hash_algo_has_logged = True
    if chosen:
      logging.info('Using hash algo %s for namespace %s', chosen, namespace)
    else:
      # logging.warn is a deprecated alias; logging.warning is the supported
      # spelling.
      logging.warning(
          'No hash algo found in \'%s\', assuming sha-1', namespace)

  if not chosen:
    return hashlib.sha1

  return chosen
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040083
84
def is_namespace_with_compression(namespace):
  """Tells whether the given |namespace| stores compressed objects.

  A namespace is considered compressed when its name ends with '-gzip' or
  '-deflate'.
  """
  compressed_suffixes = ('-gzip', '-deflate')
  return any(namespace.endswith(suffix) for suffix in compressed_suffixes)
88
89
def hash_file(filepath, algo):
  """Returns the hex digest of a file, reading it one chunk at a time.

  The file is consumed in pieces of at most DISK_FILE_CHUNK bytes so it is
  never held in memory all at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    piece = stream.read(DISK_FILE_CHUNK)
    while piece:
      hasher.update(piece)
      piece = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400103
104
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """Creates an unloaded instance.

    Arguments:
      obj_hash: hash of the .isolated file itself.
      algo: hashlib algorithm class, forwarded to load_isolated() and to the
            child IsolatedFile instances.
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per object in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    Creates one unloaded child IsolatedFile per hash found in 'includes'. Must
    be called at most once per instance.
    """
    # Let logging do the formatting lazily, like the other log calls in this
    # file; it is also safe if obj_hash ever happens to be a tuple.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
      IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400140
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400141
def walk_includes(isolated):
  """Walks IsolatedFile include graph and yields IsolatedFile objects.

  Depth-first, pre-order traversal: the root is yielded first, then each child
  subtree in turn, left to right. A node's children are only looked at after
  the node itself has been yielded. Not yet loaded nodes are considered
  childless.
  """
  yield isolated
  # Stack of live iterators over the children lists, innermost last. This is
  # the explicit form of the recursion.
  pending = [iter(isolated.children)]
  while pending:
    try:
      node = next(pending[-1])
    except StopIteration:
      pending.pop()
      continue
    yield node
    pending.append(iter(node.children))
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400152
153
@tools.profile
def expand_symlinks(indir, relfile):
  """Resolves the symlinks found along |relfile|, relative to |indir|.

  Symlinks that point outside the build tree are treated as if they were
  ordinary directories/files. Returns a tuple of the final symlink-free
  target, relative to |indir|, and the list of paths to the symlinks
  encountered in the process, also relative to |indir|.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Raises MappingError when a symlink target doesn't exist or when a directory
  loop is detected, although in theory the latter could be supported.
  """
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the part of the path already resolved, |todo| what is left.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(done, todo)
    if not symlink:
      # No symlink left in the remainder; this is the end of the walk.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break

    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    link_parent = os.path.normpath(os.path.join(done, pre_symlink))
    # readlink doesn't exist on Windows.
    symlink_target = os.readlink(symlink_path)  # pylint: disable=E1101
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(link_parent, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Points outside of |indir|: keep the symlink as a plain path component.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))

    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target_parts = target.split(os.path.sep)
    symlink_parts = symlink_path.split(os.path.sep)
    common = 0
    for target_piece, symlink_piece in zip(target_parts, symlink_parts):
      if target_piece != symlink_piece:
        break
      common += 1
    done = os.path.sep.join(target_parts[:common])
    todo = os.path.join(os.path.sep.join(target_parts[common:]), post_symlink)

  resolved = os.path.relpath(done, indir).rstrip(os.path.sep)
  if is_directory:
    # Preserve the trailing separator that marks a directory.
    resolved += os.path.sep
  return resolved, symlinks
224
225
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  Arguments:
    indir: root directory |relfile| is relative to.
    relfile: relative path; a trailing os.path.sep marks a directory.
    blacklist: optional callable; directory entries for which it returns a
               true value are skipped. It is not applied to |relfile| itself.
    follow_symlinks: when true, symlinks are resolved with expand_symlinks()
                     and the symlinks found are part of the result.

  Returns the list of relative paths found. This function is recursive when
  relfile is a directory.

  Raises MappingError when the input can't be mapped.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  # The second comparison is the special case './'.
  case_differs = (
      filepath != native_filepath and
      filepath != native_filepath + '.' + os.path.sep)
  # While it'd be nice to enforce path casing on Windows, it's impractical.
  # Also give up enforcing strict path case on OSX. Really, it's that sad.
  # The case where it happens is very specific and hard to reproduce:
  # get_native_path_case(
  #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
  # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
  #
  # Note that this is really something deep in OSX because running
  # ls Foo.framework/Versions/A
  # will print out 'Resources', while file_path.get_native_path_case()
  # returns a lower case 'r'.
  #
  # So *something* is happening under the hood resulting in the command 'ls'
  # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
  # have no idea why.
  if case_differs and sys.platform not in ('darwin', 'win32'):
    raise MappingError(
        'File path doesn\'t equal native file path\n%s != %s' %
        (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      # NOTE(review): MappingError derives from OSError, so mapping failures
      # raised by expand_symlinks() are ignored here as well.
      pass

  if not relfile.endswith(os.path.sep):
    # Single file. Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]

  # Directory.
  if not os.path.isdir(infile):
    raise MappingError(
        '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

  # Special case './'.
  if relfile.startswith('.' + os.path.sep):
    relfile = relfile[2:]
  # The symlinks list doubles as the accumulator for the directory content.
  outfiles = symlinks
  try:
    for entry in fs.listdir(infile):
      inner_relfile = os.path.join(relfile, entry)
      if blacklist and blacklist(inner_relfile):
        continue
      if os.path.isdir(os.path.join(indir, inner_relfile)):
        inner_relfile += os.path.sep
      outfiles.extend(
          expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                       follow_symlinks))
    return outfiles
  except OSError as e:
    # NOTE(review): this also catches a MappingError raised by the recursive
    # call, since MappingError derives from OSError, and wraps it again.
    raise MappingError(
        'Unable to iterate over directory %s.\n%s' % (infile, e))
309
310
def expand_directories_and_symlinks(
    indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
  """Expands every item of |infiles| with expand_directory_and_symlink().

  This expands the directories and the symlinks, applies the blacklist and
  verifies files exist. Files are specified in os native path separator.

  When |ignore_broken_items| is true, an item that fails with MappingError is
  logged and skipped; otherwise the exception is propagated.

  Returns the concatenated list of expanded relative paths.
  """
  expanded = []
  for item in infiles:
    try:
      found = expand_directory_and_symlink(
          indir, item, blacklist, follow_symlinks)
    except MappingError as e:
      if ignore_broken_items:
        logging.info('warning: %s', e)
        continue
      raise
    expanded.extend(found)
  return expanded
329
330
@tools.profile
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the hash of the file's
    content, using |algo|, if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached hash
              to skip recalculating the hash. Optional; None is handled like
              an empty dictionary.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo: Hashing algorithm used.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file. Keys set: 't' (timestamp) always; 's' (size) and 'h' (hash) for
    regular files; 'l' (link destination) for symlinks; 'm' (mode) for regular
    files except on Windows.

  Raises:
    MappingError: if |filepath| can't be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only
  if prevdict is None:
    # |prevdict| is documented as optional; without this the .get() calls
    # below would raise AttributeError.
    prevdict = {}
  out = {}
  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    if collapse_symlinks:
      # os.stat follows symbolic links
      filestats = os.stat(filepath)
    else:
      # os.lstat does not follow symbolic links, and thus preserves them.
      filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    if (filemode & (stat.S_IXUSR|stat.S_IRGRP)) == (stat.S_IXUSR|stat.S_IRGRP):
      # Only keep x group bit if both x user bit and group read bit are set.
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the hash.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400421
422
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  |data| is validated first, then written as JSON to the path |isolated|.

  Note: this reference implementation does not create child .isolated file so it
  always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  expected_algo = SUPPORTED_ALGOS[data['algo']]
  # Serialize then 'reload' the data, purely to make sure it is valid
  # .isolated data; the parsed result is discarded.
  serialized = json.dumps(data)
  load_isolated(serialized, expected_algo)
  tools.write_json(isolated, data, True)
  return []
436
437
def split_path(path):
  """Splits a path and return a list with each element.

  The components are returned leaf first, e.g. 'a/b/c' gives ['c', 'b', 'a'].
  Empty components are dropped. A root or drive prefix, i.e. whatever
  os.path.split() can't reduce further, is not included in the result.
  """
  out = []
  while path:
    head, tail = os.path.split(path)
    if tail:
      out.append(tail)
    if head == path:
      # os.path.split() made no progress: |path| is a root ('/') or a bare
      # drive ('C:'). Stop here, otherwise this would loop forever.
      break
    path = head
  return out
446
447
def _validate_isolated_file_entry(path, props, algo, algo_name):
  """Validates a single item of the 'files' section of an .isolated file.

  Arguments:
  - path: key of the item, i.e. the relative file path.
  - props: value of the item, expected to be a dict of properties.
  - algo: hashlib algorithm class used to validate the 'h' property.
  - algo_name: serialized name of |algo|, only used in error messages.

  Raises IsolatedError on the first problem found.
  """
  if not isinstance(path, basestring):
    raise IsolatedError('Expected string, got %r' % path)
  if os.path.isabs(path) or path.startswith('\\\\'):
    # Disallow '\\\\', it could UNC on Windows but disallow this
    # everywhere.
    raise IsolatedError('File path can\'t be absolute: %r' % path)
  if path.endswith(('/', '\\')):
    raise IsolatedError(
        'File path can\'t end with \'%s\': %r' % (path[-1], path))
  if '..' in split_path(path):
    raise IsolatedError('File path can\'t reference parent: %r' % path)
  if not isinstance(props, dict):
    raise IsolatedError('Expected dict, got %r' % props)

  for name, item in props.iteritems():
    if name == 'l':
      if not isinstance(item, basestring):
        raise IsolatedError('Expected string, got %r' % item)
    elif name == 'm':
      if not isinstance(item, int):
        raise IsolatedError('Expected int, got %r' % item)
    elif name == 'h':
      if not is_valid_hash(item, algo):
        raise IsolatedError('Expected %s, got %r' %
                            (algo_name, item))
    elif name == 's':
      if not isinstance(item, (int, long)):
        raise IsolatedError('Expected int or long, got %r' % item)
    elif name == 't':
      if item not in SUPPORTED_FILE_TYPES:
        raise IsolatedError('Expected one of \'%s\', got %r' % (
            ', '.join(sorted(SUPPORTED_FILE_TYPES)), item))
    else:
      raise IsolatedError('Unknown subsubkey %s' % name)

  # An item is either a file, with both a hash and a size, or a link.
  has_hash = 'h' in props
  has_link = 'l' in props
  has_size = 's' in props
  if has_hash == has_link:
    raise IsolatedError(
        'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
        (algo_name, props))
  if has_hash != has_size:
    raise IsolatedError(
        'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
        (algo_name, props))
  if has_size == has_link:
    raise IsolatedError(
        'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
        props)
  if has_link and 'm' in props:
    raise IsolatedError(
        'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
        props)


def load_isolated(content, algo):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns the decoded dict. Path separators in the 'files' keys, in the 'l'
  link destinations and in 'relative_cwd' are converted to os.path.sep.

  Raises IsolatedError if the content can't be parsed or is not valid.
  """
  if not algo:
    raise IsolatedError('\'algo\' is required')
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  raw_version = data.get('version', '1.0')
  if not isinstance(raw_version, basestring):
    raise IsolatedError('Expected string, got %r' % raw_version)
  try:
    version = tuple(int(piece) for piece in raw_version.split('.'))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % raw_version)

  expected_version = tuple(
      int(piece) for piece in ISOLATED_FILE_VERSION.split('.'))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, raw_version))

  algo_name = SUPPORTED_ALGOS_REVERSE[algo]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for argument in value:
        if not isinstance(argument, basestring):
          raise IsolatedError('Expected string, got %r' % argument)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for path, props in value.iteritems():
        _validate_isolated_file_entry(path, props, algo, algo_name)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for included in value:
        if not is_valid_hash(included, algo):
          raise IsolatedError('Expected %s, got %r' % (algo_name, included))

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated
  # tree from another OS.
  if os.path.sep == '\\':
    wrong_path_sep = '/'
  else:
    wrong_path_sep = '\\'
  if 'files' in data:
    data['files'] = {
      path.replace(wrong_path_sep, os.path.sep): props
      for path, props in data['files'].iteritems()
    }
    for props in data['files'].itervalues():
      if 'l' in props:
        props['l'] = props['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data