# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Understands .isolated files and can do local operations on them."""

import hashlib
import json
import logging
import os
import re
import stat
import sys

from utils import file_path
from utils import fs
from utils import tools


# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Chunk size to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024


# Sadly, hashlib uses 'shaX' instead of the standard 'sha-X' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
    'sha-1': hashlib.sha1,
    'sha-256': hashlib.sha256,
    'sha-512': hashlib.sha512,
}


# Used for serialization. Maps each hashlib constructor back to its 'sha-X'
# name. .items() is used so the expression works on both Python 2 and 3.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.items())


# Values accepted for the 't' property of an entry in the 'files' section.
SUPPORTED_FILE_TYPES = ['basic', 'tar']


class IsolatedError(ValueError):
    """Generic failure to load a .isolated file."""


class MappingError(OSError):
    """Failed to recreate the tree.

    Since this derives from OSError, any 'except OSError' clause also catches
    it.
    """


def is_valid_hash(value, algo):
    """Returns True if |value| is a valid hex digest for the |algo| algorithm.

    Arguments:
      value: candidate digest. Anything re.match() cannot scan (None, int,
             list, ...) is reported as invalid instead of raising.
      algo: hashlib-style constructor; the digest_size of a fresh instance
            determines the expected number of hex characters.
    """
    size = 2 * algo().digest_size
    try:
        # \A and \Z anchor on the whole string. '$' would also match right
        # before a trailing newline and accept e.g. 'abcd...\n'.
        return bool(re.match(r'\A[a-fA-F0-9]{%d}\Z' % size, value))
    except TypeError:
        return False


# Flipped to True by the first get_hash_algo() call so that the choice is only
# logged once.
get_hash_algo_has_logged = False


def get_hash_algo(namespace):
    """Return hash algorithm class to use when uploading to given |namespace|.

    The namespace is expected to start with one of the SUPPORTED_ALGOS names
    followed by '-' (e.g. 'sha-256-...'). When no supported name prefixes the
    namespace, hashlib.sha1 is returned.

    Only the first call logs which algorithm was selected.
    """
    global get_hash_algo_has_logged
    chosen = None
    for name, algo in SUPPORTED_ALGOS.items():
        if namespace.startswith(name + '-'):
            chosen = algo
            break

    if not get_hash_algo_has_logged:
        get_hash_algo_has_logged = True
        if chosen:
            logging.info(
                'Using hash algo %s for namespace %s', chosen, namespace)
        else:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning(
                'No hash algo found in \'%s\', assuming sha-1', namespace)

    if not chosen:
        return hashlib.sha1

    return chosen


def is_namespace_with_compression(namespace):
    """Returns True if given |namespace| stores compressed objects.

    A namespace is considered compressed when its name ends with '-gzip' or
    '-deflate'.
    """
    return namespace.endswith(('-gzip', '-deflate'))


def hash_file(filepath, algo):
    """Calculates the hash of a file without reading it all in memory at once.

    |algo| should be one of hashlib hashing algorithm.

    The file is read in DISK_FILE_CHUNK sized pieces. Returns the hex digest.
    """
    digest = algo()
    with fs.open(filepath, 'rb') as f:
        while True:
            chunk = f.read(DISK_FILE_CHUNK)
            if not chunk:
                # An empty read means the end of the file was reached.
                break
            digest.update(chunk)
    return digest.hexdigest()


class IsolatedFile(object):
    """Represents a single parsed .isolated file."""

    def __init__(self, obj_hash, algo):
        """|obj_hash| is really the hash of the file."""
        self.obj_hash = obj_hash
        self.algo = algo

        # Raw data.
        self.data = {}
        # A IsolatedFile instance, one per object in self.includes.
        self.children = []

        # Set once the .isolated file is loaded.
        self._is_loaded = False

    def __repr__(self):
        return 'IsolatedFile(%s, loaded: %s)' % (
            self.obj_hash, self._is_loaded)

    def load(self, content):
        """Verifies the .isolated file is valid and loads this object with the
        json data.

        Creates one not-yet-loaded child IsolatedFile per hash listed under
        'includes'. Must be called at most once per instance.
        """
        # Pass the argument separately so it is only formatted when the debug
        # level is enabled.
        logging.debug('IsolatedFile.load(%s)', self.obj_hash)
        assert not self._is_loaded
        self.data = load_isolated(content, self.algo)
        self.children = [
            IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
        ]
        self._is_loaded = True

    @property
    def is_loaded(self):
        """Returns True if 'load' was already called."""
        return self._is_loaded


def walk_includes(isolated):
    """Walks IsolatedFile include graph and yields IsolatedFile objects.

    Visits root node first, then recursively all children, left to right.
    Not yet loaded nodes are considered childless.

    The children of a node are only read once the generator is resumed after
    yielding that node, so a consumer may populate them while iterating.
    """
    yield isolated
    for child in isolated.children:
        for x in walk_includes(child):
            yield x


@tools.profile
def expand_symlinks(indir, relfile):
    """Follows symlinks in |relfile|, but treating symlinks that point outside
    the build tree as if they were ordinary directories/files. Returns the
    final symlink-free target and a list of paths to symlinks encountered in
    the process.

    The rule about symlinks outside the build tree is for the benefit of the
    Chromium OS ebuild, which symlinks the output directory to an unrelated
    path in the chroot.

    Fails when a directory loop is detected, although in theory we could
    support that case.

    Arguments:
      indir: root directory |relfile| is relative to.
      relfile: relative path to resolve; a trailing os.path.sep marks it as a
               directory and is preserved on the returned path.

    Returns:
      Tuple (relfile, symlinks), both relative to |indir|.

    Raises:
      MappingError: a symlink target doesn't exist or a symlink points to one
                    of its own parent directories.
    """
    is_directory = relfile.endswith(os.path.sep)
    # |done| is the part of the path already resolved, |todo| is what is left
    # to inspect.
    done = indir
    todo = relfile.strip(os.path.sep)
    symlinks = []

    while todo:
        pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
            done, todo)
        if not symlink:
            # No symlink left in the remaining path.
            todo = file_path.fix_native_path_case(done, todo)
            done = os.path.join(done, todo)
            break
        symlink_path = os.path.join(done, pre_symlink, symlink)
        post_symlink = post_symlink.lstrip(os.path.sep)
        # readlink doesn't exist on Windows.
        # pylint: disable=E1101
        target = os.path.normpath(os.path.join(done, pre_symlink))
        symlink_target = os.readlink(symlink_path)
        if os.path.isabs(symlink_target):
            # Absolute path are considered a normal directories. The use case
            # is generally someone who puts the output directory on a separate
            # drive.
            target = symlink_target
        else:
            # The symlink itself could be using the wrong path case.
            target = file_path.fix_native_path_case(target, symlink_target)

        if not os.path.exists(target):
            raise MappingError(
                'Symlink target doesn\'t exist: %s -> %s' %
                (symlink_path, target))
        target = file_path.get_native_path_case(target)
        if not file_path.path_starts_with(indir, target):
            # Target is outside |indir|: treat the symlink as a plain
            # directory/file and keep scanning after it.
            done = symlink_path
            todo = post_symlink
            continue
        if file_path.path_starts_with(target, symlink_path):
            raise MappingError(
                'Can\'t map recursive symlink reference %s -> %s' %
                (symlink_path, target))
        logging.info('Found symlink: %s -> %s', symlink_path, target)
        symlinks.append(os.path.relpath(symlink_path, indir))
        # Treat the common prefix of the old and new paths as done, and start
        # scanning again.
        target = target.split(os.path.sep)
        symlink_path = symlink_path.split(os.path.sep)
        prefix_length = 0
        for target_piece, symlink_path_piece in zip(target, symlink_path):
            if target_piece == symlink_path_piece:
                prefix_length += 1
            else:
                break
        done = os.path.sep.join(target[:prefix_length])
        todo = os.path.join(
            os.path.sep.join(target[prefix_length:]), post_symlink)

    relfile = os.path.relpath(done, indir).rstrip(os.path.sep)
    if is_directory:
        # Restore the trailing separator that marked the input as a directory.
        relfile += os.path.sep
    return relfile, symlinks


@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
    """Expands a single input. It can result in multiple outputs.

    This function is recursive when relfile is a directory.

    Note: this code doesn't properly handle recursive symlink like one created
    with:
      ln -s .. foo

    Arguments:
      indir: root directory |relfile| is relative to.
      relfile: relative path; a directory must end with os.path.sep.
      blacklist: optional callable taking a relative path and returning True
                 when a directory entry must be skipped.
      follow_symlinks: if true, symlinks are resolved with expand_symlinks().

    Returns:
      List of relative paths: the symlinks encountered plus the files found.

    Raises:
      MappingError: the path is absolute, escapes |indir|, has the wrong case,
                    doesn't exist or a directory can't be listed.
    """
    if os.path.isabs(relfile):
        raise MappingError('Can\'t map absolute path %s' % relfile)

    infile = file_path.normpath(os.path.join(indir, relfile))
    # Compare on a path component boundary. A bare startswith(indir) would
    # accept a sibling directory sharing the same name prefix, e.g. indir
    # '/out' and relfile '../outside/x'.
    root = indir.rstrip(os.path.sep)
    if (infile.rstrip(os.path.sep) != root and
            not infile.startswith(root + os.path.sep)):
        raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

    filepath = os.path.join(indir, relfile)
    native_filepath = file_path.get_native_path_case(filepath)
    if filepath != native_filepath:
        # Special case './'.
        if filepath != native_filepath + '.' + os.path.sep:
            # While it'd be nice to enforce path casing on Windows, it's
            # impractical. Also give up enforcing strict path case on OSX.
            # Really, it's that sad. The case where it happens is very
            # specific and hard to reproduce:
            # get_native_path_case(
            #    u'Foo.framework/Versions/A/Resources/Something.nib') will
            # return u'Foo.framework/Versions/A/resources/Something.nib', e.g.
            # lowercase 'r'.
            #
            # Note that this is really something deep in OSX because running
            # ls Foo.framework/Versions/A
            # will print out 'Resources', while
            # file_path.get_native_path_case() returns a lower case 'r'.
            #
            # So *something* is happening under the hood resulting in the
            # command 'ls' and Carbon.File.FSPathMakeRef('path').FSRefMakePath()
            # to disagree. We have no idea why.
            if sys.platform not in ('darwin', 'win32'):
                raise MappingError(
                    'File path doesn\'t equal native file path\n%s != %s' %
                    (filepath, native_filepath))

    symlinks = []
    if follow_symlinks:
        try:
            relfile, symlinks = expand_symlinks(indir, relfile)
        except OSError:
            # The file doesn't exist, it will throw below.
            # Note: MappingError derives from OSError, so the errors raised by
            # expand_symlinks() itself are swallowed here too.
            pass

    if relfile.endswith(os.path.sep):
        if not os.path.isdir(infile):
            raise MappingError(
                '%s is not a directory but ends with "%s"' %
                (infile, os.path.sep))

        # Special case './'.
        if relfile.startswith('.' + os.path.sep):
            relfile = relfile[2:]
        outfiles = symlinks
        try:
            for filename in fs.listdir(infile):
                inner_relfile = os.path.join(relfile, filename)
                if blacklist and blacklist(inner_relfile):
                    continue
                if os.path.isdir(os.path.join(indir, inner_relfile)):
                    inner_relfile += os.path.sep
                outfiles.extend(
                    expand_directory_and_symlink(
                        indir, inner_relfile, blacklist, follow_symlinks))
            return outfiles
        except OSError as e:
            # This also catches a MappingError raised by the recursive call
            # and wraps it with the name of the directory being listed.
            raise MappingError(
                'Unable to iterate over directory %s.\n%s' % (infile, e))
    else:
        # Always add individual files even if they were blacklisted.
        if os.path.isdir(infile):
            raise MappingError(
                'Input directory %s must have a trailing slash' % infile)

        if not os.path.isfile(infile):
            raise MappingError('Input file %s doesn\'t exist' % infile)

        return symlinks + [relfile]


def expand_directories_and_symlinks(
        indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
    """Expands the directories and the symlinks, applies the blacklist and
    verifies files exist.

    Files are specified in os native path separator.

    Arguments:
      indir: root directory every item of |infiles| is relative to.
      infiles: iterable of relative paths to expand.
      blacklist: optional callable used to skip directory entries.
      follow_symlinks: if true, symlinks are resolved.
      ignore_broken_items: if true, an item failing with MappingError is
                           logged and skipped instead of aborting.

    Returns:
      Flat list of all the expanded relative paths.
    """
    outfiles = []
    for relfile in infiles:
        try:
            outfiles.extend(
                expand_directory_and_symlink(
                    indir, relfile, blacklist, follow_symlinks))
        except MappingError as e:
            if not ignore_broken_items:
                raise
            logging.info('warning: %s', e)
    return outfiles


@tools.profile
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Processes an input file, a dependency, and return meta data about it.

    Behaviors:
    - Retrieves the file mode, file size, file timestamp, file link
      destination if it is a file link and calculate the hash (with |algo|)
      of the file's content if the path points to a file and not a symlink.

    Arguments:
      filepath: File to act on.
      prevdict: the previous dictionary. It is used to retrieve the cached
                hash to skip recalculating the hash. Optional; None is
                treated as an empty dictionary.
      read_only: If 1 or 2, the file mode is manipulated. In practice, only
                 save one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444
                 (r). On windows, mode is not set since all files are
                 'executable' by default.
      algo: Hashing algorithm used.
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      The necessary dict to create a entry in the 'files' section of an
      .isolated file.

    Raises:
      MappingError: |filepath| can't be stat'ed.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only
    # The docstring advertises |prevdict| as optional; don't crash on None.
    prevdict = prevdict or {}
    out = {}
    # Always check the file stat and check if it is a link. The timestamp is
    # used to know if the file's content/symlink destination should be looked
    # into. E.g. only reuse from prevdict if the timestamp hasn't changed.
    # There is the risk of the file's timestamp being reset to its last value
    # manually while its content changed. We don't protect against that use
    # case.
    try:
        if collapse_symlinks:
            # os.stat follows symbolic links
            filestats = os.stat(filepath)
        else:
            # os.lstat does not follow symbolic links, and thus preserves
            # them.
            filestats = os.lstat(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    if sys.platform != 'win32':
        # Ignore file mode on Windows since it's not really useful there.
        filemode = stat.S_IMODE(filestats.st_mode)
        # Remove write access for group and all access to 'others'.
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        exec_and_group_read = stat.S_IXUSR | stat.S_IRGRP
        if (filemode & exec_and_group_read) == exec_and_group_read:
            # Only keep x group bit if both x user bit and group read bit are
            # set.
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    out['t'] = int(round(filestats.st_mtime))

    if not is_link:
        out['s'] = filestats.st_size
        # If the timestamp wasn't updated and the file size is still the same,
        # carry on the hash.
        if (prevdict.get('t') == out['t'] and
                prevdict.get('s') == out['s']):
            # Reuse the previous hash if available.
            out['h'] = prevdict.get('h')
        if not out.get('h'):
            out['h'] = hash_file(filepath, algo)
    else:
        # If the timestamp wasn't updated, carry on the link destination.
        if prevdict.get('t') == out['t']:
            # Reuse the previous link destination if available.
            out['l'] = prevdict.get('l')
        if out.get('l') is None:
            # The link could be in an incorrect path case. In practice, this
            # only happen on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = os.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(
                os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            out['l'] = os.path.relpath(native_dest, filedir)
    return out


def save_isolated(isolated, data):
    """Writes one or multiple .isolated files.

    Note: this reference implementation does not create child .isolated file
    so it always returns an empty list.

    Arguments:
      isolated: destination passed to tools.write_json().
      data: dict to serialize; data['algo'] must be a key of SUPPORTED_ALGOS.

    Returns the list of child isolated files that are included by |isolated|.
    """
    # Make sure the data is valid .isolated data by 'reloading' it.
    algo = SUPPORTED_ALGOS[data['algo']]
    load_isolated(json.dumps(data), algo)
    tools.write_json(isolated, data, True)
    return []


def split_path(path):
    """Splits a path and return a list with each element.

    The elements are listed from the last component to the first one, e.g.
    'a/b/c' gives ['c', 'b', 'a']. When the path is anchored (leading
    separator, drive letter), the anchor is the final element.
    """
    out = []
    while path:
        head, rest = os.path.split(path)
        if rest:
            out.append(rest)
        if head == path:
            # os.path.split() can't reduce a root such as '/' any further;
            # record it and stop, otherwise this would loop forever.
            out.append(head)
            break
        path = head
    return out


def _validate_file_entry(subkey, subvalue, algo, algo_name):
    """Checks one (path, properties) item of the 'files' section.

    Raises IsolatedError on the first problem found.
    """
    if not isinstance(subkey, basestring):
        raise IsolatedError('Expected string, got %r' % subkey)
    if os.path.isabs(subkey) or subkey.startswith('\\\\'):
        # Disallow '\\\\', it could UNC on Windows but disallow this
        # everywhere.
        raise IsolatedError('File path can\'t be absolute: %r' % subkey)
    if subkey.endswith(('/', '\\')):
        raise IsolatedError(
            'File path can\'t end with \'%s\': %r' % (subkey[-1], subkey))
    if '..' in split_path(subkey):
        raise IsolatedError('File path can\'t reference parent: %r' % subkey)
    if not isinstance(subvalue, dict):
        raise IsolatedError('Expected dict, got %r' % subvalue)

    for subsubkey, subsubvalue in subvalue.items():
        if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
                raise IsolatedError('Expected string, got %r' % subsubvalue)
        elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
                raise IsolatedError('Expected int, got %r' % subsubvalue)
        elif subsubkey == 'h':
            # Check the type first so that a non-string hash is reported as
            # an IsolatedError and not as a TypeError from the regexp match.
            if (not isinstance(subsubvalue, basestring) or
                    not is_valid_hash(subsubvalue, algo)):
                raise IsolatedError(
                    'Expected %s, got %r' % (algo_name, subsubvalue))
        elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
                raise IsolatedError(
                    'Expected int or long, got %r' % subsubvalue)
        elif subsubkey == 't':
            if subsubvalue not in SUPPORTED_FILE_TYPES:
                raise IsolatedError('Expected one of \'%s\', got %r' % (
                    ', '.join(sorted(SUPPORTED_FILE_TYPES)), subsubvalue))
        else:
            raise IsolatedError('Unknown subsubkey %s' % subsubkey)

    has_hash = 'h' in subvalue
    has_link = 'l' in subvalue
    has_size = 's' in subvalue
    if has_hash == has_link:
        raise IsolatedError(
            'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
            (algo_name, subvalue))
    if has_hash != has_size:
        raise IsolatedError(
            'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
            (algo_name, subvalue))
    if has_size == has_link:
        raise IsolatedError(
            'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
            subvalue)
    if has_link and 'm' in subvalue:
        raise IsolatedError(
            'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
            subvalue)


def load_isolated(content, algo):
    """Verifies the .isolated file is valid and loads this object with the
    json data.

    Arguments:
    - content: raw serialized content to load.
    - algo: hashlib algorithm class. Used to confirm the algorithm matches the
            algorithm used on the Isolate Server. When None, the algorithm
            named in the file is used, defaulting to 'sha-1'.

    Returns:
      The decoded dict, with paths converted to the native path separator.

    Raises:
      IsolatedError: the content is not valid .isolated data.
    """
    try:
        data = json.loads(content)
    except ValueError as v:
        logging.error('Failed to parse .isolated file:\n%s', content)
        raise IsolatedError(
            'Failed to parse (%s): %s...' % (v, content[:100]))

    if not isinstance(data, dict):
        raise IsolatedError('Expected dict, got %r' % data)

    # Check 'version' first, since it could modify the parsing after.
    value = data.get('version', '1.0')
    if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
    try:
        version = tuple(map(int, value.split('.')))
    except ValueError:
        raise IsolatedError('Expected valid version, got %r' % value)

    expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
    # Major version must match.
    if version[0] != expected_version[0]:
        raise IsolatedError(
            'Expected compatible \'%s\' version, got %r' %
            (ISOLATED_FILE_VERSION, value))

    if algo is None:
        # TODO(maruel): Remove the default around Jan 2014.
        # Default the algorithm used in the .isolated file itself, falls back
        # to 'sha-1' if unspecified.
        # The lookup is by name, so it must go through SUPPORTED_ALGOS;
        # SUPPORTED_ALGOS_REVERSE is keyed by the algorithm class.
        named = data.get('algo', 'sha-1')
        if not isinstance(named, basestring):
            raise IsolatedError('Expected string, got %r' % named)
        if named not in SUPPORTED_ALGOS:
            raise IsolatedError(
                'Expected one of \'%s\', got %r' %
                (', '.join(sorted(SUPPORTED_ALGOS)), named))
        algo = SUPPORTED_ALGOS[named]

    algo_name = SUPPORTED_ALGOS_REVERSE[algo]

    for key, value in data.items():
        if key == 'algo':
            if not isinstance(value, basestring):
                raise IsolatedError('Expected string, got %r' % value)
            if value not in SUPPORTED_ALGOS:
                raise IsolatedError(
                    'Expected one of \'%s\', got %r' %
                    (', '.join(sorted(SUPPORTED_ALGOS)), value))
            if value != algo_name:
                raise IsolatedError(
                    'Expected \'%s\', got %r' % (algo_name, value))

        elif key == 'command':
            if not isinstance(value, list):
                raise IsolatedError('Expected list, got %r' % value)
            if not value:
                raise IsolatedError('Expected non-empty command')
            for subvalue in value:
                if not isinstance(subvalue, basestring):
                    raise IsolatedError('Expected string, got %r' % subvalue)

        elif key == 'files':
            if not isinstance(value, dict):
                raise IsolatedError('Expected dict, got %r' % value)
            for subkey, subvalue in value.items():
                _validate_file_entry(subkey, subvalue, algo, algo_name)

        elif key == 'includes':
            if not isinstance(value, list):
                raise IsolatedError('Expected list, got %r' % value)
            if not value:
                raise IsolatedError('Expected non-empty includes list')
            for subvalue in value:
                if (not isinstance(subvalue, basestring) or
                        not is_valid_hash(subvalue, algo)):
                    raise IsolatedError(
                        'Expected %s, got %r' % (algo_name, subvalue))

        elif key == 'os':
            if version >= (1, 4):
                raise IsolatedError(
                    'Key \'os\' is not allowed starting version 1.4')

        elif key == 'read_only':
            if value not in (0, 1, 2):
                raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

        elif key == 'relative_cwd':
            if not isinstance(value, basestring):
                raise IsolatedError('Expected string, got %r' % value)

        elif key == 'version':
            # Already checked above.
            pass

        else:
            raise IsolatedError('Unknown key %r' % key)

    # Automatically fix os.path.sep if necessary. While .isolated files are
    # always in the the native path format, someone could want to download an
    # .isolated tree from another OS.
    wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
    if 'files' in data:
        data['files'] = dict(
            (k.replace(wrong_path_sep, os.path.sep), v)
            for k, v in data['files'].items())
        for v in data['files'].values():
            if 'l' in v:
                v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
    if 'relative_cwd' in data:
        data['relative_cwd'] = data['relative_cwd'].replace(
            wrong_path_sep, os.path.sep)
    return data
607 return data