# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
15from utils import file_path
maruel12e30012015-10-09 11:55:35 -070016from utils import fs
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040017from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040018
19
# Format version written to, and expected from, .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes requested per read when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1 << 20


# hashlib spells its algorithms 'shaX' while the standard spelling is 'sha-X',
# so the standard name is mapped explicitly to each hashlib constructor.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# Inverse of SUPPORTED_ALGOS (constructor -> name). Used for serialization.
SUPPORTED_ALGOS_REVERSE = {
  constructor: name for name, constructor in SUPPORTED_ALGOS.items()
}


# Values load_isolated() accepts for the per-file 't' property.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tanselle4288c32016-07-28 09:45:40 -070042
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040043
class IsolatedError(ValueError):
  """Raised when a .isolated file cannot be loaded or fails validation."""
47
48
class MappingError(OSError):
  """Raised when the file tree cannot be recreated or mapped."""
52
53
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for |algo|.

  Arguments:
    value: candidate digest. Expected to be a string; anything else is
        reported as invalid instead of raising.
    algo: hashlib-style constructor (e.g. hashlib.sha1). Its digest_size
        determines the expected number of hexadecimal characters.

  Returns:
    True only when |value| is made of exactly 2 * digest_size hexadecimal
    characters (upper or lower case) and nothing else.
  """
  size = 2 * algo().digest_size
  try:
    # '\Z' only matches at the very end of the string. '$' would also match
    # right before a trailing newline and so accept '<digest>\n'.
    return bool(re.match(r'[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # |value| is not a string, e.g. a number or null decoded from JSON.
    return False
58
59
def hash_file(filepath, algo):
  """Returns the hex digest of a file, streaming it instead of loading it whole.

  The file is opened in binary mode and consumed DISK_FILE_CHUNK bytes at a
  time, so memory use does not grow with the file size.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    block = stream.read(DISK_FILE_CHUNK)
    # An empty read marks the end of the file.
    while block:
      hasher.update(block)
      block = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040073
74
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """Creates a node that is not loaded yet.

    Arguments:
      obj_hash: really the hash of the .isolated file this object stands for.
      algo: hashing algorithm; passed to load_isolated() and to every child
          created from the 'includes' list.
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated(). Empty until load() is called.
    self.data = {}
    # One IsolatedFile instance per entry of the 'includes' list in self.data.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    Must be called at most once per instance. The children created here are
    themselves not loaded.

    Arguments:
      content: raw serialized content of the .isolated file.
    """
    # Pass the hash as a logging argument so the message is only formatted
    # when debug logging is actually enabled.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
      IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400110
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400111
def walk_includes(isolated):
  """Yields every IsolatedFile reachable through the include graph.

  Traversal is depth first: a node is yielded before its children, and
  children are visited left to right. A node that is not loaded yet has no
  children and is therefore a leaf.
  """
  # Stack of iterators, one per node whose children are being visited.
  pending = [iter((isolated,))]
  while pending:
    try:
      node = next(pending[-1])
    except StopIteration:
      pending.pop()
      continue
    yield node
    # The children are only looked up once the consumer asks for the next
    # item, so a node loaded by the consumer in between is still expanded.
    pending.append(iter(node.children))
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400122
123
@tools.profile
def expand_symlinks(indir, relfile):
  """Resolves the symlinks found along |relfile|, relative to |indir|.

  Symlinks whose target lies outside |indir| are not followed; they are
  treated as if they were ordinary directories/files. This rule exists for the
  benefit of the Chromium OS ebuild, which symlinks the output directory to an
  unrelated path in the chroot.

  Fails when a directory loop is detected, although in theory that case could
  be supported.

  Arguments:
    indir: root directory |relfile| is relative to.
    relfile: relative path to resolve. A trailing os.path.sep marks it as a
        directory and is preserved on the returned path.

  Returns:
    Tuple (relfile, symlinks): the symlink-free path relative to |indir| and
    the list of followed symlinks, each relative to |indir|.

  Raises:
    MappingError: a symlink target doesn't exist, or a symlink points to one
        of its own parent directories.
  """
  wants_dir = relfile.endswith(os.path.sep)
  resolved = indir
  remaining = relfile.strip(os.path.sep)
  found_links = []

  while remaining:
    before, link, after = file_path.split_at_symlink(resolved, remaining)
    if not link:
      # No symlink left in the remaining part: fix its case and finish.
      remaining = file_path.fix_native_path_case(resolved, remaining)
      resolved = os.path.join(resolved, remaining)
      break

    link_path = os.path.join(resolved, before, link)
    after = after.lstrip(os.path.sep)
    raw_dest = fs.readlink(link_path)  # pylint: disable=E1101
    if os.path.isabs(raw_dest):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      dest = raw_dest
    else:
      # The symlink itself could be using the wrong path case.
      link_dir = os.path.normpath(os.path.join(resolved, before))
      dest = file_path.fix_native_path_case(link_dir, raw_dest)

    if not fs.exists(dest):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (link_path, dest))
    dest = file_path.get_native_path_case(dest)

    if not file_path.path_starts_with(indir, dest):
      # Target is outside the tree: keep the symlink as a plain path element.
      resolved, remaining = link_path, after
      continue

    if file_path.path_starts_with(dest, link_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (link_path, dest))
    logging.info('Found symlink: %s -> %s', link_path, dest)
    found_links.append(os.path.relpath(link_path, indir))

    # Treat the common prefix of the old and new paths as done, and start
    # scanning again from there.
    dest_parts = dest.split(os.path.sep)
    link_parts = link_path.split(os.path.sep)
    shared = 0
    for ours, theirs in zip(dest_parts, link_parts):
      if ours != theirs:
        break
      shared += 1
    resolved = os.path.sep.join(dest_parts[:shared])
    remaining = os.path.join(os.path.sep.join(dest_parts[shared:]), after)

  out = os.path.relpath(resolved, indir).rstrip(os.path.sep)
  if wants_dir:
    out += os.path.sep
  return out, found_links
194
195
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Since this is a generator, the errors below are raised while iterating.

  Arguments:
    indir: root directory |relfile| is relative to.
    relfile: path relative to |indir|. A directory must end with os.path.sep.
    blacklist: optional callable taking a relative path; directory entries for
        which it returns a true value are skipped. An explicitly listed file
        is never filtered.
    follow_symlinks: if true, symlinks in |relfile| are resolved with
        expand_symlinks() and the symlinks found are yielded as well.

  Yields:
    Relative file paths inside the directory.

  Raises:
    MappingError: |relfile| is absolute, escapes |indir|, has the wrong path
        case (except on OSX and Windows), doesn't exist, is a directory
        without a trailing separator, or the directory can't be listed.
  """
  if os.path.isabs(relfile):
    raise MappingError(u'Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  # Compare on a path component boundary. A plain string prefix test would
  # accept a sibling such as '/root/dirX' for the input directory '/root/dir'.
  root = indir.rstrip(os.path.sep)
  if (infile.rstrip(os.path.sep) != root and
      not infile.startswith(root + os.path.sep)):
    raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  # A mismatch is tolerated for the special case './'.
  #
  # While it'd be nice to enforce path casing on Windows, it's impractical.
  # Also give up enforcing strict path case on OSX. Really, it's that sad.
  # The case where it happens is very specific and hard to reproduce:
  # get_native_path_case(
  #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
  # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
  #
  # Note that this is really something deep in OSX because running
  # ls Foo.framework/Versions/A
  # will print out 'Resources', while file_path.get_native_path_case()
  # returns a lower case 'r'.
  #
  # So *something* is happening under the hood resulting in the command 'ls'
  # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
  # have no idea why.
  if (filepath != native_filepath and
      filepath != native_filepath + u'.' + os.path.sep and
      sys.platform not in ('darwin', 'win32')):
    raise MappingError(
        u'File path doesn\'t equal native file path\n%s != %s' %
        (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      pass

  for s in symlinks:
    yield s

  if relfile.endswith(os.path.sep):
    if not fs.isdir(infile):
      raise MappingError(
          u'%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith(u'.' + os.path.sep):
      relfile = relfile[2:]
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if fs.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        # Apply recursively.
        for i in expand_directory_and_symlink(
            indir, inner_relfile, blacklist, follow_symlinks):
          yield i
    except OSError as e:
      raise MappingError(
          u'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if fs.isdir(infile):
      raise MappingError(
          u'Input directory %s must have a trailing slash' % infile)

    if not fs.isfile(infile):
      raise MappingError(u'Input file %s doesn\'t exist' % infile)

    yield relfile
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400283
284
def expand_directories_and_symlinks(
    indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
  """Expands every entry of |infiles| into the files it designates.

  Directories and symlinks are expanded, the blacklist is applied and the
  files are verified to exist; see expand_directory_and_symlink(). Files are
  specified in os native path separator.

  When |ignore_broken_items| is true, an entry that fails with MappingError is
  logged and skipped; otherwise the error propagates to the caller.

  Yields:
    Relative file path of each file inside every directory specified.
  """
  for entry in infiles:
    try:
      for path in expand_directory_and_symlink(
          indir, entry, blacklist, follow_symlinks):
        yield path
    except MappingError as err:
      if ignore_broken_items:
        logging.info('warning: %s', err)
      else:
        raise
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400304
305
@tools.profile
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculates the hash of the file's
    content with |algo| if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached hash
              to skip recalculating the hash. Optional; None is treated as an
              empty dictionary.
    read_only: If 1 or 2, the user write bit is removed from the saved mode.
               Group write and all access for 'others' are always removed. On
               windows, mode is not set since all files are 'executable' by
               default.
    algo:      Hashing algorithm used.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file: 't' (timestamp) always, 'm' (mode) for non-links outside Windows,
    's' (size) and 'h' (hash) for files, 'l' (destination) for symlinks.

  Raises:
    MappingError: the file can't be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only
  if prevdict is None:
    # Nothing cached: everything is recomputed.
    prevdict = {}
  out = {}
  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    if collapse_symlinks:
      # os.stat follows symbolic links
      filestats = fs.stat(filepath)
    else:
      # os.lstat does not follow symbolic links, and thus preserves them.
      filestats = fs.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    exec_and_group_read = stat.S_IXUSR | stat.S_IRGRP
    if (filemode & exec_and_group_read) == exec_and_group_read:
      # Only keep x group bit if both x user bit and group read bit are set.
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # NOTE(review): load_isolated() validates a per-file 't' as a file type
  # ('basic' or 'tar'), while here 't' is the modification time. Presumably
  # callers drop or remap it before serializing; confirm.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the hash.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400396
397
def save_isolated(isolated, data):
  """Validates |data| and writes it to the .isolated file |isolated|.

  The data is checked by serializing it to JSON and passing the result through
  load_isolated(), so invalid data raises before anything is written.

  Note: this reference implementation does not create child .isolated file so it
  always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  hasher = SUPPORTED_ALGOS[data['algo']]
  serialized = json.dumps(data)
  # Make sure the data is valid .isolated data by 'reloading' it.
  load_isolated(serialized, hasher)
  tools.write_json(isolated, data, True)
  return []
411
412
def split_path(path):
  """Splits a path and return a list with each element.

  The elements are returned leaf first, e.g. 'a/b/c' gives ['c', 'b', 'a'].
  Empty elements are dropped and the root of an absolute path is not included.
  An empty path gives an empty list.
  """
  out = []
  while path:
    head, tail = os.path.split(path)
    if tail:
      out.append(tail)
    if head == path:
      # os.path.split() returns a root (e.g. '/' or 'c:') unchanged, so
      # looping on it would never terminate.
      break
    path = head
  return out
421
422
def _validate_isolated_files(files, algo, algo_name):
  """Validates the 'files' section of a .isolated file.

  Raises IsolatedError on the first invalid path or property found.
  """
  if not isinstance(files, dict):
    raise IsolatedError('Expected dict, got %r' % files)
  for path, props in files.iteritems():
    if not isinstance(path, basestring):
      raise IsolatedError('Expected string, got %r' % path)
    # Both '/' and '\\' are treated as separators, whatever the local OS is:
    # load_isolated() converts the foreign separator to the native one after
    # validation, which would otherwise turn '\\etc' or 'a\\..\\..\\b' into an
    # absolute or parent-escaping path on POSIX.
    if os.path.isabs(path) or path.startswith(('/', '\\')):
      raise IsolatedError('File path can\'t be absolute: %r' % path)
    if path.endswith(('/', '\\')):
      raise IsolatedError(
          'File path can\'t end with \'%s\': %r' % (path[-1], path))
    if '..' in re.split(r'[/\\]', path):
      raise IsolatedError('File path can\'t reference parent: %r' % path)
    if not isinstance(props, dict):
      raise IsolatedError('Expected dict, got %r' % props)

    for prop, prop_value in props.iteritems():
      if prop == 'l':
        if not isinstance(prop_value, basestring):
          raise IsolatedError('Expected string, got %r' % prop_value)
      elif prop == 'm':
        if not isinstance(prop_value, int):
          raise IsolatedError('Expected int, got %r' % prop_value)
      elif prop == 'h':
        # Check the type first so that a non-string reports an IsolatedError
        # instead of failing inside the hash check.
        if not (isinstance(prop_value, basestring) and
                is_valid_hash(prop_value, algo)):
          raise IsolatedError('Expected %s, got %r' % (algo_name, prop_value))
      elif prop == 's':
        if not isinstance(prop_value, (int, long)):
          raise IsolatedError('Expected int or long, got %r' % prop_value)
      elif prop == 't':
        if prop_value not in SUPPORTED_FILE_TYPES:
          raise IsolatedError('Expected one of \'%s\', got %r' % (
              ', '.join(sorted(SUPPORTED_FILE_TYPES)), prop_value))
      else:
        raise IsolatedError('Unknown subsubkey %s' % prop)

    # An entry is either a file ('h' and 's') or a symlink ('l' alone).
    has_hash = 'h' in props
    has_link = 'l' in props
    has_size = 's' in props
    if has_hash == has_link:
      raise IsolatedError(
          'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
          (algo_name, props))
    if has_hash != has_size:
      raise IsolatedError(
          'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
          (algo_name, props))
    if has_size == has_link:
      raise IsolatedError(
          'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
          props)
    if has_link and 'm' in props:
      raise IsolatedError(
          'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
          props)


def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The decoded dict. Paths in 'files' (keys and 'l' values) and in
    'relative_cwd' are converted to the native path separator.

  Raises:
    IsolatedError: |algo| is missing or unsupported, the content is not valid
        JSON, or any value fails validation.
  """
  if not algo:
    raise IsolatedError('\'algo\' is required')
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(int(part) for part in value.split('.'))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(
      int(part) for part in ISOLATED_FILE_VERSION.split('.'))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  # Report an unknown algorithm like any other invalid input instead of
  # leaking a KeyError.
  algo_name = SUPPORTED_ALGOS_REVERSE.get(algo)
  if algo_name is None:
    raise IsolatedError(
        'Expected one of \'%s\' as algo, got %r' %
        (', '.join(sorted(SUPPORTED_ALGOS)), algo))

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != algo_name:
        raise IsolatedError('Expected \'%s\', got %r' % (algo_name, value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      _validate_isolated_files(value, algo, algo_name)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        if not (isinstance(subvalue, basestring) and
                is_valid_hash(subvalue, algo)):
          raise IsolatedError('Expected %s, got %r' % (algo_name, subvalue))

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated tree
  # from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data