blob: 37cee200cb0a5d612386986a5b5028b34fa8f073 [file] [log] [blame]
maruelea586f32016-04-05 11:11:33 -07001# Copyright 2014 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07002# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04004
5"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
15from utils import file_path
maruel12e30012015-10-09 11:55:35 -070016from utils import fs
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040017from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040018
# Format version written to, and expected from, .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes read per disk I/O operation (1 MiB).
DISK_FILE_CHUNK = 1 << 20


# hashlib spells its algorithms 'shaX' while the standard spelling is 'sha-X',
# so the mapping from the standard name to the constructor is explicit.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# Inverse of SUPPORTED_ALGOS (constructor -> standard name); used for
# serialization.
SUPPORTED_ALGOS_REVERSE = {
  constructor: name for name, constructor in SUPPORTED_ALGOS.items()
}


# Accepted values for the 't' (type) property of a file entry.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tanselle4288c32016-07-28 09:45:40 -070041
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040042
class IsolatedError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
46
47
class MappingError(OSError):
  """Raised when the file tree cannot be recreated."""
51
52
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for |algo|.

  Arguments:
    value: candidate digest, normally a string of hexadecimal characters.
    algo: hashlib algorithm constructor, e.g. hashlib.sha1.

  Returns:
    True when |value| is made of exactly 2 * digest_size hexadecimal characters
    (upper or lower case). False otherwise, including when |value| is not a
    string at all.
  """
  size = 2 * algo().digest_size
  try:
    # \Z only matches at the very end of the string, whereas '$' would also
    # accept a digest followed by a trailing newline.
    return bool(re.match(r'[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # Non-string values (e.g. a number decoded from JSON) are never valid.
    return False
57
58
def hash_file(filepath, algo):
  """Returns the hex digest of a file's content.

  The file is read DISK_FILE_CHUNK bytes at a time so it is never held in
  memory all at once.

  Arguments:
    filepath: path of the file to hash.
    algo: hashlib algorithm constructor, e.g. hashlib.sha1.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    block = stream.read(DISK_FILE_CHUNK)
    while block:
      hasher.update(block)
      block = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040072
73
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """Creates an unloaded instance.

    Arguments:
      obj_hash: hash of the .isolated file itself.
      algo: hashlib algorithm constructor handed to load_isolated() when the
          content is loaded.
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated(). Empty until load() is called.
    self.data = {}
    # One IsolatedFile instance per hash listed in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    Must be called at most once per instance. The children created here are
    themselves not loaded.

    Arguments:
      content: raw serialized content of the .isolated file.
    """
    # Let logging do the formatting so nothing is formatted when debug output
    # is disabled.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
        IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400109
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400110
def walk_includes(isolated):
  """Yields the IsolatedFile objects of an include graph in pre-order.

  The root is yielded first, then each child subtree in turn, left to right.
  A node's children are only looked at after the node itself was yielded, so a
  node that is not loaded yet is treated as having no children.
  """
  yield isolated
  # Stack of iterators, one per level currently being visited.
  pending = [iter(isolated.children)]
  while pending:
    try:
      node = next(pending[-1])
    except StopIteration:
      # This level is exhausted, resume the parent level.
      pending.pop()
      continue
    yield node
    pending.append(iter(node.children))
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400121
122
@tools.profile
def _expand_symlinks(indir, relfile):
  """Resolves the symlinks found along |relfile|.

  Symlinks whose target is inside |indir| are followed and recorded. Symlinks
  whose target is outside |indir| are kept in the path as if they were ordinary
  directories or files; this is for the benefit of the Chromium OS ebuild,
  which symlinks the output directory to an unrelated path in the chroot.

  Fails when a directory loop is detected, although in theory that case could
  be supported.

  Arguments:
  - indir: base directory; symlinks in indir itself are not processed; it
           defines what is 'inside the tree'.
  - relfile: path, relative to indir, in which symlinks are expanded.

  Returns:
    tuple(relfile, list(symlinks)): relfile is the path, relative to indir,
    obtained once the symlinks were evaluated; it ends with os.path.sep only if
    the input did. symlinks is the list of symlinks followed along the way,
    each relative to indir, in the order they were found.

  Raises:
    MappingError: a symlink target doesn't exist, or a symlink is reported as
        a recursive reference.
  """
  wants_trailing_sep = relfile.endswith(os.path.sep)
  resolved = indir
  remaining = relfile.strip(os.path.sep)
  found = []

  while remaining:
    before, link, after = file_path.split_at_symlink(resolved, remaining)
    if not link:
      # No symlink left in what remains: fix its case and finish.
      remaining = file_path.fix_native_path_case(resolved, remaining)
      resolved = os.path.join(resolved, remaining)
      break

    link_path = os.path.join(resolved, before, link)
    after = after.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    link_value = fs.readlink(link_path)
    if os.path.isabs(link_value):
      # Absolute targets are used as is. The use case is generally someone
      # who puts the output directory on a separate drive.
      destination = link_value
    else:
      # The symlink itself could be using the wrong path case.
      link_dir = os.path.normpath(os.path.join(resolved, before))
      destination = file_path.fix_native_path_case(link_dir, link_value)

    if not fs.exists(destination):
      raise MappingError(
          "Symlink target doesn't exist: %s -> %s" % (link_path, destination))
    destination = file_path.get_native_path_case(destination)

    if not file_path.path_starts_with(indir, destination):
      # Target outside the tree: keep the symlink in the path and go on.
      resolved, remaining = link_path, after
      continue
    if file_path.path_starts_with(destination, link_path):
      raise MappingError(
          "Can't map recursive symlink reference %s -> %s" %
          (link_path, destination))

    logging.info('Found symlink: %s -> %s', link_path, destination)
    found.append(os.path.relpath(link_path, indir))

    # Treat the components shared by the target and the symlink path as
    # done, and scan the rest of the target again.
    dest_parts = destination.split(os.path.sep)
    link_parts = link_path.split(os.path.sep)
    shared = 0
    limit = min(len(dest_parts), len(link_parts))
    while shared < limit and dest_parts[shared] == link_parts[shared]:
      shared += 1
    resolved = os.path.sep.join(dest_parts[:shared])
    remaining = os.path.join(os.path.sep.join(dest_parts[shared:]), after)

  final = os.path.relpath(resolved, indir).rstrip(os.path.sep)
  if wants_trailing_sep:
    final += os.path.sep
  return final, found
204
205
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: base directory that |relfile| is relative to.
    relfile: relative path to expand; a directory must end with os.path.sep.
    blacklist: optional callable taking a relative path and returning True
        when the path must be skipped. Only applied to directory content, not
        to |relfile| itself.
    follow_symlinks: when true, symlinks found in |relfile| are expanded with
        _expand_symlinks() and yielded too.

  Yields:
    tuple(Relative path, bool is_symlink) to files and symlinks inside |indir|.

  Raises:
    MappingError: |relfile| is absolute, points outside |indir|, doesn't
        exist, disagrees with its trailing separator (directory without one,
        or non-directory with one), or a directory can't be listed. Since this
        is a generator, nothing is raised before the first item is requested.
  """
  if os.path.isabs(relfile):
    raise MappingError(u'Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  inside = infile.startswith(indir)
  if inside and os.path.isabs(indir) and not indir.endswith(os.path.sep):
    # A raw string prefix test would accept a sibling like '/base/dir2/x' for
    # the base '/base/dir': what follows the prefix must be a separator or
    # nothing at all.
    inside = infile[len(indir):len(indir) + 1] in (u'', os.path.sep)
  if not inside:
    raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + u'.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            u'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = _expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      # MappingError derives from OSError, so mapping failures raised by
      # _expand_symlinks() are deliberately ignored here as well.
      pass

  # The symlinks need to be mapped in.
  for symlink in symlinks:
    yield symlink, True

  if relfile.endswith(os.path.sep):
    if not fs.isdir(infile):
      raise MappingError(
          u'%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith(u'.' + os.path.sep):
      relfile = relfile[2:]
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if fs.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        # Apply recursively.
        for item, is_symlink in expand_directory_and_symlink(
            indir, inner_relfile, blacklist, follow_symlinks):
          yield item, is_symlink
    except OSError as e:
      # This also wraps a MappingError raised by the recursive calls.
      raise MappingError(
          u'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if fs.isdir(infile):
      raise MappingError(
          u'Input directory %s must have a trailing slash' % infile)

    if not fs.isfile(infile):
      raise MappingError(u'Input file %s doesn\'t exist' % infile)

    yield relfile, False
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400295
296
@tools.profile
def file_to_metadata(filepath, read_only, collapse_symlinks):
  """Returns the metadata of an input file, a dependency.

  Behaviors:
  - For a symlink that is not collapsed, only the link destination is
    retrieved, relative to the directory containing the link.
  - Otherwise the file size is retrieved and, except on Windows, the file
    mode.
  - The content is never read, so no hash is calculated here.

  Arguments:
    filepath: File to act on.
    read_only: If 1 or 2, the user write bit is removed from the saved mode. In
               practice, only save one of 4 modes: 0755 (rwx), 0644 (rw), 0555
               (rx), 0444 (r). On windows, mode is not set since all files are
               'executable' by default.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file *except* 'h' for files: {'l': destination} for a symlink, otherwise
    {'m': mode, 's': size} ('m' is omitted on Windows).

  Raises:
    MappingError: the file can't be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only

  # stat follows symbolic links while lstat preserves them.
  stat_file = fs.stat if collapse_symlinks else fs.lstat
  try:
    info = stat_file(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  if stat.S_ISLNK(info.st_mode):
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    link_value = fs.readlink(filepath)  # pylint: disable=no-member
    parent = file_path.get_native_path_case(os.path.dirname(filepath))
    destination = file_path.fix_native_path_case(parent, link_value)
    return {'l': os.path.relpath(destination, parent)}

  metadata = {}
  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    # Remove write access for group and all access to 'others'.
    mode = stat.S_IMODE(info.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    # Only keep x group bit if both x user bit and group read bit are set.
    required = stat.S_IXUSR | stat.S_IRGRP
    if mode & required == required:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    metadata['m'] = mode
  metadata['s'] = info.st_size
  return metadata
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400363
364
def save_isolated(isolated, data):
  """Validates |data| then writes it as a single .isolated file.

  Note: this reference implementation does not create child .isolated files.

  Arguments:
    isolated: path of the .isolated file to write.
    data: dict with the .isolated content; data['algo'] must be a key of
        SUPPORTED_ALGOS.

  Returns:
    None.
  """
  hash_algo = SUPPORTED_ALGOS[data['algo']]
  # Make sure the data is valid .isolated data by 'reloading' it; this raises
  # before anything is written.
  serialized = json.dumps(data)
  load_isolated(serialized, hash_algo)
  tools.write_json(isolated, data, True)
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400375
376
def split_path(path):
  """Splits a path and returns a list with each element.

  Arguments:
    path: path to split; an empty path yields an empty list.

  Returns:
    The components of |path|, last component first, e.g. 'a/b/c' gives
    ['c', 'b', 'a']. A leading root or drive specifier is not included.
  """
  out = []
  while path:
    head, tail = os.path.split(path)
    if tail:
      out.append(tail)
    if head == path:
      # os.path.split() made no progress: only a root (e.g. '/') or a drive
      # specifier is left. Stop here instead of looping forever.
      break
    path = head
  return out
385
386
def _validate_file_properties(props, algo, algo_name):
  """Validates the properties dict of one entry of the 'files' section."""
  for name, item in props.iteritems():
    if name == 'l':
      if not isinstance(item, basestring):
        raise IsolatedError('Expected string, got %r' % item)
    elif name == 'm':
      if not isinstance(item, int):
        raise IsolatedError('Expected int, got %r' % item)
    elif name == 'h':
      if not is_valid_hash(item, algo):
        raise IsolatedError('Expected %s, got %r' % (algo_name, item))
    elif name == 's':
      if not isinstance(item, (int, long)):
        raise IsolatedError('Expected int or long, got %r' % item)
    elif name == 't':
      if item not in SUPPORTED_FILE_TYPES:
        raise IsolatedError('Expected one of \'%s\', got %r' % (
            ', '.join(sorted(SUPPORTED_FILE_TYPES)), item))
    else:
      raise IsolatedError('Unknown subsubkey %s' % name)

  # An entry is either a file ('h' and 's') or a link ('l' without 'm').
  has_hash = 'h' in props
  has_link = 'l' in props
  has_size = 's' in props
  if has_hash == has_link:
    raise IsolatedError(
        "Need only one of 'h' (%s) or 'l' (link), got: %r" %
        (algo_name, props))
  if has_hash != has_size:
    raise IsolatedError(
        "Both 'h' (%s) and 's' (size) should be set, got: %r" %
        (algo_name, props))
  if has_size == has_link:
    raise IsolatedError(
        "Need only one of 's' (size) or 'l' (link), got: %r" % props)
  if has_link and 'm' in props:
    raise IsolatedError(
        "Cannot use 'm' (mode) and 'l' (link), got: %r" % props)


def _validate_files(files, algo, algo_name):
  """Validates the 'files' section: relative paths mapped to properties."""
  if not isinstance(files, dict):
    raise IsolatedError('Expected dict, got %r' % files)
  for path, props in files.iteritems():
    if not isinstance(path, basestring):
      raise IsolatedError('Expected string, got %r' % path)
    if os.path.isabs(path) or path.startswith('\\\\'):
      # Disallow '\\\\', it could UNC on Windows but disallow this
      # everywhere.
      raise IsolatedError("File path can't be absolute: %r" % path)
    if path.endswith(('/', '\\')):
      raise IsolatedError(
          "File path can't end with '%s': %r" % (path[-1], path))
    if '..' in split_path(path):
      raise IsolatedError("File path can't reference parent: %r" % path)
    if not isinstance(props, dict):
      raise IsolatedError('Expected dict, got %r' % props)
    _validate_file_properties(props, algo, algo_name)


def _validate_algo_name(name, algo):
  """Validates the 'algo' value: a supported name that matches |algo|."""
  if not isinstance(name, basestring):
    raise IsolatedError('Expected string, got %r' % name)
  if name not in SUPPORTED_ALGOS:
    raise IsolatedError(
        'Expected one of \'%s\', got %r' %
        (', '.join(sorted(SUPPORTED_ALGOS)), name))
  if name != SUPPORTED_ALGOS_REVERSE[algo]:
    raise IsolatedError(
        'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], name))


def _validate_command(command):
  """Validates the 'command' value: a non-empty list of strings."""
  if not isinstance(command, list):
    raise IsolatedError('Expected list, got %r' % command)
  if not command:
    raise IsolatedError('Expected non-empty command')
  for arg in command:
    if not isinstance(arg, basestring):
      raise IsolatedError('Expected string, got %r' % arg)


def _validate_includes(includes, algo, algo_name):
  """Validates the 'includes' value: a non-empty list of valid hashes."""
  if not isinstance(includes, list):
    raise IsolatedError('Expected list, got %r' % includes)
  if not includes:
    raise IsolatedError('Expected non-empty includes list')
  for digest in includes:
    if not is_valid_hash(digest, algo):
      raise IsolatedError('Expected %s, got %r' % (algo_name, digest))


def load_isolated(content, algo):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The decoded dict. Paths in 'files' (keys and 'l' values) and in
    'relative_cwd' are converted to use os.path.sep.

  Raises:
    IsolatedError: |algo| is missing, the content is not valid JSON, or the
        data doesn't respect the .isolated format.
  """
  if not algo:
    raise IsolatedError("'algo' is required")
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  raw_version = data.get('version', '1.0')
  if not isinstance(raw_version, basestring):
    raise IsolatedError('Expected string, got %r' % raw_version)
  try:
    version = tuple(map(int, raw_version.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % raw_version)

  # Major version must match.
  expected_major = int(ISOLATED_FILE_VERSION.split('.')[0])
  if version[0] != expected_major:
    raise IsolatedError(
        "Expected compatible '%s' version, got %r" %
        (ISOLATED_FILE_VERSION, raw_version))

  algo_name = SUPPORTED_ALGOS_REVERSE[algo]

  for key, value in data.iteritems():
    if key == 'algo':
      _validate_algo_name(value, algo)
    elif key == 'command':
      _validate_command(value)
    elif key == 'files':
      _validate_files(value, algo, algo_name)
    elif key == 'includes':
      _validate_includes(value, algo, algo_name)
    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError("Key 'os' is not allowed starting version 1.4")
    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)
    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
    elif key == 'version':
      # Already checked above.
      pass
    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated tree
  # from another OS.
  foreign_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    converted = {}
    for path, props in data['files'].iteritems():
      converted[path.replace(foreign_sep, os.path.sep)] = props
    data['files'] = converted
    for props in converted.itervalues():
      if 'l' in props:
        props['l'] = props['l'].replace(foreign_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        foreign_sep, os.path.sep)
  return data
542 return data