blob: e27fb19a00da80c57ab71034984288b12c27b185 [file] [log] [blame]
maruelea586f32016-04-05 11:11:33 -07001# Copyright 2014 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07002# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04004
5"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
Takuto Ikuta81de2342019-10-25 11:58:18 +000015import six
16
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040017from utils import file_path
maruel12e30012015-10-09 11:55:35 -070018from utils import fs
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040019from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040020
# Version written to, and expected from, .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes read at a time when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1 << 20


# hashlib names its constructors 'shaX' while the serialized name is 'sha-X',
# so the mapping between the two is spelled out explicitly.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# hashlib constructor -> serialized name; used for serialization.
SUPPORTED_ALGOS_REVERSE = {
  constructor: name for name, constructor in SUPPORTED_ALGOS.items()
}


# Values accepted for the 't' (type) property of a 'files' entry.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tanselle4288c32016-07-28 09:45:40 -070043
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040044
class IsolatedError(ValueError):
  """Signals that a .isolated file could not be loaded or is not valid."""
48
49
class MappingError(OSError):
  """Signals that the file tree could not be recreated."""
53
54
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for the algorithm |algo|.

  Arguments:
    value: candidate digest. Anything that is not a text string of exactly
           2 * digest_size hexadecimal characters is rejected.
    algo: hashlib-style constructor; its digest_size defines the expected
          length.

  Returns:
    bool. Never raises on a malformed |value|.
  """
  size = 2 * algo().digest_size
  try:
    # '\Z' is used instead of '$' because '$' also matches right before a
    # trailing newline, which would accept '<digest>\n'.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # |value| is not a string (e.g. a number or None decoded from JSON).
    return False
59
60
def hash_file(filepath, algo):
  """Returns the hex digest of a file's content, read in bounded chunks.

  The file is consumed DISK_FILE_CHUNK bytes at a time so it is never held in
  memory all at once.

  Arguments:
    filepath: path of the file to hash.
    algo: one of the hashlib hashing algorithm constructors.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    block = stream.read(DISK_FILE_CHUNK)
    while block:
      hasher.update(block)
      block = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040074
75
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """Creates an unloaded instance.

    Arguments:
      obj_hash: the hash of the .isolated file itself.
      algo: hashing algorithm constructor, forwarded to load_isolated() and to
            the children created by load().
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per object in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    One unloaded child IsolatedFile is created for each hash listed under
    'includes'. Must be called at most once per instance.

    Arguments:
      content: raw serialized .isolated content.
    """
    # Pass the hash as a logging argument so that formatting only happens when
    # the record is emitted, and so that a tuple value cannot break the '%'
    # operator.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
        IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400111
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400112
def walk_includes(isolated):
  """Yields every IsolatedFile of an include graph, depth first.

  The root is yielded first, then each child subtree in turn, left to right. A
  node's children are only looked up after the node itself was yielded, so a
  node that is not loaded yet is treated as having no children.
  """
  # Stack of iterators: the top one enumerates the siblings being visited.
  pending = [iter((isolated,))]
  while pending:
    try:
      node = next(pending[-1])
    except StopIteration:
      pending.pop()
      continue
    yield node
    pending.append(iter(node.children))
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400123
124
@tools.profile
def _expand_symlinks(indir, relfile):
  """Resolves the symlinks found along |relfile|.

  Symlinks in |relfile| are followed, except that a symlink pointing outside
  of |indir| is treated as if it were an ordinary directory/file.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory that case could
  be supported.

  Arguments:
  - indir: base directory; symlinks in indir are not processed; this is
           the base directory that is considered 'outside of the tree'.
  - relfile: part of the path to expand symlink.

  Returns:
    tuple(relfile, list(symlinks)): relfile is the real path of relfile where
    all symlinks were evaluated, relative to |indir|. symlinks is the chain of
    symlinks found along the way, if any, each relative to |indir|.

  Raises:
    MappingError: a symlink target doesn't exist or a symlink refers to one of
    its own parents.
  """
  sep = os.path.sep
  wants_trailing_sep = relfile.endswith(sep)
  resolved = indir
  remaining = relfile.strip(sep)
  found_links = []

  while remaining:
    before, link_name, after = file_path.split_at_symlink(resolved, remaining)
    if not link_name:
      # No symlink left in the remainder; only the path case gets fixed.
      remaining = file_path.fix_native_path_case(resolved, remaining)
      resolved = os.path.join(resolved, remaining)
      break

    link_path = os.path.join(resolved, before, link_name)
    after = after.lstrip(sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    link_value = fs.readlink(link_path)
    if os.path.isabs(link_value):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      destination = link_value
    else:
      # The symlink itself could be using the wrong path case.
      link_dir = os.path.normpath(os.path.join(resolved, before))
      destination = file_path.fix_native_path_case(link_dir, link_value)

    if not fs.exists(destination):
      raise MappingError(
          "Symlink target doesn't exist: %s -> %s" % (link_path, destination))
    destination = file_path.get_native_path_case(destination)
    if not file_path.path_starts_with(indir, destination):
      # Points outside of the tree: keep walking through the link itself.
      resolved = link_path
      remaining = after
      continue
    if file_path.path_starts_with(destination, link_path):
      raise MappingError(
          "Can't map recursive symlink reference %s -> %s" %
          (link_path, destination))
    logging.info('Found symlink: %s -> %s', link_path, destination)
    found_links.append(os.path.relpath(link_path, indir))

    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    dest_parts = destination.split(sep)
    link_parts = link_path.split(sep)
    shared = 0
    limit = min(len(dest_parts), len(link_parts))
    while shared < limit and dest_parts[shared] == link_parts[shared]:
      shared += 1
    resolved = sep.join(dest_parts[:shared])
    remaining = os.path.join(sep.join(dest_parts[shared:]), after)

  expanded = os.path.relpath(resolved, indir).rstrip(sep)
  if wants_trailing_sep:
    expanded += sep
  return expanded, found_links
206
207
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: base directory that |relfile| is relative to.
    relfile: relative path to expand; a directory must end with os.path.sep.
    blacklist: optional callable; when it returns a true value for the relative
               path of a directory item, that item is skipped. It is not
               consulted for |relfile| itself.
    follow_symlinks: when true, symlinks found in |relfile| are resolved and
                     yielded too.

  Yields:
    tuple(Relative path, bool is_symlink) to files and symlinks inside |indir|.

  Raises:
    MappingError: |relfile| is absolute, is outside of |indir|, doesn't exist,
    doesn't use the native path case (except on macOS and Windows), or its
    trailing separator doesn't match what it is on disk.
  """
  if os.path.isabs(relfile):
    raise MappingError(u'Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  # Compare on a path component boundary: a plain string prefix test would
  # accept '<indir>-sibling/x', reached with '../<name>-sibling/x', as being
  # inside of |indir|. An empty base (indir is '' or the root) cannot be
  # escaped by a relative path, so there is nothing to check in that case.
  base = indir.rstrip(os.path.sep)
  if base and infile != base and not infile.startswith(base + os.path.sep):
    raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + u'.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            u'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = _expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      # NOTE(review): MappingError derives from OSError, so the errors raised
      # by _expand_symlinks() itself are silenced here as well.
      pass

  # The symlinks need to be mapped in.
  for s in symlinks:
    yield s, True

  if relfile.endswith(os.path.sep):
    if not fs.isdir(infile):
      raise MappingError(
          u'%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith(u'.' + os.path.sep):
      relfile = relfile[2:]
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if fs.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        # Apply recursively.
        for i, is_symlink in expand_directory_and_symlink(
            indir, inner_relfile, blacklist, follow_symlinks):
          yield i, is_symlink
    except OSError as e:
      raise MappingError(
          u'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if fs.isdir(infile):
      raise MappingError(
          u'Input directory %s must have a trailing slash' % infile)

    if not fs.isfile(infile):
      raise MappingError(u'Input file %s doesn\'t exist' % infile)

    yield relfile, False
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400297
298
@tools.profile
def file_to_metadata(filepath, read_only, collapse_symlinks):
  """Builds the properties of a 'files' entry for one input file.

  Behaviors:
  - The file is stat'ed; a symlink is only followed when |collapse_symlinks|
    is true.
  - 'm' is set to the sanitized file mode, except on Windows and for symlinks.
  - 's' is set to the file size for anything that is not a symlink.
  - 'l' is set to the link destination, relative to the directory of the
    link, for a symlink.
  - The content hash is not calculated here.

  Arguments:
    filepath: File to act on.
    read_only: One of None, 0, 1 or 2. When true, the owner write bit is
               removed from the saved mode. In practice, only one of 4 modes
               is saved: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file *except* 'h' for files.

  Raises:
    MappingError: the file cannot be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only
  # stat follows symbolic links while lstat preserves them.
  stat_func = fs.stat if collapse_symlinks else fs.lstat
  try:
    info = stat_func(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  props = {}
  if stat.S_ISLNK(info.st_mode):
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    link_value = fs.readlink(filepath)  # pylint: disable=no-member
    link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
    link_dest = file_path.fix_native_path_case(link_dir, link_value)
    props['l'] = os.path.relpath(link_dest, link_dir)
    return props

  # Ignore file mode on Windows since it's not really useful there.
  if sys.platform != 'win32':
    # Remove write access for group and all access to 'others'.
    mode = stat.S_IMODE(info.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    # Only keep x group bit if both x user bit and group read bit are set.
    needed = stat.S_IXUSR | stat.S_IRGRP
    if mode & needed == needed:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    props['m'] = mode

  props['s'] = info.st_size
  return props
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400365
366
def save_isolated(isolated, data):
  """Validates |data| and writes it out as a .isolated file.

  Note: this reference implementation does not create child .isolated files;
  nothing is returned.

  Arguments:
    isolated: destination handed to tools.write_json() (presumably the path of
              the .isolated file to write -- confirm against tools).
    data: dict with the .isolated content; data['algo'] must be one of the
          SUPPORTED_ALGOS names.

  Raises:
    IsolatedError: |data| is not valid .isolated content.
  """
  hasher = SUPPORTED_ALGOS[data['algo']]
  # Make sure the data is valid .isolated data by 'reloading' it.
  serialized = json.dumps(data)
  load_isolated(serialized, hasher)
  tools.write_json(isolated, data, True)
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400377
378
def split_path(path):
  """Splits a path and returns the list of its elements, last element first.

  For example 'a/b/c' gives ['c', 'b', 'a']. A root or drive prefix (e.g. '/'
  or 'C:') is not included in the result.

  Arguments:
    path: path to split with os.path.split().

  Returns:
    list of the non-empty path elements, in reverse order.
  """
  out = []
  while path:
    head, tail = os.path.split(path)
    if tail:
      out.append(tail)
    if head == path:
      # os.path.split() made no progress: only a root or drive is left.
      # Looping again would never terminate.
      break
    path = head
  return out
387
388
def _validate_isolated_file_entry(path, props, algo, algo_name):
  """Validates one item of the 'files' dict of a .isolated file.

  Arguments:
  - path: key of the item; must be a relative path that stays inside the tree.
  - props: value of the item; a dict of the 'h', 'l', 'm', 's', 't' properties.
  - algo: hashlib algorithm class used to validate 'h'.
  - algo_name: serialized name of |algo|, only used in error messages.

  Raises:
    IsolatedError: the item is not valid.
  """
  if not isinstance(path, six.string_types):
    raise IsolatedError('Expected string, got %r' % path)
  # load_isolated() converts the separators to the native one after
  # validation, so the path must also be checked the way it will look once
  # converted. Otherwise something like 'a\\..\\..\\b' is accepted on POSIX and
  # then becomes 'a/../../b'.
  native_path = path.replace('/', os.path.sep).replace('\\', os.path.sep)
  if (os.path.isabs(path) or os.path.isabs(native_path) or
      path.startswith('\\\\')):
    # Disallow '\\\\', it could UNC on Windows but disallow this
    # everywhere.
    raise IsolatedError('File path can\'t be absolute: %r' % path)
  if os.path.splitdrive(native_path)[0]:
    # Drive-relative path, e.g. 'C:foo' on Windows: not absolute but not
    # relative to the tree either.
    raise IsolatedError('File path can\'t specify a drive: %r' % path)
  if path.endswith(('/', '\\')):
    raise IsolatedError(
        'File path can\'t end with \'%s\': %r' % (path[-1], path))
  # Split on both separators, whatever the current OS is.
  if '..' in re.split(r'[/\\]', path):
    raise IsolatedError('File path can\'t reference parent: %r' % path)
  if not isinstance(props, dict):
    raise IsolatedError('Expected dict, got %r' % props)

  for key, value in props.items():
    if key == 'l':
      if not isinstance(value, six.string_types):
        raise IsolatedError('Expected string, got %r' % value)
    elif key == 'm':
      if not isinstance(value, int):
        raise IsolatedError('Expected int, got %r' % value)
    elif key == 'h':
      # The type is checked first so that a non-string value is reported as
      # an IsolatedError instead of a TypeError from the regexp engine.
      if (not isinstance(value, six.string_types) or
          not is_valid_hash(value, algo)):
        raise IsolatedError('Expected %s, got %r' % (algo_name, value))
    elif key == 's':
      if not isinstance(value, six.integer_types):
        raise IsolatedError('Expected int or long, got %r' % value)
    elif key == 't':
      if value not in SUPPORTED_FILE_TYPES:
        raise IsolatedError('Expected one of \'%s\', got %r' % (
            ', '.join(sorted(SUPPORTED_FILE_TYPES)), value))
    else:
      raise IsolatedError('Unknown subsubkey %s' % key)

  # An item is either a file ('h' and 's') or a symlink ('l' without 'm').
  has_hash = 'h' in props
  has_link = 'l' in props
  has_size = 's' in props
  if has_hash == has_link:
    raise IsolatedError(
        'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
        (algo_name, props))
  if has_hash != has_size:
    raise IsolatedError(
        'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
        (algo_name, props))
  if has_size == has_link:
    raise IsolatedError(
        'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
        (props,))
  if has_link and 'm' in props:
    raise IsolatedError(
        'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
        (props,))


def load_isolated(content, algo):
  """Verifies the .isolated file is valid and returns the decoded json data.

  The returned data has its paths ('files' keys, 'l' link destinations and
  'relative_cwd') converted to use the native path separator.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    dict with the content of the .isolated file.

  Raises:
    IsolatedError: |algo| is missing or unsupported, |content| is not valid
    json or doesn't describe a valid .isolated file.
  """
  if not algo:
    raise IsolatedError('\'algo\' is required')
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, six.string_types):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  algo_name = SUPPORTED_ALGOS_REVERSE.get(algo)
  if algo_name is None:
    # Report an unknown algorithm like the other failures, not as a KeyError.
    raise IsolatedError('Unsupported hash algorithm %r' % (algo,))

  for key, value in data.items():
    if key == 'algo':
      if not isinstance(value, six.string_types):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != algo_name:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (algo_name, value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, six.string_types):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for subkey, subvalue in value.items():
        _validate_isolated_file_entry(subkey, subvalue, algo, algo_name)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        # Same as for 'h': never let a non-string reach the regexp engine.
        if (not isinstance(subvalue, six.string_types) or
            not is_valid_hash(subvalue, algo)):
          raise IsolatedError('Expected %s, got %r' % (algo_name, subvalue))

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if not value in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, six.string_types):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated tree
  # from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].items())
    for v in data['files'].values():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data