blob: 0888e263445fa0e9f21d919a786fdb26e44816c5 [file] [log] [blame]
# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Understands .isolated files and can do local operations on them."""
6
7import hashlib
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04008import json
Marc-Antoine Ruel92257792014-08-28 20:51:08 -04009import logging
10import os
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040011import re
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040012import stat
13import sys
14
15from utils import file_path
maruel12e30012015-10-09 11:55:35 -070016from utils import fs
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -040017from utils import tools
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040018
19
# Version written to, and expected when reading, .isolated files.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes read at a time when doing disk I/O.
DISK_FILE_CHUNK = 1 << 20


# hashlib spells its constructors 'shaX' while the standard name is 'sha-X',
# so the mapping from standard name to constructor is written out explicitly.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# Constructor -> standard name; used for serialization.
SUPPORTED_ALGOS_REVERSE = dict(
    zip(SUPPORTED_ALGOS.values(), SUPPORTED_ALGOS.keys()))


# Accepted values for the file type property of a file entry.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tanselle4288c32016-07-28 09:45:40 -070042
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040043
class IsolatedError(ValueError):
  """Signals that a .isolated file could not be loaded."""
47
48
class MappingError(OSError):
  """Signals that the tree could not be recreated."""
52
53
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hex digest for the given algorithm.

  Arguments:
    value: candidate digest. Anything that is not a string is rejected.
    algo: hashlib-style constructor, e.g. hashlib.sha1. The digest_size of the
          object it creates fixes the expected length: two hex characters per
          digest byte.
  """
  size = 2 * algo().digest_size
  try:
    # \Z only matches at the very end of the string, whereas '$' would also
    # accept a digest followed by a trailing newline.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # re.match() raises TypeError for non-string input (None, int, ...); such
    # a value can't be a valid hash.
    return False
58
59
def hash_file(filepath, algo):
  """Returns the hex digest of a file, reading it one chunk at a time.

  The file is never loaded in memory all at once; at most DISK_FILE_CHUNK
  bytes are requested per read.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    block = stream.read(DISK_FILE_CHUNK)
    while block:
      hasher.update(block)
      block = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel92257792014-08-28 20:51:08 -040073
74
class IsolatedFile(object):
  """Represents a single parsed .isolated file.

  An instance starts out empty; load() fills it from the serialized content
  and creates one (not yet loaded) child per included .isolated file.
  """

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the hash of the file.

    |algo| is the hashing algorithm handed to load_isolated() and to every
    child created by load().
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated(). Empty until load() is called.
    self.data = {}
    # A IsolatedFile instance, one per object in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    Must be called at most once per instance. Whatever load_isolated() raises
    on invalid |content| is propagated, in which case the instance stays
    unloaded.
    """
    # Pass the hash as a logging argument so the message is only formatted
    # when debug logging is enabled.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
      IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400110
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400111
def walk_includes(isolated):
  """Yields every IsolatedFile reachable through the include graph.

  The traversal is depth first: a node is yielded before its children, and
  children are visited left to right. A node that is not loaded yet has no
  children, so nothing is visited below it.
  """
  pending = [isolated]
  while pending:
    node = pending.pop()
    yield node
    # The children are only looked at once the consumer asks for the next
    # item. They are pushed in reverse so the leftmost child is popped first.
    pending.extend(reversed(list(node.children)))
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400122
123
@tools.profile
def _expand_symlinks(indir, relfile):
  """Finds and resolves the symlinks in relfile.

  Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
  - indir: base directory; symlinks in indir are not processed; a symlink
           target that is not under this directory is considered 'outside of
           the tree'.
  - relfile: part of the path in which to expand symlinks. A trailing
             os.path.sep marks it as a directory and is kept in the result.

  Returns:
    tuple(relfile, list(symlinks)): relfile is the real path of relfile,
    relative to indir, where all symlinks were evaluated. symlinks is the chain
    of symlinks found along the way, each one relative to indir, if any.

  Raises:
    MappingError: a symlink target doesn't exist, or a recursive symlink
                  reference was found.
  """
  # Remembered up front because the separators are stripped right below; the
  # trailing separator is put back on the result.
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the part of the path already processed, |todo| is what is left
  # to scan, relative to |done|.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    # NOTE(review): presumably splits |todo| around its first symlink
    # component, with an empty |symlink| when there is none; confirm against
    # file_path.split_at_symlink().
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(done, todo)
    if not symlink:
      # Nothing left to resolve: fix the path case of the remainder and stop.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    # Directory containing the symlink; a relative link value is resolved
    # against it below.
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = fs.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute paths are considered normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not fs.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # The target is outside of the tree: keep the symlink as if it were an
      # ordinary path component, don't record it, and continue with what
      # follows it.
      done = symlink_path
      todo = post_symlink
      continue
    # NOTE(review): presumably true when |target| is the symlink itself or one
    # of its parent directories; confirm against file_path.path_starts_with().
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece != symlink_path_piece:
        break
      prefix_length += 1
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Multiplying by the boolean appends the separator only for directories.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
205
206
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory that every input must live under.
    relfile: path relative to indir, using the native path separator. A
             directory must end with os.path.sep.
    blacklist: optional callable taking a relative path; entries found while
               listing a directory are skipped when it returns a true value.
               It is not applied to |relfile| itself.
    follow_symlinks: when true, symlinks in relfile are resolved with
                     _expand_symlinks() and are yielded too.

  Yields:
    Relative file paths inside the directory. The symlinks encountered, if
    any, are yielded first.

  Raises:
    MappingError: relfile is absolute, points outside indir, doesn't exist,
                  is a directory without a trailing separator (or the
                  reverse), or a directory can't be listed.
  """
  if os.path.isabs(relfile):
    raise MappingError(u'Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  # A plain string prefix test would accept a sibling like '/a/bc' for indir
  # '/a/b' (e.g. relfile '../bc'); the match has to end on a path separator.
  indir_prefix = indir if indir.endswith(os.path.sep) else indir + os.path.sep
  if infile != indir and not infile.startswith(indir_prefix):
    raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + u'.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            u'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = _expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      pass

  for s in symlinks:
    yield s
  if relfile.endswith(os.path.sep):
    if not fs.isdir(infile):
      raise MappingError(
          u'%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith(u'.' + os.path.sep):
      relfile = relfile[2:]
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if fs.isdir(os.path.join(indir, inner_relfile)):
          # Directories are flagged with a trailing separator for the
          # recursive call.
          inner_relfile += os.path.sep
        # Apply recursively.
        for i in expand_directory_and_symlink(
            indir, inner_relfile, blacklist, follow_symlinks):
          yield i
    except OSError as e:
      raise MappingError(
          u'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if fs.isdir(infile):
      raise MappingError(
          u'Input directory %s must have a trailing slash' % infile)

    if not fs.isfile(infile):
      raise MappingError(u'Input file %s doesn\'t exist' % infile)

    yield relfile
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400294
295
def expand_directories_and_symlinks(
    indir, infiles, blacklist, follow_symlinks, ignore_broken_items):
  """Expands every input with expand_directory_and_symlink().

  Directories and symlinks are expanded, the blacklist is applied and files
  are verified to exist. Files are specified in os native path separator.

  When |ignore_broken_items| is set, an input that fails with MappingError is
  logged and skipped; otherwise the error is propagated.

  Yields:
    Relative file path of each file inside every directory specified.
  """
  for entry in infiles:
    try:
      expanded = expand_directory_and_symlink(
          indir, entry, blacklist, follow_symlinks)
      for path in expanded:
        yield path
    except MappingError as err:
      if ignore_broken_items:
        logging.info('warning: %s', err)
        continue
      raise
Marc-Antoine Ruel92257792014-08-28 20:51:08 -0400315
316
@tools.profile
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculates the hash of the file's
    content with |algo| if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached hash
              to skip recalculating the hash. Optional; None is treated like
              an empty dictionary.
    read_only: If 1 or 2, the user write bit is removed from the saved file
               mode. In every case, group write and all the 'others' bits are
               removed. On windows, mode is not set since all files are
               'executable' by default.
    algo: Hashing algorithm used.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file. Keys set: 't' (timestamp) always; 'm' (mode, not on Windows), 's'
    (size) and 'h' (hash) for a file; 'l' (link destination) for a symlink.

  Raises:
    MappingError: the file can't be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only
  if prevdict is None:
    # Nothing cached; everything is recalculated.
    prevdict = {}
  out = {}
  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    if collapse_symlinks:
      # os.stat follows symbolic links
      filestats = fs.stat(filepath)
    else:
      # os.lstat does not follow symbolic links, and thus preserves them.
      filestats = fs.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    if filemode & (stat.S_IXUSR|stat.S_IRGRP) == (stat.S_IXUSR|stat.S_IRGRP):
      # Only keep x group bit if both x user bit and group read bit are set.
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the hash.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400407
408
def save_isolated(isolated, data):
  """Validates |data| then writes it as the .isolated file |isolated|.

  Note: this reference implementation does not create child .isolated file;
  a single file is written and nothing is returned.
  """
  hash_algo = SUPPORTED_ALGOS[data['algo']]
  # 'Reloading' the serialized form is only done for its validation; the
  # result is discarded.
  serialized = json.dumps(data)
  load_isolated(serialized, hash_algo)
  tools.write_json(isolated, data, True)
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400419
420
def split_path(path):
  """Splits a path and return a list with each element.

  The elements are returned from last to first, e.g. 'a/b/c' gives
  ['c', 'b', 'a']. The root of an absolute path is not part of the result.
  """
  out = []
  while path:
    head, rest = os.path.split(path)
    if rest:
      out.append(rest)
    if head == path:
      # A root ('/', or a bare drive on Windows) splits into itself; stop
      # here instead of looping forever.
      break
    path = head
  return out
429
430
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and returns the parsed json data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The parsed dict. The keys of 'files', the link destinations ('l') and
    'relative_cwd' are converted to the native path separator.

  Raises:
    IsolatedError: the content is not valid json or doesn't respect the
                   .isolated format.
  """
  if not algo:
    raise IsolatedError('\'algo\' is required')
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  algo_name = SUPPORTED_ALGOS_REVERSE[algo]
  # The path separator of the 'other' OS. It is converted to os.path.sep at
  # the end, so paths have to be validated in their converted form.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != algo_name:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (algo_name, value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise IsolatedError('Expected string, got %r' % subkey)
        # Check the path as it will be once the separators are fixed below;
        # otherwise '..\\x' or '\\x' would pass the checks on POSIX and then
        # be turned into '../x' or '/x'.
        native_subkey = subkey.replace(wrong_path_sep, os.path.sep)
        if (os.path.isabs(subkey) or os.path.isabs(native_subkey) or
            subkey.startswith('\\\\')):
          # Disallow '\\\\', it could UNC on Windows but disallow this
          # everywhere.
          raise IsolatedError('File path can\'t be absolute: %r' % subkey)
        if subkey.endswith(('/', '\\')):
          raise IsolatedError(
              'File path can\'t end with \'%s\': %r' % (subkey[-1], subkey))
        if '..' in split_path(native_subkey):
          raise IsolatedError('File path can\'t reference parent: %r' % subkey)
        if not isinstance(subvalue, dict):
          raise IsolatedError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise IsolatedError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise IsolatedError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # The type is checked first so a non-string hash is reported as an
            # invalid file instead of failing inside the regexp match.
            if (not isinstance(subsubvalue, basestring) or
                not is_valid_hash(subsubvalue, algo)):
              raise IsolatedError('Expected %s, got %r' %
                                  (algo_name, subsubvalue))
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise IsolatedError('Expected int or long, got %r' % subsubvalue)
          elif subsubkey == 't':
            if subsubvalue not in SUPPORTED_FILE_TYPES:
              raise IsolatedError('Expected one of \'%s\', got %r' % (
                  ', '.join(sorted(SUPPORTED_FILE_TYPES)), subsubvalue))
          else:
            raise IsolatedError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a file ('h' and 's', optionally 'm') or a symlink
        # ('l' only).
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
              (algo_name, subvalue))
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise IsolatedError(
              'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
              (algo_name, subvalue))
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise IsolatedError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        if (not isinstance(subvalue, basestring) or
            not is_valid_hash(subvalue, algo)):
          raise IsolatedError('Expected %s, got %r' % (algo_name, subvalue))

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated tree
  # from another OS.
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
586 return data