maruel | ea586f3 | 2016-04-05 11:11:33 -0700 | [diff] [blame] | 1 | # Copyright 2014 The LUCI Authors. All rights reserved. |
maruel | f1f5e2a | 2016-05-25 17:10:39 -0700 | [diff] [blame] | 2 | # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 | # that can be found in the LICENSE file. |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 4 | |
| 5 | """Understands .isolated files and can do local operations on them.""" |
| 6 | |
| 7 | import hashlib |
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 8 | import json |
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 9 | import logging |
| 10 | import os |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 11 | import re |
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 12 | import stat |
| 13 | import sys |
| 14 | |
| 15 | from utils import file_path |
maruel | 12e3001 | 2015-10-09 11:55:35 -0700 | [diff] [blame] | 16 | from utils import fs |
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 17 | from utils import tools |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 18 | |
# Version of the .isolated format written by this module and expected when
# reading one back.
ISOLATED_FILE_VERSION = '1.6'


# Number of bytes requested per read when streaming a file from disk.
DISK_FILE_CHUNK = 1024 * 1024


# hashlib spells its algorithms 'shaX' while the standard names are 'sha-X',
# so the mapping from standard name to hashlib class is listed explicitly.
SUPPORTED_ALGOS = {
  'sha-1': hashlib.sha1,
  'sha-256': hashlib.sha256,
  'sha-512': hashlib.sha512,
}


# Inverse of SUPPORTED_ALGOS (hashlib class -> standard name). Used for
# serialization.
SUPPORTED_ALGOS_REVERSE = {
  hash_class: name for name, hash_class in SUPPORTED_ALGOS.items()
}


# Values accepted for the 't' (type) entry of a file.
SUPPORTED_FILE_TYPES = ['basic', 'tar']
tansell | e4288c3 | 2016-07-28 09:45:40 -0700 | [diff] [blame] | 41 | |
Marc-Antoine Ruel | 8bee66d | 2014-08-28 19:02:07 -0400 | [diff] [blame] | 42 | |
class IsolatedError(ValueError):
  """Raised when a .isolated file can't be loaded or is malformed."""
| 46 | |
| 47 | |
class MappingError(OSError):
  """Raised when the tree of input files can't be recreated."""
| 51 | |
| 52 | |
def is_valid_hash(value, algo):
  """Returns whether |value| is a valid hex digest for |algo|.

  Arguments:
  - value: candidate digest. Anything that can't be matched as a string is
           rejected instead of raising.
  - algo: hashlib algorithm class, e.g. hashlib.sha1.

  Returns:
    True only if |value| is made of exactly 2 * digest_size hexadecimal
    characters, in upper or lower case.
  """
  size = 2 * algo().digest_size
  try:
    # Anchor with \Z and not $: '$' also matches right before a trailing
    # newline, which would accept '<hex digits>\n' as a valid digest.
    return bool(re.match(r'^[a-fA-F0-9]{%d}\Z' % size, value))
  except TypeError:
    # Not a string, e.g. a number or null decoded from untrusted JSON.
    return False
| 57 | |
| 58 | |
def hash_file(filepath, algo):
  """Returns the hex digest of the content of the file at |filepath|.

  The file is consumed in reads of DISK_FILE_CHUNK bytes so it is never held
  in memory as a whole.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with fs.open(filepath, 'rb') as stream:
    block = stream.read(DISK_FILE_CHUNK)
    # An empty read means the end of the file was reached.
    while block:
      hasher.update(block)
      block = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 72 | |
| 73 | |
class IsolatedFile(object):
  """Represents a single parsed .isolated file.

  An instance starts out empty. load() fills |data| from the serialized
  content and creates one, not yet loaded, child per entry of 'includes'.
  """

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the hash of the file.

    |algo| is the hashlib algorithm class handed to load_isolated() by load().
    """
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per item in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_loaded = False

  def __repr__(self):
    return 'IsolatedFile(%s, loaded: %s)' % (self.obj_hash, self._is_loaded)

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    Must be called at most once per instance. Raises whatever load_isolated()
    raises on invalid content, in which case the instance stays unloaded.
    """
    # Pass the argument separately so logging only formats the message when
    # the debug level is enabled.
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_loaded
    self.data = load_isolated(content, self.algo)
    self.children = [
        IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_loaded = True

  @property
  def is_loaded(self):
    """Returns True if 'load' was already called."""
    return self._is_loaded
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 109 | |
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 110 | |
def walk_includes(isolated):
  """Yields every IsolatedFile reachable from |isolated|, depth first.

  The root comes first, then each child followed by its own descendants, left
  to right. A node's children are only looked up after the node itself was
  yielded, so nodes that are not loaded yet are considered childless.
  """
  yield isolated
  # Stack of iterators over the children lists that are currently being
  # walked; the innermost one is at the end.
  pending = [iter(isolated.children)]
  while pending:
    try:
      node = next(pending[-1])
    except StopIteration:
      # This level is exhausted, resume with the parent's remaining children.
      pending.pop()
      continue
    yield node
    pending.append(iter(node.children))
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 121 | |
| 122 | |
@tools.profile
def _expand_symlinks(indir, relfile):
  """Finds symlinks in relfile.

  Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
  - indir: base directory; symlinks in indir are not processed; this is
           the base directory that is considered 'outside of the tree'.
  - relfile: part of the path to expand symlink.

  Returns:
    tuple(relfile, list(symlinks)): relfile is real path of relfile where all
    symlinks were evaluated, relative to indir, with a trailing os.path.sep if
    the input had one. symlinks is the chain of symlinks found along the way,
    if any, each relative to indir.

  Raises:
    MappingError if a symlink target doesn't exist or if the target of a
    symlink is a parent of the symlink itself.
  """
  # Remember the trailing separator; it is stripped now and added back at the
  # end.
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the part of the path already resolved, |todo| what is left to
  # scan.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(done, todo)
    if not symlink:
      # No symlink left in |todo|: fix the case of the remainder and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    # Directory containing the symlink; relative link values are resolved
    # against it.
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = fs.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not fs.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # The target is outside of |indir|: keep the symlink in the path as if
      # it was a regular entry and don't record it in |symlinks|.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      # The symlink lives below its own target; following it would loop.
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece != symlink_path_piece:
        break
      prefix_length += 1
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # bool * str: the separator is appended only when the input had one.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
| 204 | |
| 205 | |
@tools.profile
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
  - indir: base directory that |relfile| is relative to.
  - relfile: relative path of the input; a directory must end with
             os.path.sep.
  - blacklist: optional callable taking a relative path; directory entries for
               which it returns a true value are skipped. It is not applied to
               |relfile| itself.
  - follow_symlinks: if true, symlinks in |relfile| are resolved with
                     _expand_symlinks() and each one found is yielded too.

  Yields:
    tuple(Relative path, bool is_symlink) to files and symlinks inside |indir|.

  Raises:
    MappingError if the path is absolute, maps outside of |indir|, doesn't
    exist, has a trailing separator inconsistent with its type or if a
    directory can't be listed.
  """
  if os.path.isabs(relfile):
    raise MappingError(u'Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  # NOTE(review): this is a plain string prefix test, so a sibling whose name
  # merely starts with |indir| (e.g. indir + 'x') also passes. Confirm whether
  # file_path.path_starts_with() should be used instead.
  if not infile.startswith(indir):
    raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + u'.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            u'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    try:
      relfile, symlinks = _expand_symlinks(indir, relfile)
    except OSError:
      # The file doesn't exist, it will throw below.
      # MappingError derives from OSError, so the errors raised by
      # _expand_symlinks() itself are ignored here as well.
      pass

  # The symlinks need to be mapped in.
  for s in symlinks:
    yield s, True

  if relfile.endswith(os.path.sep):
    if not fs.isdir(infile):
      raise MappingError(
          u'%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith(u'.' + os.path.sep):
      relfile = relfile[2:]
    try:
      for filename in fs.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if fs.isdir(os.path.join(indir, inner_relfile)):
          # Directories are marked with a trailing separator for the recursive
          # call.
          inner_relfile += os.path.sep
        # Apply recursively.
        for i, is_symlink in expand_directory_and_symlink(
            indir, inner_relfile, blacklist, follow_symlinks):
          yield i, is_symlink
    except OSError as e:
      # MappingError derives from OSError, so an error raised by the recursive
      # call is caught here too and wrapped again.
      raise MappingError(
          u'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if fs.isdir(infile):
      raise MappingError(
          u'Input directory %s must have a trailing slash' % infile)

    if not fs.isfile(infile):
      raise MappingError(u'Input file %s doesn\'t exist' % infile)

    yield relfile, False
Marc-Antoine Ruel | 9225779 | 2014-08-28 20:51:08 -0400 | [diff] [blame] | 295 | |
| 296 | |
@tools.profile
def file_to_metadata(filepath, read_only, collapse_symlinks):
  """Processes an input file, a dependency, and returns meta data about it.

  Arguments:
    filepath: File to act on.
    read_only: One of None, 0, 1 or 2. Any true value removes the user write
               bit from the saved mode. In practice, only one of 4 modes is
               saved: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On windows,
               mode is not set since all files are 'executable' by default.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file *except* 'h' for files:
    - 'm': file mode; only for non-links and never on Windows.
    - 's': file size; only for non-links.
    - 'l': link destination, relative to the directory of the link; only for
           links.

  Raises:
    MappingError if the file can't be stat'ed.
  """
  # TODO(maruel): None is not a valid value.
  assert read_only in (None, 0, 1, 2), read_only
  # stat follows symbolic links while lstat doesn't, and thus preserves them.
  stat_func = fs.stat if collapse_symlinks else fs.lstat
  try:
    filestats = stat_func(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  out = {}
  is_link = stat.S_ISLNK(filestats.st_mode)

  # The mode is only saved for real files, and is ignored on Windows since
  # it's not really useful there.
  if not is_link and sys.platform != 'win32':
    # Remove write access for group and all access to 'others'.
    mode = stat.S_IMODE(filestats.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    # Only keep x group bit if both x user bit and group read bit are set.
    group_exec_needs = stat.S_IXUSR | stat.S_IRGRP
    if (mode & group_exec_needs) == group_exec_needs:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    out['m'] = mode

  if is_link:
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    link_value = fs.readlink(filepath)  # pylint: disable=no-member
    link_dir = file_path.get_native_path_case(os.path.dirname(filepath))
    link_dest = file_path.fix_native_path_case(link_dir, link_value)
    out['l'] = os.path.relpath(link_dest, link_dir)
  else:
    out['s'] = filestats.st_size
  return out
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 363 | |
| 364 | |
def save_isolated(isolated, data):
  """Validates |data| and writes it as a single .isolated file.

  |isolated| is handed to tools.write_json() as the destination and |data| is
  the dict to serialize; its 'algo' entry must be a key of SUPPORTED_ALGOS.

  Note: this reference implementation does not create child .isolated file and
  does not return anything.
  """
  hash_class = SUPPORTED_ALGOS[data['algo']]
  # A round trip through load_isolated() makes sure the data is valid
  # .isolated data before anything is written.
  serialized = json.dumps(data)
  load_isolated(serialized, hash_class)
  tools.write_json(isolated, data, True)
Marc-Antoine Ruel | 52436aa | 2014-08-28 21:57:57 -0400 | [diff] [blame] | 375 | |
| 376 | |
def split_path(path):
  """Splits a path and returns the list of its components, leaf first.

  E.g. 'a/b/c' results in ['c', 'b', 'a']. Empty components, like the one
  created by a trailing separator, are dropped. The root of an absolute path
  ('/', a drive) is not part of the result.
  """
  out = []
  while path:
    head, rest = os.path.split(path)
    if rest:
      out.append(rest)
    if head == path:
      # os.path.split() returns a root unchanged; without this check the loop
      # would never terminate on an absolute path.
      break
    path = head
  return out
| 385 | |
| 386 | |
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and returns the parsed json data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The decoded dict, where the keys of 'files', the 'l' link destinations and
    'relative_cwd' have their path separators converted to os.path.sep.

  Raises:
    IsolatedError if |algo| is not set or if the content is malformed.
    KeyError if |algo| is not one of the values of SUPPORTED_ALGOS.
  """
  if not algo:
    raise IsolatedError('\'algo\' is required')
  try:
    data = json.loads(content)
  except ValueError as v:
    logging.error('Failed to parse .isolated file:\n%s', content)
    raise IsolatedError('Failed to parse (%s): %s...' % (v, content[:100]))

  if not isinstance(data, dict):
    raise IsolatedError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise IsolatedError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise IsolatedError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise IsolatedError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  algo_name = SUPPORTED_ALGOS_REVERSE[algo]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise IsolatedError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != algo_name:
        raise IsolatedError(
            'Expected \'%s\', got %r' % (algo_name, value))

    elif key == 'command':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise IsolatedError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise IsolatedError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise IsolatedError('Expected string, got %r' % subkey)
        if os.path.isabs(subkey) or subkey.startswith('\\\\'):
          # Disallow '\\\\', it could UNC on Windows but disallow this
          # everywhere.
          raise IsolatedError('File path can\'t be absolute: %r' % subkey)
        if subkey.endswith(('/', '\\')):
          raise IsolatedError(
              'File path can\'t end with \'%s\': %r' % (subkey[-1], subkey))
        if '..' in split_path(subkey):
          raise IsolatedError('File path can\'t reference parent: %r' % subkey)
        if not isinstance(subvalue, dict):
          raise IsolatedError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise IsolatedError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise IsolatedError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # Check the type first: a number or null from the json must be
            # reported as an IsolatedError, not blow up inside the regexp
            # match.
            if (not isinstance(subsubvalue, basestring) or
                not is_valid_hash(subsubvalue, algo)):
              raise IsolatedError('Expected %s, got %r' %
                                  (algo_name, subsubvalue))
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise IsolatedError('Expected int or long, got %r' % subsubvalue)
          elif subsubkey == 't':
            if subsubvalue not in SUPPORTED_FILE_TYPES:
              raise IsolatedError('Expected one of \'%s\', got %r' % (
                  ', '.join(sorted(SUPPORTED_FILE_TYPES)), subsubvalue))
          else:
            raise IsolatedError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a file, with both 'h' and 's', or a link, with
        # only 'l' and no 'm'.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'h\' (%s) or \'l\' (link), got: %r' %
              (algo_name, subvalue))
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise IsolatedError(
              'Both \'h\' (%s) and \'s\' (size) should be set, got: %r' %
              (algo_name, subvalue))
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise IsolatedError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise IsolatedError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise IsolatedError('Expected list, got %r' % value)
      if not value:
        raise IsolatedError('Expected non-empty includes list')
      for subvalue in value:
        # Same as for 'h': reject non-string items with an IsolatedError.
        if (not isinstance(subvalue, basestring) or
            not is_valid_hash(subvalue, algo)):
          raise IsolatedError('Expected %s, got %r' % (algo_name, subvalue))

    elif key == 'os':
      if version >= (1, 4):
        raise IsolatedError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise IsolatedError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise IsolatedError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise IsolatedError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the native path format, someone could want to download an .isolated tree
  # from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data