blob: ff9683da405dd1d6f3bbb615374cf48ddf6da6b9 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Junji Watanabe7b720782020-07-01 01:51:07 +000015import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000017import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040018
19from utils import file_path
20from utils import fs
21from utils import lru
22from utils import threading_utils
23from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000024tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
Lei Leife202df2019-06-11 17:33:34 +000026# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000027from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000028import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
Junji Watanabe5e73aab2020-04-09 04:20:27 +000030import isolated_format
31
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040032# The file size to be used when we don't know the correct file size,
33# generally used for .isolated files.
34UNKNOWN_FILE_SIZE = None
35
36
37def file_write(path, content_generator):
38 """Writes file content as generated by content_generator.
39
40 Creates the intermediary directory as needed.
41
42 Returns the number of bytes written.
43
44 Meant to be mocked out in unit tests.
45 """
46 file_path.ensure_tree(os.path.dirname(path))
47 total = 0
48 with fs.open(path, 'wb') as f:
49 for d in content_generator:
50 total += len(d)
51 f.write(d)
52 return total
53
54
55def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000056 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040057
58 Currently it just checks the file exists and its size matches the expectation.
59 """
60 if size == UNKNOWN_FILE_SIZE:
61 return fs.isfile(path)
62 try:
63 actual_size = fs.stat(path).st_size
64 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000065 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
66 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040067 return False
68 if size != actual_size:
69 logging.warning(
70 'Found invalid item %s; %d != %d',
71 os.path.basename(path), actual_size, size)
72 return False
73 return True
74
75
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000076def trim_caches(caches, path, min_free_space, max_age_secs):
77 """Trims multiple caches.
78
79 The goal here is to coherently trim all caches in a coherent LRU fashion,
80 deleting older items independent of which container they belong to.
81
82 Two policies are enforced first:
83 - max_age_secs
84 - min_free_space
85
86 Once that's done, then we enforce each cache's own policies.
87
88 Returns:
89 Slice containing the size of all items evicted.
90 """
91 min_ts = time.time() - max_age_secs if max_age_secs else 0
92 free_disk = file_path.get_free_space(path) if min_free_space else 0
93 total = []
94 if min_ts or free_disk:
95 while True:
96 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
97 if not oldest:
98 break
Lei Leife202df2019-06-11 17:33:34 +000099 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000100 c, ts = oldest[0]
101 if ts >= min_ts and free_disk >= min_free_space:
102 break
103 total.append(c.remove_oldest())
104 if min_free_space:
105 free_disk = file_path.get_free_space(path)
106 # Evaluate each cache's own policies.
107 for c in caches:
108 total.extend(c.trim())
109 return total
110
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000111def _use_scandir():
112 # Use scandir in windows for faster execution.
113 # Do not use in other OS due to crbug.com/989409
114 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000115
Junji Watanabe38b28b02020-04-23 10:23:30 +0000116
Junji Watanabeacb32a32021-03-09 04:17:55 +0000117def _get_recursive_size_with_scandir(path):
118 if sys.platform == 'win32':
119
120 def direntIsJunction(entry):
121 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
122 # windows-only symbols.
123 return bool(entry.stat().st_file_attributes
124 & scandir.FILE_ATTRIBUTE_REPARSE_POINT)
125 else:
126
127 def direntIsJunction(_entry):
128 return False
129
130 total = 0
131 n_dirs = 0
132 n_files = 0
133 n_links = 0
134 stack = [path]
135 while stack:
136 for entry in scandir.scandir(stack.pop()):
137 if entry.is_symlink() or direntIsJunction(entry):
138 n_links += 1
139 continue
140 if entry.is_file():
141 n_files += 1
142 total += entry.stat().st_size
143 elif entry.is_dir():
144 n_dirs += 1
145 stack.append(entry.path)
146 else:
147 logging.warning('non directory/file entry: %s', entry)
148 return total, n_dirs, n_files, n_links
149
150
151def _get_recursive_size_with_fswalk(path):
152 total = 0
153 n_dirs = 0
154 n_files = 0
155 n_links = 0
156 for root, dirs, files in fs.walk(path):
157 n_dirs += len(dirs)
158 for f in files:
159 st = fs.lstat(os.path.join(root, f))
160 if stat.S_ISLNK(st.st_mode):
161 n_links += 1
162 continue
163 n_files += 1
164 total += st.st_size
165 return total, n_dirs, n_files, n_links
166
167
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000168def _get_recursive_size(path):
169 """Returns the total data size for the specified path.
170
171 This function can be surprisingly slow on OSX, so its output should be cached.
172 """
Junji Watanabeacb32a32021-03-09 04:17:55 +0000173 start = time.time()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000174 try:
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000175 if _use_scandir():
Junji Watanabeacb32a32021-03-09 04:17:55 +0000176 total, n_dirs, n_files, n_links = _get_recursive_size_with_scandir(path)
177 else:
178 total, n_dirs, n_files, n_links = _get_recursive_size_with_fswalk(path)
179 elapsed = time.time() - start
180 logging.debug(
181 '_get_recursive_size: traversed %s took %s seconds. '
182 'scandir: %s, files: %d, links: %d, dirs: %d', path, elapsed,
183 _use_scandir(), n_files, n_links, n_dirs)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000184 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000185 except (IOError, OSError, UnicodeEncodeError):
186 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000187 return None
188
189
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400190class NamedCacheError(Exception):
191 """Named cache specific error."""
192
193
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400194class NoMoreSpace(Exception):
195 """Not enough space to map the whole directory."""
196 pass
197
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400198
199class CachePolicies(object):
200 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
201 """Common caching policies for the multiple caches (isolated, named, cipd).
202
203 Arguments:
204 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
205 cache is effectively a leak.
206 - min_free_space: Trim if disk free space becomes lower than this value. If
207 0, it will unconditionally fill the disk.
208 - max_items: Maximum number of items to keep in the cache. If 0, do not
209 enforce a limit.
210 - max_age_secs: Maximum age an item is kept in the cache until it is
211 automatically evicted. Having a lot of dead luggage slows
212 everything down.
213 """
214 self.max_cache_size = max_cache_size
215 self.min_free_space = min_free_space
216 self.max_items = max_items
217 self.max_age_secs = max_age_secs
218
219 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000220 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
221 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
222 self.max_cache_size, float(self.max_cache_size) / 1024**3,
223 self.max_items, self.min_free_space,
224 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400225
226
227class CacheMiss(Exception):
228 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400229 def __init__(self, digest):
230 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000231 super(CacheMiss,
232 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400233
234
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400235class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000236
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400237 def __init__(self, cache_dir):
238 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000239 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400240 assert file_path.isabs(cache_dir), cache_dir
241 self.cache_dir = cache_dir
242 self._lock = threading_utils.LockWithAssert()
243 # Profiling values.
244 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400245 self._used = []
246
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000247 def __nonzero__(self):
248 """A cache is always True.
249
250 Otherwise it falls back to __len__, which is surprising.
251 """
252 return True
253
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000254 def __bool__(self):
255 """A cache is always True.
256
257 Otherwise it falls back to __len__, which is surprising.
258 """
259 return True
260
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000261 def __len__(self):
262 """Returns the number of entries in the cache."""
263 raise NotImplementedError()
264
265 def __iter__(self):
266 """Iterates over all the entries names."""
267 raise NotImplementedError()
268
269 def __contains__(self, name):
270 """Returns if an entry is in the cache."""
271 raise NotImplementedError()
272
273 @property
274 def total_size(self):
275 """Returns the total size of the cache in bytes."""
276 raise NotImplementedError()
277
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400278 @property
279 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000280 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400281 with self._lock:
282 return self._added[:]
283
284 @property
285 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000286 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 with self._lock:
288 return self._used[:]
289
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000290 def get_oldest(self):
291 """Returns timestamp of oldest cache entry or None.
292
293 Returns:
294 Timestamp of the oldest item.
295
296 Used for manual trimming.
297 """
298 raise NotImplementedError()
299
300 def remove_oldest(self):
301 """Removes the oldest item from the cache.
302
303 Returns:
304 Size of the oldest item.
305
306 Used for manual trimming.
307 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400308 raise NotImplementedError()
309
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000310 def save(self):
311 """Saves the current cache to disk."""
312 raise NotImplementedError()
313
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400314 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000315 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400316
317 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000318 Slice with the size of evicted items.
319 """
320 raise NotImplementedError()
321
322 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000323 """Deletes any corrupted item from the cache, then calls trim(), then
324 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000325
326 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400327 """
328 raise NotImplementedError()
329
330
331class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400332 """Content addressed cache that stores objects temporarily.
333
334 It can be accessed concurrently from multiple threads, so it should protect
335 its internal state with some lock.
336 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400337
338 def __enter__(self):
339 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000340 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400341 return self
342
343 def __exit__(self, _exc_type, _exec_value, _traceback):
344 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000345 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400346 return False
347
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 def touch(self, digest, size):
349 """Ensures item is not corrupted and updates its LRU position.
350
351 Arguments:
352 digest: hash digest of item to check.
353 size: expected size of this item.
354
355 Returns:
356 True if item is in cache and not corrupted.
357 """
358 raise NotImplementedError()
359
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400360 def getfileobj(self, digest):
361 """Returns a readable file like object.
362
363 If file exists on the file system it will have a .name attribute with an
364 absolute path to the file.
365 """
366 raise NotImplementedError()
367
368 def write(self, digest, content):
369 """Reads data from |content| generator and stores it in cache.
370
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000371 It is possible to write to an object that already exists. It may be
372 ignored (sent to /dev/null) but the timestamp is still updated.
373
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374 Returns digest to simplify chaining.
375 """
376 raise NotImplementedError()
377
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378
379class MemoryContentAddressedCache(ContentAddressedCache):
380 """ContentAddressedCache implementation that stores everything in memory."""
381
Lei Leife202df2019-06-11 17:33:34 +0000382 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400383 """Args:
384 file_mode_mask: bit mask to AND file mode with. Default value will make
385 all mapped files to be read only.
386 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400387 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400388 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000389 # Items in a LRU lookup dict(digest: size).
390 self._lru = lru.LRUDict()
391
392 # Cache interface implementation.
393
394 def __len__(self):
395 with self._lock:
396 return len(self._lru)
397
398 def __iter__(self):
399 # This is not thread-safe.
400 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400401
402 def __contains__(self, digest):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400405
406 @property
407 def total_size(self):
408 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000409 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000411 def get_oldest(self):
412 with self._lock:
413 try:
414 # (key, (value, ts))
415 return self._lru.get_oldest()[1][1]
416 except KeyError:
417 return None
418
419 def remove_oldest(self):
420 with self._lock:
421 # TODO(maruel): Update self._added.
422 # (key, (value, ts))
423 return len(self._lru.pop_oldest()[1][0])
424
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000425 def save(self):
426 pass
427
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000428 def trim(self):
429 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000430 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400431
432 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000433 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400434 pass
435
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000436 # ContentAddressedCache interface implementation.
437
438 def __contains__(self, digest):
439 with self._lock:
440 return digest in self._lru
441
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 def touch(self, digest, size):
443 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000444 try:
445 self._lru.touch(digest)
446 except KeyError:
447 return False
448 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400449
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 def getfileobj(self, digest):
451 with self._lock:
452 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000453 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400454 except KeyError:
455 raise CacheMiss(digest)
456 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000457 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400458 return io.BytesIO(d)
459
460 def write(self, digest, content):
461 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000462 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400463 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000464 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400465 self._added.append(len(data))
466 return digest
467
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400468
469class DiskContentAddressedCache(ContentAddressedCache):
470 """Stateful LRU cache in a flat hash table in a directory.
471
472 Saves its state as json file.
473 """
474 STATE_FILE = u'state.json'
475
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000476 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400477 """
478 Arguments:
479 cache_dir: directory where to place the cache.
480 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400481 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000482 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483 """
484 # All protected methods (starting with '_') except _path should be called
485 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400486 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400487 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
489 # Items in a LRU lookup dict(digest: size).
490 self._lru = lru.LRUDict()
491 # Current cached free disk space. It is updated by self._trim().
492 file_path.ensure_tree(self.cache_dir)
493 self._free_disk = file_path.get_free_space(self.cache_dir)
494 # The first item in the LRU cache that must not be evicted during this run
495 # since it was referenced. All items more recent that _protected in the LRU
496 # cache are also inherently protected. It could be a set() of all items
497 # referenced but this increases memory usage without a use case.
498 self._protected = None
499 # Cleanup operations done by self._load(), if any.
500 self._operations = []
501 with tools.Profiler('Setup'):
502 with self._lock:
503 self._load(trim, time_fn)
504
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000505 # Cache interface implementation.
506
507 def __len__(self):
508 with self._lock:
509 return len(self._lru)
510
511 def __iter__(self):
512 # This is not thread-safe.
513 return self._lru.__iter__()
514
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400515 def __contains__(self, digest):
516 with self._lock:
517 return digest in self._lru
518
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400520 def total_size(self):
521 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000522 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000524 def get_oldest(self):
525 with self._lock:
526 try:
527 # (key, (value, ts))
528 return self._lru.get_oldest()[1][1]
529 except KeyError:
530 return None
531
532 def remove_oldest(self):
533 with self._lock:
534 # TODO(maruel): Update self._added.
535 return self._remove_lru_file(True)
536
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000537 def save(self):
538 with self._lock:
539 return self._save()
540
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000541 def trim(self):
542 """Forces retention policies."""
543 with self._lock:
544 return self._trim()
545
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400546 def cleanup(self):
547 """Cleans up the cache directory.
548
549 Ensures there is no unknown files in cache_dir.
550 Ensures the read-only bits are set correctly.
551
552 At that point, the cache was already loaded, trimmed to respect cache
553 policies.
554 """
555 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000556 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400557 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000558 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559 # It'd be faster if there were a readdir() function.
560 for filename in fs.listdir(self.cache_dir):
561 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000562 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400563 continue
564 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000565 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400566 previous.remove(filename)
567 continue
568
569 # An untracked file. Delete it.
570 logging.warning('Removing unknown file %s from cache', filename)
571 p = self._path(filename)
572 if fs.isdir(p):
573 try:
574 file_path.rmtree(p)
575 except OSError:
576 pass
577 else:
578 file_path.try_remove(p)
579 continue
580
581 if previous:
582 # Filter out entries that were not found.
583 logging.warning('Removed %d lost files', len(previous))
584 for filename in previous:
585 self._lru.pop(filename)
586 self._save()
587
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000588 # Verify hash of every single item to detect corruption. the corrupted
589 # files will be evicted.
590 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000591 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000592 # verify only if the mtime is grather than the timestamp in state.json
593 # to avoid take too long time.
594 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000595 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000596 logging.warning('Item has been modified. item: %s', digest)
597 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000598 # Update timestamp in state.json
599 self._lru.touch(digest)
600 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000601 # remove corrupted file from LRU and file system
602 self._lru.pop(digest)
603 self._delete_file(digest, UNKNOWN_FILE_SIZE)
604 logging.error('Deleted corrupted item: %s', digest)
605 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400606
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000607 # ContentAddressedCache interface implementation.
608
609 def __contains__(self, digest):
610 with self._lock:
611 return digest in self._lru
612
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400613 def touch(self, digest, size):
614 """Verifies an actual file is valid and bumps its LRU position.
615
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000616 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400617
618 Note that is doesn't compute the hash so it could still be corrupted if the
619 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400620 """
621 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000622 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400623
624 # Update its LRU position.
625 with self._lock:
626 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000627 if looks_valid:
628 # Exists but not in the LRU anymore.
629 self._delete_file(digest, size)
630 return False
631 if not looks_valid:
632 self._lru.pop(digest)
633 # Exists but not in the LRU anymore.
634 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400635 return False
636 self._lru.touch(digest)
637 self._protected = self._protected or digest
638 return True
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def getfileobj(self, digest):
641 try:
642 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400643 except IOError:
644 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000645 with self._lock:
646 try:
647 self._used.append(self._lru[digest])
648 except KeyError:
649 # If the digest is not actually in _lru, assume it is a cache miss.
650 # Existing file will be overwritten by whoever uses the cache and added
651 # to _lru.
652 f.close()
653 raise CacheMiss(digest)
654 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400655
656 def write(self, digest, content):
657 assert content is not None
658 with self._lock:
659 self._protected = self._protected or digest
660 path = self._path(digest)
661 # A stale broken file may remain. It is possible for the file to have write
662 # access bit removed which would cause the file_write() call to fail to open
663 # in write mode. Take no chance here.
664 file_path.try_remove(path)
665 try:
666 size = file_write(path, content)
667 except:
668 # There are two possible places were an exception can occur:
669 # 1) Inside |content| generator in case of network or unzipping errors.
670 # 2) Inside file_write itself in case of disk IO errors.
671 # In any case delete an incomplete file and propagate the exception to
672 # caller, it will be logged there.
673 file_path.try_remove(path)
674 raise
675 # Make the file read-only in the cache. This has a few side-effects since
676 # the file node is modified, so every directory entries to this file becomes
677 # read-only. It's fine here because it is a new file.
678 file_path.set_read_only(path, True)
679 with self._lock:
680 self._add(digest, size)
681 return digest
682
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000683 # Internal functions.
684
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400685 def _load(self, trim, time_fn):
686 """Loads state of the cache from json file.
687
688 If cache_dir does not exist on disk, it is created.
689 """
690 self._lock.assert_locked()
691
692 if not fs.isfile(self.state_file):
693 if not fs.isdir(self.cache_dir):
694 fs.makedirs(self.cache_dir)
695 else:
696 # Load state of the cache.
697 try:
698 self._lru = lru.LRUDict.load(self.state_file)
699 except ValueError as err:
700 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000701 # Don't want to keep broken cache dir.
702 file_path.rmtree(self.cache_dir)
703 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000704 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 if time_fn:
706 self._lru.time_fn = time_fn
707 if trim:
708 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709
710 def _save(self):
711 """Saves the LRU ordering."""
712 self._lock.assert_locked()
713 if sys.platform != 'win32':
714 d = os.path.dirname(self.state_file)
715 if fs.isdir(d):
716 # Necessary otherwise the file can't be created.
717 file_path.set_read_only(d, False)
718 if fs.isfile(self.state_file):
719 file_path.set_read_only(self.state_file, False)
720 self._lru.save(self.state_file)
721
722 def _trim(self):
723 """Trims anything we don't know, make sure enough free space exists."""
724 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000725 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400726
727 # Trim old items.
728 if self.policies.max_age_secs:
729 cutoff = self._lru.time_fn() - self.policies.max_age_secs
730 while self._lru:
731 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000732 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400733 if oldest[1][1] >= cutoff:
734 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000735 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400736
737 # Ensure maximum cache size.
738 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000739 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400740 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000741 e = self._remove_lru_file(True)
742 evicted.append(e)
743 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744
745 # Ensure maximum number of items in the cache.
746 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000747 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000748 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400749
750 # Ensure enough free space.
751 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400752 while (
753 self.policies.min_free_space and
754 self._lru and
755 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000756 # self._free_disk is updated by this call.
757 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400758
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000759 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000760 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400761 usage_percent = 0.
762 if total_usage:
763 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
764
765 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000766 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
767 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000768 '%.1fkb)', len(evicted),
769 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
770 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400771 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000772 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400773
774 def _path(self, digest):
775 """Returns the path to one item."""
776 return os.path.join(self.cache_dir, digest)
777
778 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000779 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000780
781 Updates self._free_disk.
782 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400783 self._lock.assert_locked()
784 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000785 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400786 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000787 total_size = sum(self._lru.values())
788 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000789 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
790 '%s bytes (%.3f GiB) free_space') % (
791 self.policies, total_size, float(total_size) / 1024**3,
792 len(self._lru), self._free_disk,
793 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400794 raise NoMoreSpace(msg)
795 except KeyError:
796 # That means an internal error.
797 raise NoMoreSpace('Nothing to remove, can\'t happend')
798 digest, (size, _) = self._lru.pop_oldest()
Takuto Ikuta8d8ca9b2021-02-26 02:31:43 +0000799 logging.debug('Removing LRU file %s with size %s bytes', digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400800 self._delete_file(digest, size)
801 return size
802
803 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
804 """Adds an item into LRU cache marking it as a newest one."""
805 self._lock.assert_locked()
806 if size == UNKNOWN_FILE_SIZE:
807 size = fs.stat(self._path(digest)).st_size
808 self._added.append(size)
809 self._lru.add(digest, size)
810 self._free_disk -= size
811 # Do a quicker version of self._trim(). It only enforces free disk space,
812 # not cache size limits. It doesn't actually look at real free disk space,
813 # only uses its cache values. self._trim() will be called later to enforce
814 # real trimming but doing this quick version here makes it possible to map
815 # an isolated that is larger than the current amount of free disk space when
816 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000817 while (self.policies.min_free_space and self._lru and
818 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000819 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400820 if self._remove_lru_file(False) == -1:
821 break
822
823 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000824 """Deletes cache file from the file system.
825
826 Updates self._free_disk.
827 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400828 self._lock.assert_locked()
829 try:
830 if size == UNKNOWN_FILE_SIZE:
831 try:
832 size = fs.stat(self._path(digest)).st_size
833 except OSError:
834 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000835 if file_path.try_remove(self._path(digest)):
836 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400837 except OSError as e:
838 if e.errno != errno.ENOENT:
839 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000841 def _get_mtime(self, digest):
842 """Get mtime of cache file."""
843 return os.path.getmtime(self._path(digest))
844
845 def _is_valid_hash(self, digest):
846 """Verify digest with supported hash algos."""
847 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000848 if digest == isolated_format.hash_file(self._path(digest), algo):
849 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000850 return False
851
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400852
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400853class NamedCache(Cache):
854 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400855
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400856 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400857 name is a short identifier that describes the contents of the cache, e.g.
858 "git_v8" could be all git repositories required by v8 builds, or
859 "build_chromium" could be build artefacts of the Chromium.
860 path is a directory path relative to the task run dir. Cache installation
861 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400862 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400863 _DIR_ALPHABET = string.ascii_letters + string.digits
864 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000865 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400866
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400867 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400868 """Initializes NamedCaches.
869
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400870 Arguments:
871 - cache_dir is a directory for persistent cache storage.
872 - policies is a CachePolicies instance.
873 - time_fn is a function that returns timestamp (float) and used to take
874 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400875 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000878 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
880 self._lru = lru.LRUDict()
881 if not fs.isdir(self.cache_dir):
882 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000883 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000884 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400885 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000886 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000887 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000888 with open(self.state_file, 'r') as f:
889 logging.info('named cache state file: %s\n%s', self.state_file,
890 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000891 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000892
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400893 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000894 logging.exception(
895 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400896 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000897 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000898 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000899 with self._lock:
900 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400901 if time_fn:
902 self._lru.time_fn = time_fn
903
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400904 @property
905 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000906 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400907 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000908 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400909
Takuto Ikutaeab23172020-07-02 03:50:02 +0000910 def _sudo_chown(self, path):
911 if sys.platform == 'win32':
912 return
913 uid = os.getuid()
914 if os.stat(path).st_uid == uid:
915 return
916 # Maybe owner of |path| is different from runner of this script. This is to
917 # make fs.rename work in that case.
918 # https://crbug.com/986676
919 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
920
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000921 def install(self, dst, name):
922 """Creates the directory |dst| and moves a previous named cache |name| if it
923 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400924
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000925 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400926
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000927 Returns the reused named cache size in bytes, or 0 if none was present.
928
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400929 Raises NamedCacheError if cannot install the cache.
930 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000931 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400932 with self._lock:
933 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000934 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400935 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000936 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400937
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000938 # Remove the named symlink if it exists.
939 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000940 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000941 # Remove the symlink itself, not its destination.
942 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000943
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000944 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000945 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000947 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000948 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000949 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000950 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000951 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400952 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000953 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400954
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000955 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400956 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400957
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000958 # The named cache does not exist, create an empty directory. When
959 # uninstalling, we will move it back to the cache and create an an
960 # entry.
961 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000962 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000963 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400964 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000965 # Raise using the original traceback.
966 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000967 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000968 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000969 finally:
970 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400971
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000972 def uninstall(self, src, name):
973 """Moves the cache directory back into the named cache hive for an eventual
974 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400975
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000976 The opposite of install().
977
978 src must be absolute and unicode. Its content is moved back into the local
979 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400980
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000981 Returns the named cache size in bytes.
982
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400983 Raises NamedCacheError if cannot uninstall the cache.
984 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000985 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000986 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400987 with self._lock:
988 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000989 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400990 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000991 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000992 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400993 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400994
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000995 if name in self._lru:
996 # This shouldn't happen but just remove the preexisting one and move
997 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000998 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000999 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001000
Takuto Ikuta93483272020-06-05 09:06:34 +00001001 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001002 size = _get_recursive_size(src)
Takuto Ikuta262f8292020-08-26 01:54:22 +00001003 logging.info('- Size is %s', size)
1004 if size is None:
1005 # Do not save a named cache that was deleted.
1006 return
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001007
1008 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001009 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001010 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001011 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001012 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +00001013 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001014 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001015
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001016 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001017 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001018
1019 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
1020 # for user convenience.
1021 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001022 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001023 file_path.remove(named_path)
1024 else:
1025 file_path.ensure_tree(os.path.dirname(named_path))
1026
1027 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001028 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001029 logging.info(
1030 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001031 except OSError:
1032 # Ignore on Windows. It happens when running as a normal user or when
1033 # UAC is enabled and the user is a filtered administrator account.
1034 if sys.platform != 'win32':
1035 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001036 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001037 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +00001038 # Raise using the original traceback.
1039 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001040 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001041 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001042 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001043 # Call save() at every uninstall. The assumptions are:
1044 # - The total the number of named caches is low, so the state.json file
1045 # is small, so the time it takes to write it to disk is short.
1046 # - The number of mapped named caches per task is low, so the number of
1047 # times save() is called on tear-down isn't high enough to be
1048 # significant.
1049 # - uninstall() sometimes throws due to file locking on Windows or
1050 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001051 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001052 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1053 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001054
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001055 # Cache interface implementation.
1056
1057 def __len__(self):
1058 with self._lock:
1059 return len(self._lru)
1060
1061 def __iter__(self):
1062 # This is not thread-safe.
1063 return self._lru.__iter__()
1064
John Budorickc6186972020-02-26 00:58:14 +00001065 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001066 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001067 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001068
1069 @property
1070 def total_size(self):
1071 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001072 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001073
1074 def get_oldest(self):
1075 with self._lock:
1076 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001077 # (key, (value, ts))
1078 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001079 except KeyError:
1080 return None
1081
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001082 def remove_oldest(self):
1083 with self._lock:
1084 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001085 _name, size = self._remove_lru_item()
1086 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001087
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001088 def save(self):
1089 with self._lock:
1090 return self._save()
1091
John Budorickc6186972020-02-26 00:58:14 +00001092 def touch(self, *names):
1093 with self._lock:
1094 for name in names:
1095 if name in self._lru:
1096 self._lru.touch(name)
1097 self._save()
1098
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001099 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001100 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001101 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001102 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001103 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001104
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001105 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001106 if self._policies.max_items:
1107 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001108 name, size = self._remove_lru_item()
1109 evicted.append(size)
1110 logging.info(
1111 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1112 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001113
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001114 # Trim according to maximum age.
1115 if self._policies.max_age_secs:
1116 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1117 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001118 _name, (_data, ts) = self._lru.get_oldest()
1119 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001120 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001121 name, size = self._remove_lru_item()
1122 evicted.append(size)
1123 logging.info(
1124 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1125 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001126
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001127 # Trim according to minimum free space.
1128 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001129 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001130 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001131 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001132 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001133 name, size = self._remove_lru_item()
1134 evicted.append(size)
1135 logging.info(
1136 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1137 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001138
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001139 # Trim according to maximum total size.
1140 if self._policies.max_cache_size:
1141 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001142 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001143 if total <= self._policies.max_cache_size:
1144 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001145 name, size = self._remove_lru_item()
1146 evicted.append(size)
1147 logging.info(
1148 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1149 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001150
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001151 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001152 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001153
1154 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001155 """Removes unknown directories.
1156
1157 Does not recalculate the cache size since it's surprisingly slow on some
1158 OSes.
1159 """
1160 success = True
1161 with self._lock:
1162 try:
1163 actual = set(fs.listdir(self.cache_dir))
1164 actual.discard(self.NAMED_DIR)
1165 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001166 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001167 # First, handle the actual cache content.
1168 # Remove missing entries.
1169 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001170 name, size = self._lru.pop(expected[missing])
1171 logging.warning(
1172 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001173 # Remove unexpected items.
1174 for unexpected in (actual - set(expected)):
1175 try:
1176 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001177 logging.warning(
1178 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001179 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001180 file_path.rmtree(p)
1181 else:
1182 fs.remove(p)
1183 except (IOError, OSError) as e:
1184 logging.error('Failed to remove %s: %s', unexpected, e)
1185 success = False
1186
1187 # Second, fix named cache links.
1188 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001189 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001190 actual = set(fs.listdir(named))
1191 expected = set(self._lru)
1192 # Confirm entries. Do not add missing ones for now.
1193 for name in expected.intersection(actual):
1194 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001195 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001196 if fs.islink(p):
1197 link = fs.readlink(p)
1198 if expected_link == link:
1199 continue
1200 logging.warning(
1201 'Unexpected symlink for cache %s: %s, expected %s',
1202 name, link, expected_link)
1203 else:
1204 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001205 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001206 file_path.rmtree(p)
1207 else:
1208 fs.remove(p)
1209 # Remove unexpected items.
1210 for unexpected in (actual - expected):
1211 try:
1212 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1213 if fs.isdir(p):
1214 file_path.rmtree(p)
1215 else:
1216 fs.remove(p)
1217 except (IOError, OSError) as e:
1218 logging.error('Failed to remove %s: %s', unexpected, e)
1219 success = False
1220 finally:
1221 self._save()
1222 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001223
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001224 # Internal functions.
1225
1226 def _try_upgrade(self):
1227 """Upgrades from the old format to the new one if necessary.
1228
1229 This code can be removed so all bots are known to have the right new format.
1230 """
1231 if not self._lru:
1232 return
1233 _name, (data, _ts) = self._lru.get_oldest()
1234 if isinstance(data, (list, tuple)):
1235 return
1236 # Update to v2.
1237 def upgrade(_name, rel_cache):
1238 abs_cache = os.path.join(self.cache_dir, rel_cache)
1239 return rel_cache, _get_recursive_size(abs_cache)
1240 self._lru.transform(upgrade)
1241 self._save()
1242
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001243 def _remove_lru_item(self):
1244 """Removes the oldest LRU entry. LRU must not be empty."""
1245 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1246 logging.info('Removing named cache %r', name)
1247 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001248 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001249
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001250 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001251 """Creates and returns relative path of a new cache directory.
1252
1253 In practice, it is a 2-letter string.
1254 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001255 # We randomly generate directory names that have two lower/upper case
1256 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1257 abc_len = len(self._DIR_ALPHABET)
1258 tried = set()
1259 while len(tried) < 1000:
1260 i = random.randint(0, abc_len * abc_len - 1)
1261 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001262 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001263 if rel_path in tried:
1264 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001265 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001266 if not fs.exists(abs_path):
1267 return rel_path
1268 tried.add(rel_path)
1269 raise NamedCacheError(
1270 'could not allocate a new cache dir, too many cache dirs')
1271
1272 def _remove(self, name):
1273 """Removes a cache directory and entry.
1274
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001275 Returns:
1276 Number of caches deleted.
1277 """
1278 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001279 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001280 named_dir = self._get_named_path(name)
1281 if fs.islink(named_dir):
1282 fs.unlink(named_dir)
1283
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001284 # Then remove the actual data.
1285 if name not in self._lru:
1286 return
1287 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001288 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001289 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001290 file_path.rmtree(abs_path)
1291 self._lru.pop(name)
1292
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001293 def _save(self):
1294 self._lock.assert_locked()
1295 file_path.ensure_tree(self.cache_dir)
1296 self._lru.save(self.state_file)
1297
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001298 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001299 return os.path.join(self.cache_dir, self.NAMED_DIR, name)