blob: 646f21f0efea9a802f77e0081d986d864e154983 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
109
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000110def _get_recursive_size(path):
111 """Returns the total data size for the specified path.
112
113 This function can be surprisingly slow on OSX, so its output should be cached.
114 """
115 try:
116 total = 0
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000117 if sys.platform == 'win32':
118 # Use scandir in windows for faster execution.
119 # Do not use in other OS due to crbug.com/989409
120 stack = [path]
121 while stack:
122 for entry in scandir.scandir(stack.pop()):
123 if entry.is_symlink():
124 continue
125 if entry.is_file():
126 total += entry.stat().st_size
127 elif entry.is_dir():
128 stack.append(entry.path)
129 else:
130 logging.warning('non directory/file entry: %s', entry)
131 return total
132
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000133 for root, _, files in fs.walk(path):
134 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000135 st = fs.lstat(os.path.join(root, f))
136 if stat.S_ISLNK(st.st_mode):
137 continue
138 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000139 return total
140 except (IOError, OSError, UnicodeEncodeError) as exc:
141 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
142 return None
143
144
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400145class NamedCacheError(Exception):
146 """Named cache specific error."""
147
148
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400149class NoMoreSpace(Exception):
150 """Not enough space to map the whole directory."""
151 pass
152
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400153
154class CachePolicies(object):
155 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
156 """Common caching policies for the multiple caches (isolated, named, cipd).
157
158 Arguments:
159 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
160 cache is effectively a leak.
161 - min_free_space: Trim if disk free space becomes lower than this value. If
162 0, it will unconditionally fill the disk.
163 - max_items: Maximum number of items to keep in the cache. If 0, do not
164 enforce a limit.
165 - max_age_secs: Maximum age an item is kept in the cache until it is
166 automatically evicted. Having a lot of dead luggage slows
167 everything down.
168 """
169 self.max_cache_size = max_cache_size
170 self.min_free_space = min_free_space
171 self.max_items = max_items
172 self.max_age_secs = max_age_secs
173
174 def __str__(self):
175 return (
176 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
177 'max_age_secs=%s)') % (
178 self.max_cache_size, self.max_items, self.min_free_space,
179 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400180
181
182class CacheMiss(Exception):
183 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400184 def __init__(self, digest):
185 self.digest = digest
186 super(CacheMiss, self).__init__(
187 'Item with digest %r is not found in cache' % digest)
188
189
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400190class Cache(object):
191 def __init__(self, cache_dir):
192 if cache_dir is not None:
193 assert isinstance(cache_dir, unicode), cache_dir
194 assert file_path.isabs(cache_dir), cache_dir
195 self.cache_dir = cache_dir
196 self._lock = threading_utils.LockWithAssert()
197 # Profiling values.
198 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400199 self._used = []
200
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000201 def __nonzero__(self):
202 """A cache is always True.
203
204 Otherwise it falls back to __len__, which is surprising.
205 """
206 return True
207
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000208 def __len__(self):
209 """Returns the number of entries in the cache."""
210 raise NotImplementedError()
211
212 def __iter__(self):
213 """Iterates over all the entries names."""
214 raise NotImplementedError()
215
216 def __contains__(self, name):
217 """Returns if an entry is in the cache."""
218 raise NotImplementedError()
219
220 @property
221 def total_size(self):
222 """Returns the total size of the cache in bytes."""
223 raise NotImplementedError()
224
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400225 @property
226 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000227 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400228 with self._lock:
229 return self._added[:]
230
231 @property
232 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000233 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400234 with self._lock:
235 return self._used[:]
236
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000237 def get_oldest(self):
238 """Returns timestamp of oldest cache entry or None.
239
240 Returns:
241 Timestamp of the oldest item.
242
243 Used for manual trimming.
244 """
245 raise NotImplementedError()
246
247 def remove_oldest(self):
248 """Removes the oldest item from the cache.
249
250 Returns:
251 Size of the oldest item.
252
253 Used for manual trimming.
254 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400255 raise NotImplementedError()
256
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000257 def save(self):
258 """Saves the current cache to disk."""
259 raise NotImplementedError()
260
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400261 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000262 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400263
264 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000265 Slice with the size of evicted items.
266 """
267 raise NotImplementedError()
268
269 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 """Deletes any corrupted item from the cache, then calls trim(), then
271 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000272
273 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 """
275 raise NotImplementedError()
276
277
278class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400279 """Content addressed cache that stores objects temporarily.
280
281 It can be accessed concurrently from multiple threads, so it should protect
282 its internal state with some lock.
283 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400284
285 def __enter__(self):
286 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000287 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400288 return self
289
290 def __exit__(self, _exc_type, _exec_value, _traceback):
291 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000292 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400293 return False
294
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400295 def touch(self, digest, size):
296 """Ensures item is not corrupted and updates its LRU position.
297
298 Arguments:
299 digest: hash digest of item to check.
300 size: expected size of this item.
301
302 Returns:
303 True if item is in cache and not corrupted.
304 """
305 raise NotImplementedError()
306
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400307 def getfileobj(self, digest):
308 """Returns a readable file like object.
309
310 If file exists on the file system it will have a .name attribute with an
311 absolute path to the file.
312 """
313 raise NotImplementedError()
314
315 def write(self, digest, content):
316 """Reads data from |content| generator and stores it in cache.
317
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000318 It is possible to write to an object that already exists. It may be
319 ignored (sent to /dev/null) but the timestamp is still updated.
320
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321 Returns digest to simplify chaining.
322 """
323 raise NotImplementedError()
324
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400325
326class MemoryContentAddressedCache(ContentAddressedCache):
327 """ContentAddressedCache implementation that stores everything in memory."""
328
Lei Leife202df2019-06-11 17:33:34 +0000329 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400330 """Args:
331 file_mode_mask: bit mask to AND file mode with. Default value will make
332 all mapped files to be read only.
333 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400334 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400335 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000336 # Items in a LRU lookup dict(digest: size).
337 self._lru = lru.LRUDict()
338
339 # Cache interface implementation.
340
341 def __len__(self):
342 with self._lock:
343 return len(self._lru)
344
345 def __iter__(self):
346 # This is not thread-safe.
347 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348
349 def __contains__(self, digest):
350 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000351 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400352
353 @property
354 def total_size(self):
355 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000356 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400357
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000358 def get_oldest(self):
359 with self._lock:
360 try:
361 # (key, (value, ts))
362 return self._lru.get_oldest()[1][1]
363 except KeyError:
364 return None
365
366 def remove_oldest(self):
367 with self._lock:
368 # TODO(maruel): Update self._added.
369 # (key, (value, ts))
370 return len(self._lru.pop_oldest()[1][0])
371
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000372 def save(self):
373 pass
374
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000375 def trim(self):
376 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000377 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378
379 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000380 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400381 pass
382
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000383 # ContentAddressedCache interface implementation.
384
385 def __contains__(self, digest):
386 with self._lock:
387 return digest in self._lru
388
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400389 def touch(self, digest, size):
390 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000391 try:
392 self._lru.touch(digest)
393 except KeyError:
394 return False
395 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400397 def getfileobj(self, digest):
398 with self._lock:
399 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000400 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400401 except KeyError:
402 raise CacheMiss(digest)
403 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400405 return io.BytesIO(d)
406
407 def write(self, digest, content):
408 # Assemble whole stream before taking the lock.
409 data = ''.join(content)
410 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000411 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400412 self._added.append(len(data))
413 return digest
414
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415
416class DiskContentAddressedCache(ContentAddressedCache):
417 """Stateful LRU cache in a flat hash table in a directory.
418
419 Saves its state as json file.
420 """
421 STATE_FILE = u'state.json'
422
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000423 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400424 """
425 Arguments:
426 cache_dir: directory where to place the cache.
427 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000429 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400430 """
431 # All protected methods (starting with '_') except _path should be called
432 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400433 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400434 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400435 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
436 # Items in a LRU lookup dict(digest: size).
437 self._lru = lru.LRUDict()
438 # Current cached free disk space. It is updated by self._trim().
439 file_path.ensure_tree(self.cache_dir)
440 self._free_disk = file_path.get_free_space(self.cache_dir)
441 # The first item in the LRU cache that must not be evicted during this run
442 # since it was referenced. All items more recent that _protected in the LRU
443 # cache are also inherently protected. It could be a set() of all items
444 # referenced but this increases memory usage without a use case.
445 self._protected = None
446 # Cleanup operations done by self._load(), if any.
447 self._operations = []
448 with tools.Profiler('Setup'):
449 with self._lock:
450 self._load(trim, time_fn)
451
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000452 # Cache interface implementation.
453
454 def __len__(self):
455 with self._lock:
456 return len(self._lru)
457
458 def __iter__(self):
459 # This is not thread-safe.
460 return self._lru.__iter__()
461
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400462 def __contains__(self, digest):
463 with self._lock:
464 return digest in self._lru
465
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400466 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400467 def total_size(self):
468 with self._lock:
469 return sum(self._lru.itervalues())
470
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000471 def get_oldest(self):
472 with self._lock:
473 try:
474 # (key, (value, ts))
475 return self._lru.get_oldest()[1][1]
476 except KeyError:
477 return None
478
479 def remove_oldest(self):
480 with self._lock:
481 # TODO(maruel): Update self._added.
482 return self._remove_lru_file(True)
483
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000484 def save(self):
485 with self._lock:
486 return self._save()
487
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000488 def trim(self):
489 """Forces retention policies."""
490 with self._lock:
491 return self._trim()
492
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400493 def cleanup(self):
494 """Cleans up the cache directory.
495
496 Ensures there is no unknown files in cache_dir.
497 Ensures the read-only bits are set correctly.
498
499 At that point, the cache was already loaded, trimmed to respect cache
500 policies.
501 """
502 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000503 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400504 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000505 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 # It'd be faster if there were a readdir() function.
507 for filename in fs.listdir(self.cache_dir):
508 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000509 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400510 continue
511 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000512 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400513 previous.remove(filename)
514 continue
515
516 # An untracked file. Delete it.
517 logging.warning('Removing unknown file %s from cache', filename)
518 p = self._path(filename)
519 if fs.isdir(p):
520 try:
521 file_path.rmtree(p)
522 except OSError:
523 pass
524 else:
525 file_path.try_remove(p)
526 continue
527
528 if previous:
529 # Filter out entries that were not found.
530 logging.warning('Removed %d lost files', len(previous))
531 for filename in previous:
532 self._lru.pop(filename)
533 self._save()
534
535 # What remains to be done is to hash every single item to
536 # detect corruption, then save to ensure state.json is up to date.
537 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
538 # TODO(maruel): Let's revisit once directory metadata is stored in
539 # state.json so only the files that had been mapped since the last cleanup()
540 # call are manually verified.
541 #
542 #with self._lock:
543 # for digest in self._lru:
544 # if not isolated_format.is_valid_hash(
545 # self._path(digest), self.hash_algo):
546 # self.evict(digest)
547 # logging.info('Deleted corrupted item: %s', digest)
548
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000549 # ContentAddressedCache interface implementation.
550
551 def __contains__(self, digest):
552 with self._lock:
553 return digest in self._lru
554
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400555 def touch(self, digest, size):
556 """Verifies an actual file is valid and bumps its LRU position.
557
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000558 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400559
560 Note that is doesn't compute the hash so it could still be corrupted if the
561 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400562 """
563 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000564 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565
566 # Update its LRU position.
567 with self._lock:
568 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000569 if looks_valid:
570 # Exists but not in the LRU anymore.
571 self._delete_file(digest, size)
572 return False
573 if not looks_valid:
574 self._lru.pop(digest)
575 # Exists but not in the LRU anymore.
576 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400577 return False
578 self._lru.touch(digest)
579 self._protected = self._protected or digest
580 return True
581
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400582 def getfileobj(self, digest):
583 try:
584 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585 except IOError:
586 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000587 with self._lock:
588 try:
589 self._used.append(self._lru[digest])
590 except KeyError:
591 # If the digest is not actually in _lru, assume it is a cache miss.
592 # Existing file will be overwritten by whoever uses the cache and added
593 # to _lru.
594 f.close()
595 raise CacheMiss(digest)
596 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400597
598 def write(self, digest, content):
599 assert content is not None
600 with self._lock:
601 self._protected = self._protected or digest
602 path = self._path(digest)
603 # A stale broken file may remain. It is possible for the file to have write
604 # access bit removed which would cause the file_write() call to fail to open
605 # in write mode. Take no chance here.
606 file_path.try_remove(path)
607 try:
608 size = file_write(path, content)
609 except:
610 # There are two possible places were an exception can occur:
611 # 1) Inside |content| generator in case of network or unzipping errors.
612 # 2) Inside file_write itself in case of disk IO errors.
613 # In any case delete an incomplete file and propagate the exception to
614 # caller, it will be logged there.
615 file_path.try_remove(path)
616 raise
617 # Make the file read-only in the cache. This has a few side-effects since
618 # the file node is modified, so every directory entries to this file becomes
619 # read-only. It's fine here because it is a new file.
620 file_path.set_read_only(path, True)
621 with self._lock:
622 self._add(digest, size)
623 return digest
624
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000625 # Internal functions.
626
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400627 def _load(self, trim, time_fn):
628 """Loads state of the cache from json file.
629
630 If cache_dir does not exist on disk, it is created.
631 """
632 self._lock.assert_locked()
633
634 if not fs.isfile(self.state_file):
635 if not fs.isdir(self.cache_dir):
636 fs.makedirs(self.cache_dir)
637 else:
638 # Load state of the cache.
639 try:
640 self._lru = lru.LRUDict.load(self.state_file)
641 except ValueError as err:
642 logging.error('Failed to load cache state: %s' % (err,))
643 # Don't want to keep broken state file.
644 file_path.try_remove(self.state_file)
645 if time_fn:
646 self._lru.time_fn = time_fn
647 if trim:
648 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400649
650 def _save(self):
651 """Saves the LRU ordering."""
652 self._lock.assert_locked()
653 if sys.platform != 'win32':
654 d = os.path.dirname(self.state_file)
655 if fs.isdir(d):
656 # Necessary otherwise the file can't be created.
657 file_path.set_read_only(d, False)
658 if fs.isfile(self.state_file):
659 file_path.set_read_only(self.state_file, False)
660 self._lru.save(self.state_file)
661
662 def _trim(self):
663 """Trims anything we don't know, make sure enough free space exists."""
664 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000665 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400666
667 # Trim old items.
668 if self.policies.max_age_secs:
669 cutoff = self._lru.time_fn() - self.policies.max_age_secs
670 while self._lru:
671 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000672 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400673 if oldest[1][1] >= cutoff:
674 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000675 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400676
677 # Ensure maximum cache size.
678 if self.policies.max_cache_size:
679 total_size = sum(self._lru.itervalues())
680 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000681 e = self._remove_lru_file(True)
682 evicted.append(e)
683 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400684
685 # Ensure maximum number of items in the cache.
686 if self.policies.max_items and len(self._lru) > self.policies.max_items:
687 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000688 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400689
690 # Ensure enough free space.
691 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400692 while (
693 self.policies.min_free_space and
694 self._lru and
695 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000696 # self._free_disk is updated by this call.
697 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000699 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400700 total_usage = sum(self._lru.itervalues())
701 usage_percent = 0.
702 if total_usage:
703 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
704
705 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000706 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
707 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
708 '%.1fkb)',
709 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400710 self._free_disk / 1024.,
711 total_usage / 1024.,
712 usage_percent,
713 self.policies.max_cache_size / 1024.)
714 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000715 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400716
717 def _path(self, digest):
718 """Returns the path to one item."""
719 return os.path.join(self.cache_dir, digest)
720
721 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000722 """Removes the lastest recently used file and returns its size.
723
724 Updates self._free_disk.
725 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400726 self._lock.assert_locked()
727 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000728 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400729 if not allow_protected and digest == self._protected:
730 total_size = sum(self._lru.itervalues())+size
731 msg = (
732 'Not enough space to fetch the whole isolated tree.\n'
733 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
734 self.policies, total_size, len(self._lru)+1, self._free_disk)
735 raise NoMoreSpace(msg)
736 except KeyError:
737 # That means an internal error.
738 raise NoMoreSpace('Nothing to remove, can\'t happend')
739 digest, (size, _) = self._lru.pop_oldest()
740 logging.debug('Removing LRU file %s', digest)
741 self._delete_file(digest, size)
742 return size
743
744 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
745 """Adds an item into LRU cache marking it as a newest one."""
746 self._lock.assert_locked()
747 if size == UNKNOWN_FILE_SIZE:
748 size = fs.stat(self._path(digest)).st_size
749 self._added.append(size)
750 self._lru.add(digest, size)
751 self._free_disk -= size
752 # Do a quicker version of self._trim(). It only enforces free disk space,
753 # not cache size limits. It doesn't actually look at real free disk space,
754 # only uses its cache values. self._trim() will be called later to enforce
755 # real trimming but doing this quick version here makes it possible to map
756 # an isolated that is larger than the current amount of free disk space when
757 # the cache size is already large.
758 while (
759 self.policies.min_free_space and
760 self._lru and
761 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000762 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400763 if self._remove_lru_file(False) == -1:
764 break
765
766 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000767 """Deletes cache file from the file system.
768
769 Updates self._free_disk.
770 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400771 self._lock.assert_locked()
772 try:
773 if size == UNKNOWN_FILE_SIZE:
774 try:
775 size = fs.stat(self._path(digest)).st_size
776 except OSError:
777 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000778 if file_path.try_remove(self._path(digest)):
779 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400780 except OSError as e:
781 if e.errno != errno.ENOENT:
782 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400783
784
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400785class NamedCache(Cache):
786 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400787
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400788 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400789 name is a short identifier that describes the contents of the cache, e.g.
790 "git_v8" could be all git repositories required by v8 builds, or
791 "build_chromium" could be build artefacts of the Chromium.
792 path is a directory path relative to the task run dir. Cache installation
793 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400794 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400795 _DIR_ALPHABET = string.ascii_letters + string.digits
796 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000797 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400798
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400799 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400800 """Initializes NamedCaches.
801
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802 Arguments:
803 - cache_dir is a directory for persistent cache storage.
804 - policies is a CachePolicies instance.
805 - time_fn is a function that returns timestamp (float) and used to take
806 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400807 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400808 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400809 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000810 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400811 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
812 self._lru = lru.LRUDict()
813 if not fs.isdir(self.cache_dir):
814 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000815 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000816 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400817 self._lru = lru.LRUDict.load(self.state_file)
818 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000819 logging.exception(
820 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400821 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000822 with self._lock:
823 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400824 if time_fn:
825 self._lru.time_fn = time_fn
826
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400827 @property
828 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000829 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000831 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400832
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000833 def install(self, dst, name):
834 """Creates the directory |dst| and moves a previous named cache |name| if it
835 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400836
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000837 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400838
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000839 Returns the reused named cache size in bytes, or 0 if none was present.
840
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841 Raises NamedCacheError if cannot install the cache.
842 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000843 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400844 with self._lock:
845 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000846 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000848 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400849
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000850 # Remove the named symlink if it exists.
851 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000852 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000853 # Remove the symlink itself, not its destination.
854 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000855
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000856 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000857 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000859 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000860 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000861 file_path.ensure_tree(os.path.dirname(dst))
862 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400863 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000864 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400865
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000866 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400867 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400868
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000869 # The named cache does not exist, create an empty directory. When
870 # uninstalling, we will move it back to the cache and create an an
871 # entry.
872 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000873 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000874 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400875 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000876 # Raise using the original traceback.
877 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000878 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000879 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000880 finally:
881 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000883 def uninstall(self, src, name):
884 """Moves the cache directory back into the named cache hive for an eventual
885 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400886
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000887 The opposite of install().
888
889 src must be absolute and unicode. Its content is moved back into the local
890 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400891
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000892 Returns the named cache size in bytes.
893
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400894 Raises NamedCacheError if cannot uninstall the cache.
895 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000896 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400897 with self._lock:
898 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000899 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400900 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000901 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000902 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400903 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400904
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000905 if name in self._lru:
906 # This shouldn't happen but just remove the preexisting one and move
907 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000908 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000909 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000910
911 # Calculate the size of the named cache to keep. It's important because
912 # if size is zero (it's empty), we do not want to add it back to the
913 # named caches cache.
914 size = _get_recursive_size(src)
915 logging.info('- Size is %d', size)
916 if not size:
917 # Do not save empty named cache.
918 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400919
920 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000921 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400922 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000923 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400924 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000925 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400926
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000927 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000928 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000929
930 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
931 # for user convenience.
932 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000933 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000934 file_path.remove(named_path)
935 else:
936 file_path.ensure_tree(os.path.dirname(named_path))
937
938 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000939 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000940 logging.info(
941 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000942 except OSError:
943 # Ignore on Windows. It happens when running as a normal user or when
944 # UAC is enabled and the user is a filtered administrator account.
945 if sys.platform != 'win32':
946 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000947 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400948 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000949 # Raise using the original traceback.
950 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000951 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000952 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000953 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000954 # Call save() at every uninstall. The assumptions are:
955 # - The total the number of named caches is low, so the state.json file
956 # is small, so the time it takes to write it to disk is short.
957 # - The number of mapped named caches per task is low, so the number of
958 # times save() is called on tear-down isn't high enough to be
959 # significant.
960 # - uninstall() sometimes throws due to file locking on Windows or
961 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000962 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400963
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000964 # Cache interface implementation.
965
966 def __len__(self):
967 with self._lock:
968 return len(self._lru)
969
970 def __iter__(self):
971 # This is not thread-safe.
972 return self._lru.__iter__()
973
974 def __contains__(self, digest):
975 with self._lock:
976 return digest in self._lru
977
978 @property
979 def total_size(self):
980 with self._lock:
981 return sum(size for _rel_path, size in self._lru.itervalues())
982
983 def get_oldest(self):
984 with self._lock:
985 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000986 # (key, (value, ts))
987 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000988 except KeyError:
989 return None
990
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000991 def remove_oldest(self):
992 with self._lock:
993 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000994 _name, size = self._remove_lru_item()
995 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000996
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000997 def save(self):
998 with self._lock:
999 return self._save()
1000
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001001 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001002 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001003 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001004 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001005 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001006
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001007 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001008 if self._policies.max_items:
1009 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001010 name, size = self._remove_lru_item()
1011 evicted.append(size)
1012 logging.info(
1013 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1014 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001015
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001016 # Trim according to maximum age.
1017 if self._policies.max_age_secs:
1018 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1019 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001020 _name, (_data, ts) = self._lru.get_oldest()
1021 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001022 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001023 name, size = self._remove_lru_item()
1024 evicted.append(size)
1025 logging.info(
1026 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1027 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001028
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001029 # Trim according to minimum free space.
1030 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001031 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001032 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001033 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001034 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001035 name, size = self._remove_lru_item()
1036 evicted.append(size)
1037 logging.info(
1038 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1039 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001040
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001041 # Trim according to maximum total size.
1042 if self._policies.max_cache_size:
1043 while self._lru:
1044 total = sum(size for _rel_cache, size in self._lru.itervalues())
1045 if total <= self._policies.max_cache_size:
1046 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001047 name, size = self._remove_lru_item()
1048 evicted.append(size)
1049 logging.info(
1050 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1051 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001052
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001053 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001054 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001055
1056 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001057 """Removes unknown directories.
1058
1059 Does not recalculate the cache size since it's surprisingly slow on some
1060 OSes.
1061 """
1062 success = True
1063 with self._lock:
1064 try:
1065 actual = set(fs.listdir(self.cache_dir))
1066 actual.discard(self.NAMED_DIR)
1067 actual.discard(self.STATE_FILE)
1068 expected = {v[0]: k for k, v in self._lru.iteritems()}
1069 # First, handle the actual cache content.
1070 # Remove missing entries.
1071 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001072 name, size = self._lru.pop(expected[missing])
1073 logging.warning(
1074 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001075 # Remove unexpected items.
1076 for unexpected in (actual - set(expected)):
1077 try:
1078 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001079 logging.warning(
1080 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001081 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001082 file_path.rmtree(p)
1083 else:
1084 fs.remove(p)
1085 except (IOError, OSError) as e:
1086 logging.error('Failed to remove %s: %s', unexpected, e)
1087 success = False
1088
1089 # Second, fix named cache links.
1090 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001091 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001092 actual = set(fs.listdir(named))
1093 expected = set(self._lru)
1094 # Confirm entries. Do not add missing ones for now.
1095 for name in expected.intersection(actual):
1096 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001097 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001098 if fs.islink(p):
1099 link = fs.readlink(p)
1100 if expected_link == link:
1101 continue
1102 logging.warning(
1103 'Unexpected symlink for cache %s: %s, expected %s',
1104 name, link, expected_link)
1105 else:
1106 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001107 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001108 file_path.rmtree(p)
1109 else:
1110 fs.remove(p)
1111 # Remove unexpected items.
1112 for unexpected in (actual - expected):
1113 try:
1114 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1115 if fs.isdir(p):
1116 file_path.rmtree(p)
1117 else:
1118 fs.remove(p)
1119 except (IOError, OSError) as e:
1120 logging.error('Failed to remove %s: %s', unexpected, e)
1121 success = False
1122 finally:
1123 self._save()
1124 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001125
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001126 # Internal functions.
1127
1128 def _try_upgrade(self):
1129 """Upgrades from the old format to the new one if necessary.
1130
1131 This code can be removed so all bots are known to have the right new format.
1132 """
1133 if not self._lru:
1134 return
1135 _name, (data, _ts) = self._lru.get_oldest()
1136 if isinstance(data, (list, tuple)):
1137 return
1138 # Update to v2.
1139 def upgrade(_name, rel_cache):
1140 abs_cache = os.path.join(self.cache_dir, rel_cache)
1141 return rel_cache, _get_recursive_size(abs_cache)
1142 self._lru.transform(upgrade)
1143 self._save()
1144
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001145 def _remove_lru_item(self):
1146 """Removes the oldest LRU entry. LRU must not be empty."""
1147 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1148 logging.info('Removing named cache %r', name)
1149 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001150 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001151
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001152 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001153 """Creates and returns relative path of a new cache directory.
1154
1155 In practice, it is a 2-letter string.
1156 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001157 # We randomly generate directory names that have two lower/upper case
1158 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1159 abc_len = len(self._DIR_ALPHABET)
1160 tried = set()
1161 while len(tried) < 1000:
1162 i = random.randint(0, abc_len * abc_len - 1)
1163 rel_path = (
1164 self._DIR_ALPHABET[i / abc_len] +
1165 self._DIR_ALPHABET[i % abc_len])
1166 if rel_path in tried:
1167 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001168 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001169 if not fs.exists(abs_path):
1170 return rel_path
1171 tried.add(rel_path)
1172 raise NamedCacheError(
1173 'could not allocate a new cache dir, too many cache dirs')
1174
1175 def _remove(self, name):
1176 """Removes a cache directory and entry.
1177
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001178 Returns:
1179 Number of caches deleted.
1180 """
1181 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001182 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001183 named_dir = self._get_named_path(name)
1184 if fs.islink(named_dir):
1185 fs.unlink(named_dir)
1186
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001187 # Then remove the actual data.
1188 if name not in self._lru:
1189 return
1190 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001191 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001192 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001193 file_path.rmtree(abs_path)
1194 self._lru.pop(name)
1195
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001196 def _save(self):
1197 self._lock.assert_locked()
1198 file_path.ensure_tree(self.cache_dir)
1199 self._lru.save(self.state_file)
1200
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001201 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001202 return os.path.join(self.cache_dir, self.NAMED_DIR, name)