blob: 4911c5393bb4da1efd2483bcdfbe47487eec047c [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
15
16from utils import file_path
17from utils import fs
18from utils import lru
19from utils import threading_utils
20from utils import tools
21
22
23# The file size to be used when we don't know the correct file size,
24# generally used for .isolated files.
25UNKNOWN_FILE_SIZE = None
26
27
def file_write(path, content_generator):
  """Streams the chunks produced by |content_generator| into the file at path.

  The parent directory is created first when it is missing.

  Returns the total number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      out.write(chunk)
      written += len(chunk)
  return written
44
45
def is_valid_file(path, size):
  """Checks whether the file at |path| looks valid.

  Validity is currently just existence plus, when |size| is known, an exact
  file size match.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  try:
    on_disk = fs.stat(path).st_size
  except OSError as e:
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s',
        os.path.basename(path), e)
    return False
  if on_disk != size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(path), on_disk, size)
    return False
  return True
66
67
class NamedCacheError(Exception):
  """Errors raised by the named cache machinery."""
70
71
class NoMoreSpace(Exception):
  """Raised when there is not enough space to map the whole directory."""
75
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -040076
class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Retention policies shared by the different caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim once the cache grows past this size. A value of 0
          disables the limit, effectively letting the cache leak.
    - min_free_space: Trim once free disk space drops below this value. A
          value of 0 allows the cache to fill the disk unconditionally.
    - max_items: Cap on the number of cached items. A value of 0 disables the
          cap.
    - max_age_secs: Items older than this are evicted automatically; keeping
          dead luggage around slows everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    fmt = (
        'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
        'max_age_secs=%s)')
    return fmt % (
        self.max_cache_size, self.max_items, self.min_free_space,
        self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400103
104
class CacheMiss(Exception):
  """Raised when a requested item is absent from the cache."""

  def __init__(self, digest):
    message = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(message)
    # Digest of the missing item, kept for callers that want to retry/fetch.
    self.digest = digest
112
113
class ContentAddressedCache(object):
  """Interface for a temporary store of content addressed objects.

  Instances can be used concurrently from multiple threads, so subclasses
  should protect their internal state with self._lock.
  """
  # Directory backing the cache on disk; stays None for purely in-memory
  # implementations.
  cache_dir = None

  def __init__(self):
    self._lock = threading_utils.LockWithAssert()
    # Profiling values.
    self._added = []
    self._evicted = []
    self._used = []
    self._initial_number_items = 0
    self._initial_size = 0

  def __contains__(self, digest):
    raise NotImplementedError()

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  @property
  def added(self):
    """Snapshot of the values recorded for items added during this run."""
    return list(self._added)

  @property
  def evicted(self):
    """Snapshot of the values recorded for items evicted during this run."""
    return list(self._evicted)

  @property
  def used(self):
    """Snapshot of the values recorded for items read during this run."""
    return list(self._used)

  @property
  def initial_number_items(self):
    """Number of items present when the cache was loaded."""
    return self._initial_number_items

  @property
  def initial_size(self):
    """Total size in bytes when the cache was loaded."""
    return self._initial_size

  @property
  def number_items(self):
    """Returns the total number of items in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def cleanup(self):
    """Deletes any corrupted item from the cache and trims it if necessary."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies.

    Returns:
      Number of items evicted.
    """
    raise NotImplementedError()
218
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400219
220class MemoryContentAddressedCache(ContentAddressedCache):
221 """ContentAddressedCache implementation that stores everything in memory."""
222
223 def __init__(self, file_mode_mask=0500):
224 """Args:
225 file_mode_mask: bit mask to AND file mode with. Default value will make
226 all mapped files to be read only.
227 """
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000228 super(MemoryContentAddressedCache, self).__init__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400229 self._file_mode_mask = file_mode_mask
230 self._contents = {}
231
232 def __contains__(self, digest):
233 with self._lock:
234 return digest in self._contents
235
236 @property
237 def number_items(self):
238 with self._lock:
239 return len(self._contents)
240
241 @property
242 def total_size(self):
243 with self._lock:
244 return sum(len(i) for i in self._contents.itervalues())
245
246 def cached_set(self):
247 with self._lock:
248 return set(self._contents)
249
250 def cleanup(self):
251 pass
252
253 def touch(self, digest, size):
254 with self._lock:
255 return digest in self._contents
256
257 def evict(self, digest):
258 with self._lock:
259 v = self._contents.pop(digest, None)
260 if v is not None:
261 self._evicted.add(v)
262
263 def getfileobj(self, digest):
264 with self._lock:
265 try:
266 d = self._contents[digest]
267 except KeyError:
268 raise CacheMiss(digest)
269 self._used.append(len(d))
270 return io.BytesIO(d)
271
272 def write(self, digest, content):
273 # Assemble whole stream before taking the lock.
274 data = ''.join(content)
275 with self._lock:
276 self._contents[digest] = data
277 self._added.append(len(data))
278 return digest
279
280 def trim(self):
281 """Trimming is not implemented for MemoryContentAddressedCache."""
282 return 0
283
284
class DiskContentAddressedCache(ContentAddressedCache):
  """Stateful LRU cache in a flat hash table in a directory.

  One file per item, named after its digest. The LRU ordering and item sizes
  are persisted as a json file (STATE_FILE) inside cache_dir.
  """
  # Name of the file, inside cache_dir, holding the serialized LRU state.
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
    """
    Arguments:
      cache_dir: directory where to place the cache.
      policies: CachePolicies instance, cache retention policies.
      hash_algo: hashing algorithm used.
      trim: if True, enforce |policies| right away.
        It can be done later by calling trim() explicitly.
      time_fn: optional function returning the current time; installed as the
        LRU dict's clock by _load().
    """
    # All protected methods (starting with '_') except _path should be called
    # with self._lock held.
    super(DiskContentAddressedCache, self).__init__()
    self.cache_dir = cache_dir
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # Items in a LRU lookup dict(digest: size).
    self._lru = lru.LRUDict()
    # Current cached free disk space. It is updated by self._trim().
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)
    # The first item in the LRU cache that must not be evicted during this run
    # since it was referenced. All items more recent that _protected in the LRU
    # cache are also inherently protected. It could be a set() of all items
    # referenced but this increases memory usage without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []
    with tools.Profiler('Setup'):
      with self._lock:
        self._load(trim, time_fn)

  def __contains__(self, digest):
    """Returns True if |digest| is tracked in the LRU state."""
    with self._lock:
      return digest in self._lru

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface: trims the cache and logs usage statistics."""
    with tools.Profiler('CleanupTrimming'):
      with self._lock:
        self._trim()

      logging.info(
          '%5d (%8dkb) added',
          len(self._added), sum(self._added) / 1024)
      logging.info(
          '%5d (%8dkb) current',
          len(self._lru),
          sum(self._lru.itervalues()) / 1024)
      logging.info(
          '%5d (%8dkb) evicted',
          len(self._evicted), sum(self._evicted) / 1024)
      logging.info(
          '       %8dkb free',
          self._free_disk / 1024)
    return False

  @property
  def number_items(self):
    """Returns the total number of items in the cache."""
    with self._lock:
      return len(self._lru)

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    with self._lock:
      return sum(self._lru.itervalues())

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    with self._lock:
      return self._lru.keys_set()

  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    with self._lock:
      fs.chmod(self.cache_dir, 0700)
      # Ensure that all files listed in the state still exist and add new ones.
      previous = self._lru.keys_set()
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0600)
          continue
        if filename in previous:
          fs.chmod(os.path.join(self.cache_dir, filename), 0400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning('Removing unknown file %s from cache', filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # Filter out entries that were not found.
        logging.warning('Removed %d lost files', len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # What remains to be done is to hash every single item to
    # detect corruption, then save to ensure state.json is up to date.
    # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
    # TODO(maruel): Let's revisit once directory metadata is stored in
    # state.json so only the files that had been mapped since the last cleanup()
    # call are manually verified.
    #
    #with self._lock:
    #  for digest in self._lru:
    #    if not isolated_format.is_valid_hash(
    #        self._path(digest), self.hash_algo):
    #      self.evict(digest)
    #      logging.info('Deleted corrupted item: %s', digest)

  def touch(self, digest, size):
    """Verifies an actual file is valid and bumps its LRU position.

    Returns False if the file is missing or invalid. Doesn't kick it from LRU
    though (call 'evict' explicitly).

    Note that it doesn't compute the hash so it could still be corrupted if the
    file size didn't change.

    TODO(maruel): More stringent verification while keeping the check fast.
    """
    # Do the check outside the lock.
    if not is_valid_file(self._path(digest), size):
      return False

    # Update its LRU position.
    with self._lock:
      if digest not in self._lru:
        return False
      self._lru.touch(digest)
      # The first touched item becomes the eviction floor for this run.
      self._protected = self._protected or digest
    return True

  def evict(self, digest):
    """Removes the item from the LRU state and deletes its backing file."""
    with self._lock:
      # Do not check for 'digest == self._protected' since it could be because
      # the object is corrupted.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)

  def getfileobj(self, digest):
    """Returns a readable file object for the cached item.

    Raises CacheMiss if the backing file cannot be opened.
    """
    try:
      f = fs.open(self._path(digest), 'rb')
      with self._lock:
        # NOTE(review): raises KeyError (not CacheMiss) when the file exists
        # on disk but is not tracked in the LRU state — confirm intended.
        self._used.append(self._lru[digest])
      return f
    except IOError:
      raise CacheMiss(digest)

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Returns digest to simplify chaining.
    """
    assert content is not None
    with self._lock:
      self._protected = self._protected or digest
    path = self._path(digest)
    # A stale broken file may remain. It is possible for the file to have write
    # access bit removed which would cause the file_write() call to fail to open
    # in write mode. Take no chance here.
    file_path.try_remove(path)
    try:
      size = file_write(path, content)
    except:
      # There are two possible places were an exception can occur:
      #   1) Inside |content| generator in case of network or unzipping errors.
      #   2) Inside file_write itself in case of disk IO errors.
      # In any case delete an incomplete file and propagate the exception to
      # caller, it will be logged there.
      file_path.try_remove(path)
      raise
    # Make the file read-only in the cache. This has a few side-effects since
    # the file node is modified, so every directory entries to this file becomes
    # read-only. It's fine here because it is a new file.
    file_path.set_read_only(path, True)
    with self._lock:
      self._add(digest, size)
    return digest

  def get_oldest(self):
    """Returns digest of the LRU item or None."""
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, digest):
    """Returns timestamp of last use of an item.

    Raises KeyError if item is not found.
    """
    return self._lru.get_timestamp(digest)

  def trim(self):
    """Forces retention policies.

    Returns:
      Number of items evicted due to insufficient free disk space.
    """
    with self._lock:
      return self._trim()

  def _load(self, trim, time_fn):
    """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
      if not fs.isdir(self.cache_dir):
        fs.makedirs(self.cache_dir)
    else:
      # Load state of the cache.
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError as err:
        logging.error('Failed to load cache state: %s' % (err,))
        # Don't want to keep broken state file.
        file_path.try_remove(self.state_file)
    if time_fn:
      self._lru.time_fn = time_fn
    if trim:
      self._trim()
    # We want the initial cache size after trimming, i.e. what is readily
    # available.
    self._initial_number_items = len(self._lru)
    self._initial_size = sum(self._lru.itervalues())
    if self._evicted:
      logging.info(
          'Trimming evicted items with the following sizes: %s',
          sorted(self._evicted))

  def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
      file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)

  def _trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Returns the number of items evicted because of insufficient free space.
    """
    self._lock.assert_locked()

    # Trim old items.
    if self.policies.max_age_secs:
      cutoff = self._lru.time_fn() - self.policies.max_age_secs
      while self._lru:
        oldest = self._lru.get_oldest()
        # oldest is (digest, (size, timestamp)).
        if oldest[1][1] >= cutoff:
          break
        self._remove_lru_file(True)

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self._lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
      for _ in xrange(len(self._lru) - self.policies.max_items):
        self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space += 1
      self._remove_lru_file(True)

    if trimmed_due_to_space:
      total_usage = sum(self._lru.itervalues())
      usage_percent = 0.
      if total_usage:
        usage_percent = 100. * float(total_usage) / self.policies.max_cache_size

      logging.warning(
          'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
          ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
          trimmed_due_to_space,
          self._free_disk / 1024.,
          total_usage / 1024.,
          usage_percent,
          self.policies.max_cache_size / 1024.)
    self._save()
    return trimmed_due_to_space

  def _path(self, digest):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, digest)

  def _remove_lru_file(self, allow_protected):
    """Removes the least recently used file and returns its size.

    Raises NoMoreSpace when the LRU is empty, or when allow_protected is False
    and the oldest item is the protected one.
    """
    self._lock.assert_locked()
    try:
      digest, (size, _) = self._lru.get_oldest()
      if not allow_protected and digest == self._protected:
        total_size = sum(self._lru.itervalues())+size
        msg = (
            'Not enough space to fetch the whole isolated tree.\n'
            '  %s\n  cache=%dbytes, %d items; %sb free_space') % (
              self.policies, total_size, len(self._lru)+1, self._free_disk)
        # NoMoreSpace is not a KeyError, so it escapes the except below.
        raise NoMoreSpace(msg)
    except KeyError:
      # That means an internal error.
      raise NoMoreSpace('Nothing to remove, can\'t happend')
    digest, (size, _) = self._lru.pop_oldest()
    logging.debug('Removing LRU file %s', digest)
    self._delete_file(digest, size)
    return size

  def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Adds an item into LRU cache marking it as a newest one."""
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
      size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size
    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space when
    # the cache size is already large.
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      # NOTE(review): _remove_lru_file() never returns -1; when the head is
      # protected it raises NoMoreSpace instead, so this break looks
      # unreachable — confirm intended.
      if self._remove_lru_file(False) == -1:
        break

  def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Deletes cache file from the file system."""
    self._lock.assert_locked()
    try:
      if size == UNKNOWN_FILE_SIZE:
        try:
          size = fs.stat(self._path(digest)).st_size
        except OSError:
          size = 0
      file_path.try_remove(self._path(digest))
      # Track the eviction and credit the freed bytes back to the cached
      # free-disk estimate.
      self._evicted.append(size)
      self._free_disk += size
    except OSError as e:
      if e.errno != errno.ENOENT:
        logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400660
661
class CacheManager(object):
  """Manages named cache directories exposed to a task.

  A task can specify that caches should be present on a bot. A cache is
  tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.
  policies is a CachePolicies instance.
  """

  def __init__(self, root_dir, policies):
    """Initializes NamedCaches.

    |root_dir| is a directory for persistent cache storage.
    """
    assert isinstance(root_dir, unicode), root_dir
    assert file_path.isabs(root_dir), root_dir
    self.root_dir = root_dir
    self._policies = policies
    self._lock = threading_utils.LockWithAssert()
    # LRU {cache_name -> cache_location}
    # It is saved to |root_dir|/state.json.
    # None while the manager is not open(); set while inside open().
    self._lru = None

  @contextlib.contextmanager
  def open(self, time_fn=None):
    """Opens NamedCaches for mutation operations, such as install.

    Only one caller can open the cache manager at a time. If the same thread
    calls this function after opening it earlier, the call will deadlock.

    time_fn is a function that returns timestamp (float) and used to take
    timestamps when new caches are requested.

    Returns a context manager that must be closed as soon as possible.
    """
    with self._lock:
      state_path = os.path.join(self.root_dir, u'state.json')
      assert self._lru is None, 'acquired lock, but self._lru is not None'
      if os.path.isfile(state_path):
        try:
          self._lru = lru.LRUDict.load(state_path)
        except ValueError:
          # Corrupted state file: drop all named caches rather than operating
          # on unknown directories.
          logging.exception('failed to load named cache state file')
          logging.warning('deleting named caches')
          file_path.rmtree(self.root_dir)
      self._lru = self._lru or lru.LRUDict()
      if time_fn:
        self._lru.time_fn = time_fn
      try:
        yield
      finally:
        # Always persist the (possibly mutated) state on the way out.
        file_path.ensure_tree(self.root_dir)
        self._lru.save(state_path)
        self._lru = None

  def __len__(self):
    """Returns number of items in the cache.

    NamedCache must be open.
    """
    return len(self._lru)

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    NamedCache must be open.

    Raises KeyError if cache is not found.
    """
    self._lock.assert_locked()
    assert isinstance(name, basestring), name
    return self._lru.get_timestamp(name)

  @property
  def available(self):
    """Returns a set of names of available caches.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    return self._lru.keys_set()

  def install(self, path, name):
    """Moves the directory for the specified named cache to |path|.

    NamedCache must be open. path must be absolute, unicode and must not exist.

    Raises NamedCacheError if cannot install the cache.
    """
    self._lock.assert_locked()
    logging.info('Installing named cache %r to %r', name, path)
    try:
      _check_abs(path)
      if os.path.isdir(path):
        raise NamedCacheError('installation directory %r already exists' % path)

      rel_cache = self._lru.get(name)
      if rel_cache:
        abs_cache = os.path.join(self.root_dir, rel_cache)
        if os.path.isdir(abs_cache):
          logging.info('Moving %r to %r', abs_cache, path)
          file_path.ensure_tree(os.path.dirname(path))
          fs.rename(abs_cache, path)
          self._remove(name)
          return

        # Stale LRU entry: the backing directory vanished.
        logging.warning('directory for named cache %r does not exist', name)
        self._remove(name)

      # The named cache does not exist, create an empty directory.
      # When uninstalling, we will move it back to the cache and create
      # an entry.
      file_path.ensure_tree(path)
    except (IOError, OSError) as ex:
      raise NamedCacheError(
          'cannot install cache named %r at %r: %s' % (
          name, path, ex))

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    NamedCache must be open. path must be absolute and unicode.

    Raises NamedCacheError if cannot uninstall the cache.
    """
    logging.info('Uninstalling named cache %r from %r', name, path)
    try:
      _check_abs(path)
      if not os.path.isdir(path):
        logging.warning(
            'Directory %r does not exist anymore. Cache lost.', path)
        return

      rel_cache = self._lru.get(name)
      if rel_cache:
        # Do not crash because cache already exists.
        logging.warning('overwriting an existing named cache %r', name)
        create_named_link = False
      else:
        rel_cache = self._allocate_dir()
        create_named_link = True

      # Move the dir and create an entry for the named cache.
      abs_cache = os.path.join(self.root_dir, rel_cache)
      logging.info('Moving %r to %r', path, abs_cache)
      file_path.ensure_tree(os.path.dirname(abs_cache))
      fs.rename(path, abs_cache)
      self._lru.add(name, rel_cache)

      if create_named_link:
        # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
        # for user convenience.
        named_path = self._get_named_path(name)
        if os.path.exists(named_path):
          file_path.remove(named_path)
        else:
          file_path.ensure_tree(os.path.dirname(named_path))
        try:
          fs.symlink(abs_cache, named_path)
          logging.info('Created symlink %r to %r', named_path, abs_cache)
        except OSError:
          # Ignore on Windows. It happens when running as a normal user or when
          # UAC is enabled and the user is a filtered administrator account.
          if sys.platform != 'win32':
            raise
    except (IOError, OSError) as ex:
      raise NamedCacheError(
          'cannot uninstall cache named %r at %r: %s' % (
          name, path, ex))

  def trim(self):
    """Purges cache entries that do not comply with the cache policies.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    if not os.path.isdir(self.root_dir):
      return 0

    removed = []

    def _remove_lru_file():
      """Removes the oldest LRU entry. LRU must not be empty."""
      name, _data = self._lru.get_oldest()
      logging.info('Removing named cache %r', name)
      self._remove(name)
      removed.append(name)

    # Trim according to maximum number of items.
    # NOTE(review): with max_items == 0 this loop evicts every entry, while
    # the CachePolicies docstring says 0 means "do not enforce a limit" —
    # confirm intended.
    while len(self._lru) > self._policies.max_items:
      _remove_lru_file()

    # Trim according to maximum age.
    if self._policies.max_age_secs:
      cutoff = self._lru.time_fn() - self._policies.max_age_secs
      while self._lru:
        _name, (_content, timestamp) = self._lru.get_oldest()
        if timestamp >= cutoff:
          break
        _remove_lru_file()

    # Trim according to minimum free space.
    if self._policies.min_free_space:
      while True:
        free_space = file_path.get_free_space(self.root_dir)
        if not self._lru or free_space >= self._policies.min_free_space:
          break
        _remove_lru_file()

    # TODO(maruel): Trim according to self._policies.max_cache_size. Do it last
    # as it requires counting the size of each entry.

    # TODO(maruel): Trim empty directories. An empty directory is not a cache,
    # something needs to be in it.

    return len(removed)

  # Alphabet used for the two-character cache directory names.
  _DIR_ALPHABET = string.ascii_letters + string.digits

  def _allocate_dir(self):
    """Creates and returns relative path of a new cache directory."""
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      # Relies on Python 2 integer division for the first character.
      rel_path = (
          self._DIR_ALPHABET[i / abc_len] +
          self._DIR_ALPHABET[i % abc_len])
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.root_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise NamedCacheError(
        'could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory and entry.

    NamedCache must be open.

    No-op when |name| has no LRU entry; otherwise also removes the
    convenience symlink when present.
    """
    self._lock.assert_locked()
    rel_path = self._lru.get(name)
    if not rel_path:
      return

    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    abs_path = os.path.join(self.root_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _get_named_path(self, name):
    """Returns the path of the convenience symlink for |name|."""
    return os.path.join(self.root_dir, 'named', name)
943
944
def _check_abs(path):
  """Validates a named cache installation path.

  Raises:
    NamedCacheError: when |path| is not unicode or not absolute.
  """
  if not isinstance(path, unicode):
    raise NamedCacheError('named cache installation path must be unicode')
  if not os.path.isabs(path):
    raise NamedCacheError('named cache installation path must be absolute')