blob: ced28e5caa501adbac684c94906fb7c6950aa596 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
15
16from utils import file_path
17from utils import fs
18from utils import lru
19from utils import threading_utils
20from utils import tools
21
22
# Sentinel file size used when the correct size is not known in advance,
# generally used for .isolated files. is_valid_file() skips the size check
# when given this value.
UNKNOWN_FILE_SIZE = None
26
27
def file_write(path, content_generator):
  """Streams chunks yielded by |content_generator| into the file at |path|.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      out.write(chunk)
      written += len(chunk)
  return written
44
45
def is_valid_file(path, size):
  """Determines whether the given file appears valid.

  A file is valid when it exists and, unless |size| is UNKNOWN_FILE_SIZE, its
  on-disk size matches the expected one. No content hash is verified.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  try:
    actual = fs.stat(path).st_size
  except OSError as e:
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s',
        os.path.basename(path), e)
    return False
  if actual == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), actual, size)
  return False
66
67
class NamedCacheError(Exception):
  """Error specific to named cache management."""
70
71
class NoMoreSpace(Exception):
  """Not enough space to map the whole directory."""
75
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -040076
class CachePolicies(object):
  """Common caching policies for the multiple caches (isolated, named, cipd).

  Attributes:
    max_cache_size: Trim if the cache gets larger than this value. If 0, the
        cache is effectively a leak.
    min_free_space: Trim if disk free space becomes lower than this value. If
        0, it will unconditionally fill the disk.
    max_items: Maximum number of items to keep in the cache. If 0, do not
        enforce a limit.
    max_age_secs: Maximum age an item is kept in the cache until it is
        automatically evicted. Having a lot of dead luggage slows everything
        down.
  """

  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    fmt = (
        'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
        'max_age_secs=%s)')
    return fmt % (
        self.max_cache_size, self.max_items, self.min_free_space,
        self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400103
104
class CacheMiss(Exception):
  """Raised when an item is not in cache."""

  def __init__(self, digest):
    msg = 'Item with digest %r is not found in cache' % digest
    super(CacheMiss, self).__init__(msg)
    # Digest of the item that was requested but not found.
    self.digest = digest
111
112
class Cache(object):
  """Base class for the caches in this module; owns the lock and counters."""

  def __init__(self, cache_dir):
    if cache_dir is not None:
      assert isinstance(cache_dir, unicode), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values: byte sizes of items added, evicted and read.
    self._added = []
    self._evicted = []
    self._used = []

  @property
  def added(self):
    """Snapshot of the sizes of the items added so far."""
    with self._lock:
      return list(self._added)

  @property
  def used(self):
    """Snapshot of the sizes of the items read so far."""
    with self._lock:
      return list(self._used)

  def cleanup(self):
    """Deletes any corrupted item from the cache and trims it if necessary."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies.

    Returns:
      Number of items evicted.
    """
    raise NotImplementedError()
146
147
class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """

  def __init__(self, *args, **kwargs):
    super(ContentAddressedCache, self).__init__(*args, **kwargs)
    # Filled in by subclasses once the initial cache state is known.
    self._initial_number_items = 0
    self._initial_size = 0

  def __contains__(self, digest):
    raise NotImplementedError()

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface; never swallows exceptions."""
    return False

  @property
  def initial_number_items(self):
    """Number of items in the cache when it was loaded."""
    return self._initial_number_items

  @property
  def initial_size(self):
    """Total size of the cache in bytes when it was loaded."""
    return self._initial_size

  @property
  def number_items(self):
    """Returns the number of items in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()
223
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400224
225class MemoryContentAddressedCache(ContentAddressedCache):
226 """ContentAddressedCache implementation that stores everything in memory."""
227
228 def __init__(self, file_mode_mask=0500):
229 """Args:
230 file_mode_mask: bit mask to AND file mode with. Default value will make
231 all mapped files to be read only.
232 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400233 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400234 self._file_mode_mask = file_mode_mask
235 self._contents = {}
236
237 def __contains__(self, digest):
238 with self._lock:
239 return digest in self._contents
240
241 @property
242 def number_items(self):
243 with self._lock:
244 return len(self._contents)
245
246 @property
247 def total_size(self):
248 with self._lock:
249 return sum(len(i) for i in self._contents.itervalues())
250
251 def cached_set(self):
252 with self._lock:
253 return set(self._contents)
254
255 def cleanup(self):
256 pass
257
258 def touch(self, digest, size):
259 with self._lock:
260 return digest in self._contents
261
262 def evict(self, digest):
263 with self._lock:
264 v = self._contents.pop(digest, None)
265 if v is not None:
266 self._evicted.add(v)
267
268 def getfileobj(self, digest):
269 with self._lock:
270 try:
271 d = self._contents[digest]
272 except KeyError:
273 raise CacheMiss(digest)
274 self._used.append(len(d))
275 return io.BytesIO(d)
276
277 def write(self, digest, content):
278 # Assemble whole stream before taking the lock.
279 data = ''.join(content)
280 with self._lock:
281 self._contents[digest] = data
282 self._added.append(len(data))
283 return digest
284
285 def trim(self):
286 """Trimming is not implemented for MemoryContentAddressedCache."""
287 return 0
288
289
class DiskContentAddressedCache(ContentAddressedCache):
  """Stateful LRU cache in a flat hash table in a directory.

  Saves its state as json file.
  """
  # Name of the JSON state file stored inside cache_dir next to the items.
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
    """
    Arguments:
      cache_dir: directory where to place the cache.
      policies: CachePolicies instance, cache retention policies.
      algo: hashing algorithm used.
      trim: if True to enforce |policies| right away.
        It can be done later by calling trim() explicitly.
      time_fn: optional timestamp function forwarded to the LRU dict; used by
        unit tests.
    """
    # All protected methods (starting with '_') except _path should be called
    # with self._lock held.
    super(DiskContentAddressedCache, self).__init__(cache_dir)
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # Items in a LRU lookup dict(digest: size).
    self._lru = lru.LRUDict()
    # Current cached free disk space. It is updated by self._trim().
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)
    # The first item in the LRU cache that must not be evicted during this run
    # since it was referenced. All items more recent than _protected in the LRU
    # cache are also inherently protected. It could be a set() of all items
    # referenced but this increases memory usage without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []
    with tools.Profiler('Setup'):
      with self._lock:
        self._load(trim, time_fn)

  def __contains__(self, digest):
    # Membership is based on the LRU state only; the file is not revalidated.
    with self._lock:
      return digest in self._lru

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache on exit and logs profiling counters."""
    with tools.Profiler('CleanupTrimming'):
      with self._lock:
        self._trim()

        logging.info(
            '%5d (%8dkb) added',
            len(self._added), sum(self._added) / 1024)
        logging.info(
            '%5d (%8dkb) current',
            len(self._lru),
            sum(self._lru.itervalues()) / 1024)
        logging.info(
            '%5d (%8dkb) evicted',
            len(self._evicted), sum(self._evicted) / 1024)
        logging.info(
            '       %8dkb free',
            self._free_disk / 1024)
    # Never swallows exceptions.
    return False

  @property
  def number_items(self):
    """Returns the number of items tracked by the LRU state."""
    with self._lock:
      return len(self._lru)

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes, per the LRU state."""
    with self._lock:
      return sum(self._lru.itervalues())

  def cached_set(self):
    """Returns a new set of all tracked digests."""
    with self._lock:
      return self._lru.keys_set()

  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    with self._lock:
      fs.chmod(self.cache_dir, 0700)
      # Ensure that all files listed in the state still exist and add new ones.
      previous = self._lru.keys_set()
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0600)
          continue
        if filename in previous:
          # Known item: make it read-only and mark it as seen.
          fs.chmod(os.path.join(self.cache_dir, filename), 0400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning('Removing unknown file %s from cache', filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # Filter out entries that were not found.
        logging.warning('Removed %d lost files', len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # What remains to be done is to hash every single item to
    # detect corruption, then save to ensure state.json is up to date.
    # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
    # TODO(maruel): Let's revisit once directory metadata is stored in
    # state.json so only the files that had been mapped since the last cleanup()
    # call are manually verified.
    #
    #with self._lock:
    #  for digest in self._lru:
    #    if not isolated_format.is_valid_hash(
    #        self._path(digest), self.hash_algo):
    #      self.evict(digest)
    #      logging.info('Deleted corrupted item: %s', digest)

  def touch(self, digest, size):
    """Verifies an actual file is valid and bumps its LRU position.

    Returns False if the file is missing or invalid. Doesn't kick it from LRU
    though (call 'evict' explicitly).

    Note that is doesn't compute the hash so it could still be corrupted if the
    file size didn't change.

    TODO(maruel): More stringent verification while keeping the check fast.
    """
    # Do the check outside the lock.
    if not is_valid_file(self._path(digest), size):
      return False

    # Update its LRU position.
    with self._lock:
      if digest not in self._lru:
        return False
      self._lru.touch(digest)
      # First touched item becomes the eviction barrier for this run.
      self._protected = self._protected or digest
    return True

  def evict(self, digest):
    """Removes the item from both the LRU state and the file system."""
    with self._lock:
      # Do not check for 'digest == self._protected' since it could be because
      # the object is corrupted.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)

  def getfileobj(self, digest):
    """Returns a file object opened for reading on the cached item.

    Raises CacheMiss if the file can't be opened.
    """
    try:
      f = fs.open(self._path(digest), 'rb')
      with self._lock:
        # NOTE(review): self._lru[digest] raises KeyError (not CacheMiss) when
        # the file exists on disk but is not tracked — confirm this is intended.
        self._used.append(self._lru[digest])
      return f
    except IOError:
      raise CacheMiss(digest)

  def write(self, digest, content):
    """Writes the |content| generator to disk and registers it in the LRU.

    Returns |digest| to simplify chaining.
    """
    assert content is not None
    with self._lock:
      # Protect the new item from eviction during this run.
      self._protected = self._protected or digest
    path = self._path(digest)
    # A stale broken file may remain. It is possible for the file to have write
    # access bit removed which would cause the file_write() call to fail to open
    # in write mode. Take no chance here.
    file_path.try_remove(path)
    try:
      size = file_write(path, content)
    except:
      # There are two possible places were an exception can occur:
      #   1) Inside |content| generator in case of network or unzipping errors.
      #   2) Inside file_write itself in case of disk IO errors.
      # In any case delete an incomplete file and propagate the exception to
      # caller, it will be logged there.
      file_path.try_remove(path)
      raise
    # Make the file read-only in the cache. This has a few side-effects since
    # the file node is modified, so every directory entries to this file becomes
    # read-only. It's fine here because it is a new file.
    file_path.set_read_only(path, True)
    with self._lock:
      self._add(digest, size)
    return digest

  def get_oldest(self):
    """Returns digest of the LRU item or None."""
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, digest):
    """Returns timestamp of last use of an item.

    Raises KeyError if item is not found.
    """
    return self._lru.get_timestamp(digest)

  def trim(self):
    """Forces retention policies."""
    with self._lock:
      return self._trim()

  def _load(self, trim, time_fn):
    """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
      if not fs.isdir(self.cache_dir):
        fs.makedirs(self.cache_dir)
    else:
      # Load state of the cache.
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError as err:
        logging.error('Failed to load cache state: %s' % (err,))
        # Don't want to keep broken state file.
        file_path.try_remove(self.state_file)
    if time_fn:
      self._lru.time_fn = time_fn
    if trim:
      self._trim()
    # We want the initial cache size after trimming, i.e. what is readily
    # available.
    self._initial_number_items = len(self._lru)
    self._initial_size = sum(self._lru.itervalues())
    if self._evicted:
      logging.info(
          'Trimming evicted items with the following sizes: %s',
          sorted(self._evicted))

  def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
      file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)

  def _trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    self._lock.assert_locked()

    # Trim old items.
    if self.policies.max_age_secs:
      cutoff = self._lru.time_fn() - self.policies.max_age_secs
      while self._lru:
        oldest = self._lru.get_oldest()
        # oldest is (digest, (size, timestamp)).
        if oldest[1][1] >= cutoff:
          break
        self._remove_lru_file(True)

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self._lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
      for _ in xrange(len(self._lru) - self.policies.max_items):
        self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space += 1
      self._remove_lru_file(True)

    if trimmed_due_to_space:
      total_usage = sum(self._lru.itervalues())
      usage_percent = 0.
      if total_usage:
        # NOTE(review): raises ZeroDivisionError when max_cache_size == 0
        # ("leak" policy) and items remain after space trimming — confirm
        # max_cache_size is always non-zero when min_free_space is set.
        usage_percent = 100. * float(total_usage) / self.policies.max_cache_size

      logging.warning(
          'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
          ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
          trimmed_due_to_space,
          self._free_disk / 1024.,
          total_usage / 1024.,
          usage_percent,
          self.policies.max_cache_size / 1024.)
    self._save()
    return trimmed_due_to_space

  def _path(self, digest):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, digest)

  def _remove_lru_file(self, allow_protected):
    """Removes the lastest recently used file and returns its size.

    Raises NoMoreSpace when the oldest entry is protected and
    allow_protected is False, or when the LRU is empty.
    """
    self._lock.assert_locked()
    try:
      digest, (size, _) = self._lru.get_oldest()
      if not allow_protected and digest == self._protected:
        # NOTE(review): sum(itervalues()) already includes |size|, so
        # total_size counts the oldest item twice here — verify intent.
        total_size = sum(self._lru.itervalues())+size
        msg = (
            'Not enough space to fetch the whole isolated tree.\n'
            '  %s\n cache=%dbytes, %d items; %sb free_space') % (
              self.policies, total_size, len(self._lru)+1, self._free_disk)
        raise NoMoreSpace(msg)
    except KeyError:
      # That means an internal error.
      raise NoMoreSpace('Nothing to remove, can\'t happend')
    digest, (size, _) = self._lru.pop_oldest()
    logging.debug('Removing LRU file %s', digest)
    self._delete_file(digest, size)
    return size

  def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Adds an item into LRU cache marking it as a newest one."""
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
      size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size
    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space when
    # the cache size is already large.
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      # NOTE(review): _remove_lru_file() never returns -1 — it raises
      # NoMoreSpace when the protected item is reached — so this break is
      # dead code and the exception propagates to write()'s caller; confirm.
      if self._remove_lru_file(False) == -1:
        break

  def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Deletes cache file from the file system."""
    self._lock.assert_locked()
    try:
      if size == UNKNOWN_FILE_SIZE:
        try:
          size = fs.stat(self._path(digest)).st_size
        except OSError:
          # File already gone; account it as zero bytes reclaimed.
          size = 0
      file_path.try_remove(self._path(digest))
      self._evicted.append(size)
      self._free_disk += size
    except OSError as e:
      if e.errno != errno.ENOENT:
        logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400664
665
class NamedCache(Cache):
  """Manages cache directories.

  A cache entry is a tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.
  """
  # Alphabet used to generate the two-character cache directory names.
  _DIR_ALPHABET = string.ascii_letters + string.digits
  # Name of the JSON state file stored inside cache_dir.
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, time_fn=None):
    """Initializes NamedCaches.

    Arguments:
    - cache_dir is a directory for persistent cache storage.
    - policies is a CachePolicies instance.
    - time_fn is a function that returns timestamp (float) and used to take
      timestamps when new caches are requested. Used in unit tests.
    """
    super(NamedCache, self).__init__(cache_dir)
    self._policies = policies
    # LRU {cache_name -> cache_location}
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    self._lru = lru.LRUDict()
    if not fs.isdir(self.cache_dir):
      fs.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError:
        logging.exception('failed to load named cache state file')
        logging.warning('deleting named caches')
        # A broken state file invalidates everything: wipe the whole dir.
        file_path.rmtree(self.cache_dir)
    if time_fn:
      self._lru.time_fn = time_fn

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Persists the LRU state on exit; never swallows exceptions."""
    with self._lock:
      self._save()
    return False

  def __len__(self):
    """Returns number of items in the cache.

    NamedCache must be open.
    """
    with self._lock:
      return len(self._lru)

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    NamedCache must be open.
    """
    with self._lock:
      try:
        return self._lru.get_oldest()[0]
      except KeyError:
        return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    NamedCache must be open.

    Raises KeyError if cache is not found.
    """
    with self._lock:
      assert isinstance(name, basestring), name
      return self._lru.get_timestamp(name)

  @property
  def available(self):
    """Returns a set of names of available caches.

    NamedCache must be open.
    """
    with self._lock:
      return self._lru.keys_set()

  def install(self, path, name):
    """Moves the directory for the specified named cache to |path|.

    NamedCache must be open. path must be absolute, unicode and must not exist.

    Raises NamedCacheError if cannot install the cache.
    """
    logging.info('Installing named cache %r to %r', name, path)
    with self._lock:
      try:
        if os.path.isdir(path):
          raise NamedCacheError(
              'installation directory %r already exists' % path)

        rel_cache = self._lru.get(name)
        if rel_cache:
          abs_cache = os.path.join(self.cache_dir, rel_cache)
          if os.path.isdir(abs_cache):
            logging.info('Moving %r to %r', abs_cache, path)
            file_path.ensure_tree(os.path.dirname(path))
            fs.rename(abs_cache, path)
            self._remove(name)
            return

          # The entry existed in the LRU but its directory vanished.
          logging.warning(
              'directory for named cache %r does not exist at %s', name,
              rel_cache)
          self._remove(name)

        # The named cache does not exist, create an empty directory.
        # When uninstalling, we will move it back to the cache and create an
        # an entry.
        file_path.ensure_tree(path)
      except (IOError, OSError) as ex:
        raise NamedCacheError(
            'cannot install cache named %r at %r: %s' % (
              name, path, ex))

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    NamedCache must be open. path must be absolute and unicode.

    Raises NamedCacheError if cannot uninstall the cache.
    """
    logging.info('Uninstalling named cache %r from %r', name, path)
    with self._lock:
      try:
        if not os.path.isdir(path):
          logging.warning(
              'Directory %r does not exist anymore. Cache lost.', path)
          return

        rel_cache = self._lru.get(name)
        if rel_cache:
          # Do not crash because cache already exists.
          logging.warning('overwriting an existing named cache %r', name)
          create_named_link = False
        else:
          rel_cache = self._allocate_dir()
          create_named_link = True

        # Move the dir and create an entry for the named cache.
        abs_cache = os.path.join(self.cache_dir, rel_cache)
        logging.info('Moving %r to %r', path, abs_cache)
        file_path.ensure_tree(os.path.dirname(abs_cache))
        fs.rename(path, abs_cache)
        self._lru.add(name, rel_cache)

        if create_named_link:
          # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short
          # name> for user convenience.
          named_path = self._get_named_path(name)
          if os.path.exists(named_path):
            file_path.remove(named_path)
          else:
            file_path.ensure_tree(os.path.dirname(named_path))
          try:
            fs.symlink(abs_cache, named_path)
            logging.info('Created symlink %r to %r', named_path, abs_cache)
          except OSError:
            # Ignore on Windows. It happens when running as a normal user or
            # when UAC is enabled and the user is a filtered administrator
            # account.
            if sys.platform != 'win32':
              raise
      except (IOError, OSError) as ex:
        raise NamedCacheError(
            'cannot uninstall cache named %r at %r: %s' % (
              name, path, ex))

  def trim(self):
    """Purges cache entries that do not comply with the cache policies.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    with self._lock:
      if not os.path.isdir(self.cache_dir):
        return 0

      removed = []

      def _remove_lru_file():
        """Removes the oldest LRU entry. LRU must not be empty."""
        name, _data = self._lru.get_oldest()
        logging.info('Removing named cache %r', name)
        self._remove(name)
        removed.append(name)

      # Trim according to maximum number of items.
      # NOTE(review): when max_items is 0 this loop empties the whole cache,
      # contradicting CachePolicies' "If 0, do not enforce a limit" — confirm
      # callers never pass max_items=0 here.
      while len(self._lru) > self._policies.max_items:
        _remove_lru_file()

      # Trim according to maximum age.
      if self._policies.max_age_secs:
        cutoff = self._lru.time_fn() - self._policies.max_age_secs
        while self._lru:
          _name, (_content, timestamp) = self._lru.get_oldest()
          if timestamp >= cutoff:
            break
          _remove_lru_file()

      # Trim according to minimum free space.
      if self._policies.min_free_space:
        while True:
          free_space = file_path.get_free_space(self.cache_dir)
          if not self._lru or free_space >= self._policies.min_free_space:
            break
          _remove_lru_file()

      # TODO(maruel): Trim according to self._policies.max_cache_size. Do it
      # last as it requires counting the size of each entry.

      # TODO(maruel): Trim empty directories. An empty directory is not a cache,
      # something needs to be in it.

      return len(removed)

  def cleanup(self):
    # TODO(maruel): Implement.
    pass

  def _allocate_dir(self):
    """Creates and returns relative path of a new cache directory."""
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    # Note: 'i / abc_len' relies on Python 2 integer division.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      rel_path = (
        self._DIR_ALPHABET[i / abc_len] +
        self._DIR_ALPHABET[i % abc_len])
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.cache_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise NamedCacheError(
        'could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory and its LRU entry, plus any named symlink.

    NamedCache must be open. No-op when |name| is unknown. Returns nothing.
    """
    self._lock.assert_locked()
    rel_path = self._lru.get(name)
    if not rel_path:
      return

    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    abs_path = os.path.join(self.cache_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _save(self):
    """Persists the LRU state to the state file."""
    self._lock.assert_locked()
    file_path.ensure_tree(self.cache_dir)
    self._lru.save(self.state_file)

  def _get_named_path(self, name):
    """Returns the path of the convenience symlink for |name|."""
    return os.path.join(self.cache_dir, 'named', name)