# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Define local cache policies."""

import contextlib
import errno
import io
import logging
import os
import random
import string
import sys

from utils import file_path
from utils import fs
from utils import lru
from utils import threading_utils
from utils import tools


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


def file_write(path, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  total = 0
  with fs.open(path, 'wb') as f:
    for d in content_generator:
      total += len(d)
      f.write(d)
  return total
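
# Illustrative sketch (not part of the original file): file_write() accepts any
# iterable of byte strings, so data can be streamed chunk by chunk. The path
# and values below are made up for the example.
#
#   def _chunks():
#     yield 'hello '
#     yield 'world'
#   assert file_write(u'/tmp/some/dir/greeting.txt', _chunks()) == 11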


def is_valid_file(path, size):
  """Determines if the given file appears valid.

  Currently it just checks that the file exists and that its size matches the
  expectation.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  try:
    actual_size = fs.stat(path).st_size
  except OSError as e:
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s',
        os.path.basename(path), e)
    return False
  if size != actual_size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(path), actual_size, size)
    return False
  return True


class NamedCacheError(Exception):
  """Named cache specific error."""


class NoMoreSpace(Exception):
  """Not enough space to map the whole directory."""
  pass


class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Common caching policies for the multiple caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
          cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
          0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
          enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
          automatically evicted. Having a lot of dead luggage slows
          everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    return (
        'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
        'max_age_secs=%s)') % (
            self.max_cache_size, self.max_items, self.min_free_space,
            self.max_age_secs)
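
# Example (illustrative values only, not defined in this file): policies
# limiting an on-disk cache to 50GiB, at least 2GiB of free disk space, at most
# 100k items, and a 23 day maximum age.
#
#   _POLICIES = CachePolicies(
#       max_cache_size=50 * 1024 * 1024 * 1024,
#       min_free_space=2 * 1024 * 1024 * 1024,
#       max_items=100000,
#       max_age_secs=23 * 24 * 60 * 60)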


class CacheMiss(Exception):
  """Raised when an item is not in cache."""
  def __init__(self, digest):
    self.digest = digest
    super(CacheMiss, self).__init__(
        'Item with digest %r is not found in cache' % digest)


class Cache(object):
  def __init__(self, cache_dir):
    if cache_dir is not None:
      assert isinstance(cache_dir, unicode), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values.
    self._added = []
    self._evicted = []
    self._used = []

  @property
  def added(self):
    with self._lock:
      return self._added[:]

  @property
  def used(self):
    with self._lock:
      return self._used[:]

  def cleanup(self):
    """Deletes any corrupted item from the cache and trims it if necessary."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies.

    Returns:
      Number of items evicted.
    """
    raise NotImplementedError()


class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  def __init__(self, *args, **kwargs):
    super(ContentAddressedCache, self).__init__(*args, **kwargs)
    # These shall be initialized in the constructor.
    self._initial_number_items = 0
    self._initial_size = 0

  def __contains__(self, digest):
    raise NotImplementedError()

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  @property
  def initial_number_items(self):
    return self._initial_number_items

  @property
  def initial_size(self):
    return self._initial_size

  @property
  def number_items(self):
    """Returns the number of items in the cache."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()


class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. The default value makes
          all mapped files read-only.
    """
    super(MemoryContentAddressedCache, self).__init__(None)
    self._file_mode_mask = file_mode_mask
    self._contents = {}

  def __contains__(self, digest):
    with self._lock:
      return digest in self._contents

  @property
  def number_items(self):
    with self._lock:
      return len(self._contents)

  @property
  def total_size(self):
    with self._lock:
      return sum(len(i) for i in self._contents.itervalues())

  def cached_set(self):
    with self._lock:
      return set(self._contents)

  def cleanup(self):
    pass

  def touch(self, digest, size):
    with self._lock:
      return digest in self._contents

  def evict(self, digest):
    with self._lock:
      v = self._contents.pop(digest, None)
      if v is not None:
        # Record the evicted size; _evicted is a list of sizes, consistent with
        # the other Cache implementations.
        self._evicted.append(len(v))

  def getfileobj(self, digest):
    with self._lock:
      try:
        d = self._contents[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
      return io.BytesIO(d)

  def write(self, digest, content):
    # Assemble whole stream before taking the lock.
    data = ''.join(content)
    with self._lock:
      self._contents[digest] = data
      self._added.append(len(data))
    return digest

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return 0
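
# Minimal usage sketch (illustrative, made-up digest): the in-memory cache is
# handy in tests where touching disk is undesirable.
#
#   cache = MemoryContentAddressedCache()
#   cache.write('deadbeef', ['some ', 'content'])
#   assert 'deadbeef' in cache
#   assert cache.getfileobj('deadbeef').read() == 'some content'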


class DiskContentAddressedCache(ContentAddressedCache):
  """Stateful LRU cache in a flat hash table in a directory.

  Saves its state as json file.
  """
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
    """
    Arguments:
      cache_dir: directory where to place the cache.
      policies: CachePolicies instance, cache retention policies.
      hash_algo: hashing algorithm used.
      trim: if True, enforce |policies| right away.
          It can be done later by calling trim() explicitly.
    """
    # All protected methods (starting with '_') except _path should be called
    # with self._lock held.
    super(DiskContentAddressedCache, self).__init__(cache_dir)
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # Items in a LRU lookup dict(digest: size).
    self._lru = lru.LRUDict()
    # Current cached free disk space. It is updated by self._trim().
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)
    # The first item in the LRU cache that must not be evicted during this run
    # since it was referenced. All items more recent than _protected in the LRU
    # cache are also inherently protected. It could be a set() of all items
    # referenced but this increases memory usage without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []
    with tools.Profiler('Setup'):
      with self._lock:
        self._load(trim, time_fn)

  def __contains__(self, digest):
    with self._lock:
      return digest in self._lru

  @property
  def number_items(self):
    with self._lock:
      return len(self._lru)

  @property
  def total_size(self):
    with self._lock:
      return sum(self._lru.itervalues())

  def cached_set(self):
    with self._lock:
      return self._lru.keys_set()

  def cleanup(self):
347 """Cleans up the cache directory.
348
349 Ensures there is no unknown files in cache_dir.
350 Ensures the read-only bits are set correctly.
351
352 At that point, the cache was already loaded, trimmed to respect cache
353 policies.
354 """
    with self._lock:
      fs.chmod(self.cache_dir, 0700)
      # Ensure that all files listed in the state still exist and add new ones.
      previous = self._lru.keys_set()
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0600)
          continue
        if filename in previous:
          fs.chmod(os.path.join(self.cache_dir, filename), 0400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning('Removing unknown file %s from cache', filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # Filter out entries that were not found.
        logging.warning('Removed %d lost files', len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # What remains to be done is to hash every single item to
    # detect corruption, then save to ensure state.json is up to date.
    # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
    # TODO(maruel): Let's revisit once directory metadata is stored in
    # state.json so only the files that had been mapped since the last cleanup()
    # call are manually verified.
    #
    #with self._lock:
    #  for digest in self._lru:
    #    if not isolated_format.is_valid_hash(
    #        self._path(digest), self.hash_algo):
    #      self.evict(digest)
    #      logging.info('Deleted corrupted item: %s', digest)

  def touch(self, digest, size):
    """Verifies an actual file is valid and bumps its LRU position.

    Returns False if the file is missing or invalid. Doesn't kick it from LRU
    though (call 'evict' explicitly).

    Note that it doesn't compute the hash, so the item could still be corrupted
    if the file size didn't change.

    TODO(maruel): More stringent verification while keeping the check fast.
    """
    # Do the check outside the lock.
    if not is_valid_file(self._path(digest), size):
      return False

    # Update its LRU position.
    with self._lock:
      if digest not in self._lru:
        return False
      self._lru.touch(digest)
      self._protected = self._protected or digest
    return True

  def evict(self, digest):
    with self._lock:
      # Do not check for 'digest == self._protected' since it could be because
      # the object is corrupted.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)

  def getfileobj(self, digest):
    try:
      f = fs.open(self._path(digest), 'rb')
      with self._lock:
        self._used.append(self._lru[digest])
      return f
    except IOError:
      raise CacheMiss(digest)

  def write(self, digest, content):
    assert content is not None
    with self._lock:
      self._protected = self._protected or digest
    path = self._path(digest)
    # A stale broken file may remain. It is possible for the file to have the
    # write access bit removed, which would cause the file_write() call below to
    # fail to open it in write mode. Take no chance here.
    file_path.try_remove(path)
    try:
      size = file_write(path, content)
    except:
      # There are two possible places where an exception can occur:
      #   1) Inside |content| generator in case of network or unzipping errors.
      #   2) Inside file_write itself in case of disk IO errors.
      # In any case, delete the incomplete file and propagate the exception to
      # the caller; it will be logged there.
      file_path.try_remove(path)
      raise
    # Make the file read-only in the cache. This has a few side-effects since
    # the file node is modified, so every directory entry to this file becomes
    # read-only. It's fine here because it is a new file.
    file_path.set_read_only(path, True)
    with self._lock:
      self._add(digest, size)
    return digest

  def get_oldest(self):
    """Returns digest of the LRU item or None."""
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, digest):
    """Returns timestamp of last use of an item.

    Raises KeyError if item is not found.
    """
    return self._lru.get_timestamp(digest)

  def trim(self):
    """Forces retention policies."""
    with self._lock:
      return self._trim()

  def _load(self, trim, time_fn):
    """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
      if not fs.isdir(self.cache_dir):
        fs.makedirs(self.cache_dir)
    else:
      # Load state of the cache.
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError as err:
        logging.error('Failed to load cache state: %s' % (err,))
        # Don't want to keep broken state file.
        file_path.try_remove(self.state_file)
    if time_fn:
      self._lru.time_fn = time_fn
    if trim:
      self._trim()
    # We want the initial cache size after trimming, i.e. what is readily
    # available.
    self._initial_number_items = len(self._lru)
    self._initial_size = sum(self._lru.itervalues())
    if self._evicted:
      logging.info(
          'Trimming evicted items with the following sizes: %s',
          sorted(self._evicted))

  def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
      file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)

  def _trim(self):
531 """Trims anything we don't know, make sure enough free space exists."""
    self._lock.assert_locked()

    # Trim old items.
    if self.policies.max_age_secs:
      cutoff = self._lru.time_fn() - self.policies.max_age_secs
      while self._lru:
        oldest = self._lru.get_oldest()
        if oldest[1][1] >= cutoff:
          break
        self._remove_lru_file(True)

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self._lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
      for _ in xrange(len(self._lru) - self.policies.max_items):
        self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space += 1
      self._remove_lru_file(True)

    if trimmed_due_to_space:
      total_usage = sum(self._lru.itervalues())
      usage_percent = 0.
      if total_usage:
        usage_percent = 100. * float(total_usage) / self.policies.max_cache_size

      logging.warning(
          'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
          ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
          trimmed_due_to_space,
          self._free_disk / 1024.,
          total_usage / 1024.,
          usage_percent,
          self.policies.max_cache_size / 1024.)
    self._save()
    return trimmed_due_to_space

  def _path(self, digest):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, digest)

  def _remove_lru_file(self, allow_protected):
586 """Removes the lastest recently used file and returns its size."""
    self._lock.assert_locked()
    try:
      digest, (size, _) = self._lru.get_oldest()
      if not allow_protected and digest == self._protected:
        total_size = sum(self._lru.itervalues())+size
        msg = (
            'Not enough space to fetch the whole isolated tree.\n'
            ' %s\n cache=%dbytes, %d items; %sb free_space') % (
              self.policies, total_size, len(self._lru)+1, self._free_disk)
        raise NoMoreSpace(msg)
    except KeyError:
      # That means an internal error.
      raise NoMoreSpace('Nothing to remove, can\'t happen')
    digest, (size, _) = self._lru.pop_oldest()
    logging.debug('Removing LRU file %s', digest)
    self._delete_file(digest, size)
    return size

  def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Adds an item into LRU cache marking it as a newest one."""
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
      size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size
    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space when
    # the cache size is already large.
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      if self._remove_lru_file(False) == -1:
        break

  def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Deletes cache file from the file system."""
    self._lock.assert_locked()
    try:
      if size == UNKNOWN_FILE_SIZE:
        try:
          size = fs.stat(self._path(digest)).st_size
        except OSError:
          size = 0
      file_path.try_remove(self._path(digest))
      self._evicted.append(size)
      self._free_disk += size
    except OSError as e:
      if e.errno != errno.ENOENT:
        logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
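
# Rough usage sketch (paths and policy values are illustrative, not taken from
# this file): a disk cache is typically created once, used to write and read
# items by digest, then trimmed before shutdown.
#
#   import hashlib
#   policies = CachePolicies(10 * 1024 * 1024 * 1024, 1024 * 1024 * 1024,
#                            10000, 30 * 24 * 60 * 60)
#   cache = DiskContentAddressedCache(
#       unicode(os.path.abspath('cache')), policies, hashlib.sha1, trim=True)
#   data = 'hello'
#   digest = hashlib.sha1(data).hexdigest()
#   cache.write(digest, [data])
#   assert cache.getfileobj(digest).read() == data
#   cache.trim()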


class NamedCache(Cache):
  """Manages cache directories.

  A cache entry is a tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
      "build_chromium" could be build artifacts of Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.
  """
  _DIR_ALPHABET = string.ascii_letters + string.digits
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, time_fn=None):
    """Initializes NamedCaches.

    Arguments:
    - cache_dir is a directory for persistent cache storage.
    - policies is a CachePolicies instance.
    - time_fn is a function that returns a timestamp (float), used to take
      timestamps when new caches are requested. Used in unit tests.
    """
    super(NamedCache, self).__init__(cache_dir)
    self._policies = policies
    # LRU {cache_name -> cache_location}
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    self._lru = lru.LRUDict()
    if not fs.isdir(self.cache_dir):
      fs.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError:
        logging.exception('failed to load named cache state file')
        logging.warning('deleting named caches')
        file_path.rmtree(self.cache_dir)
    if time_fn:
      self._lru.time_fn = time_fn

  def __len__(self):
    """Returns number of items in the cache.

    NamedCache must be open.
    """
    with self._lock:
      return len(self._lru)

  def get_oldest(self):
    """Returns name of the LRU cache or None.

    NamedCache must be open.
    """
    with self._lock:
      try:
        return self._lru.get_oldest()[0]
      except KeyError:
        return None

  def get_timestamp(self, name):
    """Returns timestamp of last use of an item.

    NamedCache must be open.

    Raises KeyError if cache is not found.
    """
    with self._lock:
      assert isinstance(name, basestring), name
      return self._lru.get_timestamp(name)

  @property
  def available(self):
    """Returns a set of names of available caches.

    NamedCache must be open.
    """
    with self._lock:
      return self._lru.keys_set()

  def install(self, path, name):
    """Moves the directory for the specified named cache to |path|.

    NamedCache must be open. path must be absolute, unicode and must not exist.

    Raises NamedCacheError if the cache cannot be installed.
    """
    logging.info('Installing named cache %r to %r', name, path)
    with self._lock:
      try:
        if os.path.isdir(path):
          raise NamedCacheError(
              'installation directory %r already exists' % path)

        rel_cache = self._lru.get(name)
        if rel_cache:
          abs_cache = os.path.join(self.cache_dir, rel_cache)
          if os.path.isdir(abs_cache):
            logging.info('Moving %r to %r', abs_cache, path)
            file_path.ensure_tree(os.path.dirname(path))
            fs.rename(abs_cache, path)
            self._remove(name)
            return

          logging.warning(
              'directory for named cache %r does not exist at %s', name,
              rel_cache)
          self._remove(name)

        # The named cache does not exist, create an empty directory.
        # When uninstalling, we will move it back to the cache and create an
        # entry.
        file_path.ensure_tree(path)
        self._save()
      except (IOError, OSError) as ex:
        raise NamedCacheError(
            'cannot install cache named %r at %r: %s' % (
              name, path, ex))

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    NamedCache must be open. path must be absolute and unicode.

    Raises NamedCacheError if the cache cannot be uninstalled.
    """
    logging.info('Uninstalling named cache %r from %r', name, path)
    with self._lock:
      try:
        if not os.path.isdir(path):
          logging.warning(
              'Directory %r does not exist anymore. Cache lost.', path)
          return

        rel_cache = self._lru.get(name)
        if rel_cache:
          # Do not crash because cache already exists.
          logging.warning('overwriting an existing named cache %r', name)
          create_named_link = False
        else:
          rel_cache = self._allocate_dir()
          create_named_link = True

        # Move the dir and create an entry for the named cache.
        abs_cache = os.path.join(self.cache_dir, rel_cache)
        logging.info('Moving %r to %r', path, abs_cache)
        file_path.ensure_tree(os.path.dirname(abs_cache))
        fs.rename(path, abs_cache)
        self._lru.add(name, rel_cache)

        if create_named_link:
          # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short
          # name> for user convenience.
          named_path = self._get_named_path(name)
          if os.path.exists(named_path):
            file_path.remove(named_path)
          else:
            file_path.ensure_tree(os.path.dirname(named_path))
          try:
            fs.symlink(abs_cache, named_path)
            logging.info('Created symlink %r to %r', named_path, abs_cache)
          except OSError:
            # Ignore on Windows. It happens when running as a normal user or
            # when UAC is enabled and the user is a filtered administrator
            # account.
            if sys.platform != 'win32':
              raise
        self._save()
      except (IOError, OSError) as ex:
        raise NamedCacheError(
            'cannot uninstall cache named %r at %r: %s' % (
              name, path, ex))

  def trim(self):
    """Purges cache entries that do not comply with the cache policies.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    with self._lock:
      if not os.path.isdir(self.cache_dir):
        return 0

      removed = []

      def _remove_lru_file():
        """Removes the oldest LRU entry. LRU must not be empty."""
        name, _data = self._lru.get_oldest()
        logging.info('Removing named cache %r', name)
        self._remove(name)
        removed.append(name)

      # Trim according to maximum number of items.
      while len(self._lru) > self._policies.max_items:
        _remove_lru_file()

      # Trim according to maximum age.
      if self._policies.max_age_secs:
        cutoff = self._lru.time_fn() - self._policies.max_age_secs
        while self._lru:
          _name, (_content, timestamp) = self._lru.get_oldest()
          if timestamp >= cutoff:
            break
          _remove_lru_file()

      # Trim according to minimum free space.
      if self._policies.min_free_space:
        while True:
          free_space = file_path.get_free_space(self.cache_dir)
          if not self._lru or free_space >= self._policies.min_free_space:
            break
          _remove_lru_file()

      # TODO(maruel): Trim according to self._policies.max_cache_size. Do it
      # last as it requires counting the size of each entry.

      # TODO(maruel): Trim empty directories. An empty directory is not a
      # cache, something needs to be in it.

      self._save()
      return len(removed)

  def cleanup(self):
    # TODO(maruel): Implement.
    pass

  def _allocate_dir(self):
    """Creates and returns relative path of a new cache directory."""
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      rel_path = (
          self._DIR_ALPHABET[i / abc_len] +
          self._DIR_ALPHABET[i % abc_len])
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.cache_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise NamedCacheError(
        'could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory and its LRU entry, if present.

    NamedCache must be open.
    """
    self._lock.assert_locked()
    rel_path = self._lru.get(name)
    if not rel_path:
      return

    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    abs_path = os.path.join(self.cache_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _save(self):
    self._lock.assert_locked()
    file_path.ensure_tree(self.cache_dir)
    self._lru.save(self.state_file)

  def _get_named_path(self, name):
    return os.path.join(self.cache_dir, 'named', name)
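

# Illustrative end-to-end sketch for NamedCache (paths, names and policy values
# are made up for the example, not defined in this file):
#
#   policies = CachePolicies(0, 0, 50, 7 * 24 * 60 * 60)
#   caches = NamedCache(unicode(os.path.abspath('named_caches')), policies)
#   run_dir = unicode(os.path.abspath('run/git_cache'))
#   caches.install(run_dir, 'git_v8')    # moves the cache dir into the task dir
#   # ... the task reads and writes files under run_dir ...
#   caches.uninstall(run_dir, 'git_v8')  # moves it back and updates the LRU
#   caches.trim()                        # enforces the policies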