# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.

"""Define local cache policies."""

import contextlib
import errno
import io
import logging
import os
import random
import string
import sys

from utils import file_path
from utils import fs
from utils import lru
from utils import threading_utils
from utils import tools


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


def file_write(path, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  total = 0
  with fs.open(path, 'wb') as f:
    for d in content_generator:
      total += len(d)
      f.write(d)
  return total

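# A minimal usage sketch (not part of the original module): file_write()
# consumes any iterable of byte chunks; the path below is a made-up example.
#
#   chunks = (str(i) for i in xrange(10))
#   written = file_write(u'/tmp/example_cache/item', chunks)
#   # 'written' now holds the total number of bytes streamed to disk.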

def is_valid_file(path, size):
47 """Determines if the given files appears valid.

  Currently it just checks the file exists and its size matches the expectation.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  try:
    actual_size = fs.stat(path).st_size
  except OSError as e:
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s',
        os.path.basename(path), e)
    return False
  if size != actual_size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(path), actual_size, size)
    return False
  return True


def _get_recursive_size(path):
  """Returns the total data size for the specified path.

  This function can be surprisingly slow on OSX, so its output should be cached.
  """
  try:
    total = 0
    for root, _, files in fs.walk(path):
      for f in files:
        total += fs.lstat(os.path.join(root, f)).st_size
    return total
  except (IOError, OSError, UnicodeEncodeError) as exc:
    logging.warning('Exception while getting the size of %s:\n%s', path, exc)
    return None

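# A minimal sketch (not part of the original module) of how the slow
# _get_recursive_size() result is meant to be cached; this mirrors what
# NamedCache.uninstall() does. 'cache_dir', 'rel_cache', 'name' and 'lru_dict'
# are placeholder names.
#
#   size = _get_recursive_size(os.path.join(cache_dir, rel_cache))
#   if size is not None:
#     lru_dict.add(name, (rel_cache, size))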

class NamedCacheError(Exception):
  """Named cache specific error."""


class NoMoreSpace(Exception):
  """Not enough space to map the whole directory."""
  pass


class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
    """Common caching policies for the multiple caches (isolated, named, cipd).

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
          cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
          0, it will unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
          enforce a limit.
    - max_age_secs: Maximum age an item is kept in the cache until it is
          automatically evicted. Having a lot of dead luggage slows
          everything down.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
    self.max_age_secs = max_age_secs

  def __str__(self):
    return (
        'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
        'max_age_secs=%s)') % (
            self.max_cache_size, self.max_items, self.min_free_space,
            self.max_age_secs)

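# A minimal sketch (not part of the original module): a CachePolicies instance
# that keeps the cache under 20GiB and 100k items, requires 2GiB of free disk
# and evicts anything unused for 30 days. The numbers are arbitrary examples.
#
#   policies = CachePolicies(
#       max_cache_size=20 * 1024 * 1024 * 1024,
#       min_free_space=2 * 1024 * 1024 * 1024,
#       max_items=100000,
#       max_age_secs=30 * 24 * 60 * 60)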

class CacheMiss(Exception):
  """Raised when an item is not in cache."""
  def __init__(self, digest):
    self.digest = digest
    super(CacheMiss, self).__init__(
        'Item with digest %r is not found in cache' % digest)


class Cache(object):
  def __init__(self, cache_dir):
    if cache_dir is not None:
      assert isinstance(cache_dir, unicode), cache_dir
      assert file_path.isabs(cache_dir), cache_dir
    self.cache_dir = cache_dir
    self._lock = threading_utils.LockWithAssert()
    # Profiling values.
    self._added = []
    self._evicted = []
    self._used = []

  @property
  def added(self):
    with self._lock:
      return self._added[:]

  @property
  def used(self):
    with self._lock:
      return self._used[:]

  def cleanup(self):
    """Deletes any corrupted item from the cache and trims it if necessary."""
    raise NotImplementedError()

  def trim(self):
    """Enforces cache policies.

    Returns:
      Number of items evicted.
    """
    raise NotImplementedError()


class ContentAddressedCache(Cache):
  """Content addressed cache that stores objects temporarily.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  def __init__(self, *args, **kwargs):
    super(ContentAddressedCache, self).__init__(*args, **kwargs)
    # These shall be initialized in the constructor.
    self._initial_number_items = 0
    self._initial_size = 0

  def __contains__(self, digest):
    raise NotImplementedError()

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  @property
  def initial_number_items(self):
    return self._initial_number_items

  @property
  def initial_size(self):
    return self._initial_size

  @property
  def number_items(self):
197 """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  @property
  def total_size(self):
    """Returns the total size of the cache in bytes."""
    raise NotImplementedError()

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def getfileobj(self, digest):
    """Returns a readable file like object.

    If file exists on the file system it will have a .name attribute with an
    absolute path to the file.
    """
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache.

    Returns digest to simplify chaining.
    """
    raise NotImplementedError()


class MemoryContentAddressedCache(ContentAddressedCache):
  """ContentAddressedCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files read only.
248 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400249 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400250 self._file_mode_mask = file_mode_mask
251 self._contents = {}
252
253 def __contains__(self, digest):
254 with self._lock:
255 return digest in self._contents
256
257 @property
258 def number_items(self):
259 with self._lock:
260 return len(self._contents)
261
262 @property
263 def total_size(self):
264 with self._lock:
265 return sum(len(i) for i in self._contents.itervalues())
266
267 def cached_set(self):
268 with self._lock:
269 return set(self._contents)
270
271 def cleanup(self):
272 pass
273
274 def touch(self, digest, size):
275 with self._lock:
276 return digest in self._contents
277
278 def evict(self, digest):
279 with self._lock:
280 v = self._contents.pop(digest, None)
281 if v is not None:
        self._evicted.append(len(v))

  def getfileobj(self, digest):
    with self._lock:
      try:
        d = self._contents[digest]
      except KeyError:
        raise CacheMiss(digest)
      self._used.append(len(d))
    return io.BytesIO(d)

  def write(self, digest, content):
    # Assemble whole stream before taking the lock.
    data = ''.join(content)
    with self._lock:
      self._contents[digest] = data
      self._added.append(len(data))
    return digest

  def trim(self):
    """Trimming is not implemented for MemoryContentAddressedCache."""
    return 0

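# A minimal sketch (not part of the original module): a round trip through the
# in-memory cache. The digest value is a made-up placeholder; write() accepts
# any iterable of byte chunks and getfileobj() raises CacheMiss on a miss.
#
#   cache = MemoryContentAddressedCache()
#   cache.write('deadbeef', ['hello ', 'world'])
#   try:
#     data = cache.getfileobj('deadbeef').read()
#   except CacheMiss:
#     data = None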

class DiskContentAddressedCache(ContentAddressedCache):
  """Stateful LRU cache in a flat hash table in a directory.

  Saves its state as json file.
  """
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
    """
    Arguments:
      cache_dir: directory where to place the cache.
      policies: CachePolicies instance, cache retention policies.
      hash_algo: hashing algorithm used.
      trim: if True, enforce |policies| right away.
        It can be done later by calling trim() explicitly.
    """
    # All protected methods (starting with '_') except _path should be called
    # with self._lock held.
    super(DiskContentAddressedCache, self).__init__(cache_dir)
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # Items in a LRU lookup dict(digest: size).
    self._lru = lru.LRUDict()
    # Current cached free disk space. It is updated by self._trim().
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)
    # The first item in the LRU cache that must not be evicted during this run
    # since it was referenced. All items more recent than _protected in the LRU
    # cache are also inherently protected. It could be a set() of all items
    # referenced but this increases memory usage without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []
    with tools.Profiler('Setup'):
      with self._lock:
        self._load(trim, time_fn)

  def __contains__(self, digest):
    with self._lock:
      return digest in self._lru

  @property
  def number_items(self):
    with self._lock:
      return len(self._lru)

  @property
  def total_size(self):
    with self._lock:
      return sum(self._lru.itervalues())

  def cached_set(self):
    with self._lock:
      return set(self._lru)

  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there are no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    with self._lock:
      fs.chmod(self.cache_dir, 0700)
      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(self._lru)
      # It'd be faster if there were a readdir() function.
      for filename in fs.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          fs.chmod(os.path.join(self.cache_dir, filename), 0600)
          continue
        if filename in previous:
          fs.chmod(os.path.join(self.cache_dir, filename), 0400)
          previous.remove(filename)
          continue

        # An untracked file. Delete it.
        logging.warning('Removing unknown file %s from cache', filename)
        p = self._path(filename)
        if fs.isdir(p):
          try:
            file_path.rmtree(p)
          except OSError:
            pass
        else:
          file_path.try_remove(p)
        continue

      if previous:
        # Filter out entries that were not found.
        logging.warning('Removed %d lost files', len(previous))
        for filename in previous:
          self._lru.pop(filename)
        self._save()

    # What remains to be done is to hash every single item to
    # detect corruption, then save to ensure state.json is up to date.
    # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
    # TODO(maruel): Let's revisit once directory metadata is stored in
    # state.json so only the files that had been mapped since the last cleanup()
    # call are manually verified.
    #
    #with self._lock:
    #  for digest in self._lru:
    #    if not isolated_format.is_valid_hash(
    #        self._path(digest), self.hash_algo):
    #      self.evict(digest)
    #      logging.info('Deleted corrupted item: %s', digest)

  def touch(self, digest, size):
    """Verifies an actual file is valid and bumps its LRU position.

    Returns False if the file is missing or invalid. Doesn't kick it from LRU
    though (call 'evict' explicitly).

    Note that it doesn't compute the hash so it could still be corrupted if the
    file size didn't change.

    TODO(maruel): More stringent verification while keeping the check fast.
    """
    # Do the check outside the lock.
    if not is_valid_file(self._path(digest), size):
      return False

    # Update its LRU position.
    with self._lock:
      if digest not in self._lru:
        return False
      self._lru.touch(digest)
      self._protected = self._protected or digest
    return True

  def evict(self, digest):
    with self._lock:
      # Do not check for 'digest == self._protected' since it could be because
      # the object is corrupted.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)

  def getfileobj(self, digest):
    try:
      f = fs.open(self._path(digest), 'rb')
      with self._lock:
        self._used.append(self._lru[digest])
      return f
    except IOError:
      raise CacheMiss(digest)

  def write(self, digest, content):
    assert content is not None
    with self._lock:
      self._protected = self._protected or digest
    path = self._path(digest)
    # A stale broken file may remain. It is possible for the file to have its
    # write access bit removed, which would cause the file_write() call to fail
    # to open it in write mode. Take no chance here.
    file_path.try_remove(path)
    try:
      size = file_write(path, content)
    except:
      # There are two possible places where an exception can occur:
      # 1) Inside |content| generator in case of network or unzipping errors.
      # 2) Inside file_write itself in case of disk IO errors.
      # In any case delete an incomplete file and propagate the exception to
      # caller, it will be logged there.
      file_path.try_remove(path)
      raise
    # Make the file read-only in the cache. This has a few side-effects since
    # the file node is modified, so every directory entry to this file becomes
    # read-only. It's fine here because it is a new file.
    file_path.set_read_only(path, True)
    with self._lock:
      self._add(digest, size)
    return digest

  def get_oldest(self):
    """Returns digest of the LRU item or None."""
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, digest):
    """Returns timestamp of last use of an item.

    Raises KeyError if item is not found.
    """
    return self._lru.get_timestamp(digest)

  def trim(self):
    """Forces retention policies."""
    with self._lock:
      return self._trim()

  def _load(self, trim, time_fn):
    """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
      if not fs.isdir(self.cache_dir):
        fs.makedirs(self.cache_dir)
    else:
      # Load state of the cache.
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError as err:
        logging.error('Failed to load cache state: %s' % (err,))
        # Don't want to keep broken state file.
        file_path.try_remove(self.state_file)
    if time_fn:
      self._lru.time_fn = time_fn
    if trim:
      self._trim()
    # We want the initial cache size after trimming, i.e. what is readily
    # available.
    self._initial_number_items = len(self._lru)
    self._initial_size = sum(self._lru.itervalues())
    if self._evicted:
      logging.info(
          'Trimming evicted items with the following sizes: %s',
          sorted(self._evicted))

  def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
      if fs.isfile(self.state_file):
        file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)

  def _trim(self):
547 """Trims anything we don't know, make sure enough free space exists."""
    self._lock.assert_locked()

    # Trim old items.
    if self.policies.max_age_secs:
      cutoff = self._lru.time_fn() - self.policies.max_age_secs
      while self._lru:
        oldest = self._lru.get_oldest()
        if oldest[1][1] >= cutoff:
          break
        self._remove_lru_file(True)

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self._lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
      for _ in xrange(len(self._lru) - self.policies.max_items):
        self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space += 1
      self._remove_lru_file(True)

    if trimmed_due_to_space:
      total_usage = sum(self._lru.itervalues())
      usage_percent = 0.
      if total_usage:
        usage_percent = 100. * float(total_usage) / self.policies.max_cache_size

      logging.warning(
          'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
          ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
          trimmed_due_to_space,
          self._free_disk / 1024.,
          total_usage / 1024.,
          usage_percent,
          self.policies.max_cache_size / 1024.)
    self._save()
    return trimmed_due_to_space

  def _path(self, digest):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, digest)

  def _remove_lru_file(self, allow_protected):
602 """Removes the lastest recently used file and returns its size."""
    self._lock.assert_locked()
    try:
      digest, (size, _) = self._lru.get_oldest()
      if not allow_protected and digest == self._protected:
        total_size = sum(self._lru.itervalues())+size
        msg = (
            'Not enough space to fetch the whole isolated tree.\n'
            '  %s\n  cache=%dbytes, %d items; %sb free_space') % (
              self.policies, total_size, len(self._lru)+1, self._free_disk)
        raise NoMoreSpace(msg)
    except KeyError:
      # That means an internal error.
      raise NoMoreSpace('Nothing to remove, can\'t happen')
    digest, (size, _) = self._lru.pop_oldest()
    logging.debug('Removing LRU file %s', digest)
    self._delete_file(digest, size)
    return size

  def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Adds an item into LRU cache marking it as a newest one."""
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
      size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size
    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space when
    # the cache size is already large.
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      if self._remove_lru_file(False) == -1:
        break

  def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Deletes cache file from the file system."""
    self._lock.assert_locked()
    try:
      if size == UNKNOWN_FILE_SIZE:
        try:
          size = fs.stat(self._path(digest)).st_size
        except OSError:
          size = 0
      file_path.try_remove(self._path(digest))
      self._evicted.append(size)
      self._free_disk += size
    except OSError as e:
      if e.errno != errno.ENOENT:
        logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))

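# A minimal sketch (not part of the original module) of wiring the disk cache
# to CachePolicies. 'policies', 'digest', 'fetched_chunks' and the cache
# directory are placeholders, and hashlib.sha1 (assuming 'import hashlib')
# merely stands in for the hashing algorithm.
#
#   local_cache = DiskContentAddressedCache(
#       u'/b/swarming/c/isolated', policies, hashlib.sha1, trim=True)
#   local_cache.write(digest, fetched_chunks)
#   with local_cache.getfileobj(digest) as f:
#     data = f.read()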

class NamedCache(Cache):
  """Manages cache directories.

  A cache entry is a tuple (name, path), where
    name is a short identifier that describes the contents of the cache, e.g.
      "git_v8" could be all git repositories required by v8 builds, or
665 "build_chromium" could be build artefacts of the Chromium.
    path is a directory path relative to the task run dir. Cache installation
      puts the requested cache directory at the path.
  """
  _DIR_ALPHABET = string.ascii_letters + string.digits
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, time_fn=None):
    """Initializes NamedCaches.

    Arguments:
    - cache_dir is a directory for persistent cache storage.
    - policies is a CachePolicies instance.
    - time_fn is a function that returns a timestamp (float) and is used to
      take timestamps when new caches are requested. Used in unit tests.
    """
    super(NamedCache, self).__init__(cache_dir)
    self._policies = policies
    # LRU {cache_name -> tuple(cache_location, size)}
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    self._lru = lru.LRUDict()
    if not fs.isdir(self.cache_dir):
      fs.makedirs(self.cache_dir)
    elif os.path.isfile(self.state_file):
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError:
        logging.exception('failed to load named cache state file')
        logging.warning('deleting named caches')
        file_path.rmtree(self.cache_dir)
      with self._lock:
        self._try_upgrade()
    if time_fn:
      self._lru.time_fn = time_fn

  def _try_upgrade(self):
    """Upgrades from the old format to the new one if necessary.

    This code can be removed once all bots are known to have the new format.
704 """
705 if not self._lru:
706 return
707 _name, data = self._lru.get_oldest()
708 if isinstance(data[0], (list, tuple)):
709 return
710 # Update to v2.
711 def upgrade(_name, rel_cache):
712 abs_cache = os.path.join(self.cache_dir, rel_cache)
713 return rel_cache, _get_recursive_size(abs_cache)
714 self._lru.transform(upgrade)
715 self._save()
716
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400717 def __len__(self):
718 """Returns number of items in the cache.
719
720 NamedCache must be open.
721 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400722 with self._lock:
723 return len(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400724
725 def get_oldest(self):
726 """Returns name of the LRU cache or None.
727
728 NamedCache must be open.
729 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400730 with self._lock:
731 try:
732 return self._lru.get_oldest()[0]
733 except KeyError:
734 return None
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400735
736 def get_timestamp(self, name):
737 """Returns timestamp of last use of an item.
738
739 NamedCache must be open.
740
741 Raises KeyError if cache is not found.
742 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400743 with self._lock:
744 assert isinstance(name, basestring), name
745 return self._lru.get_timestamp(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400746
747 @property
748 def available(self):
749 """Returns a set of names of available caches.
750
751 NamedCache must be open.
752 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400753 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000754 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400755
756 def install(self, path, name):
757 """Moves the directory for the specified named cache to |path|.
758
759 NamedCache must be open. path must be absolute, unicode and must not exist.
760
    Raises NamedCacheError if the cache cannot be installed.
762 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400763 logging.info('Installing named cache %r to %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400764 with self._lock:
765 try:
766 if os.path.isdir(path):
767 raise NamedCacheError(
768 'installation directory %r already exists' % path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400769
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000770 if name in self._lru:
771 rel_cache, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400772 abs_cache = os.path.join(self.cache_dir, rel_cache)
773 if os.path.isdir(abs_cache):
774 logging.info('Moving %r to %r', abs_cache, path)
775 file_path.ensure_tree(os.path.dirname(path))
776 fs.rename(abs_cache, path)
777 self._remove(name)
778 return
779
780 logging.warning(
781 'directory for named cache %r does not exist at %s', name,
782 rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400783 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400784
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400785 # The named cache does not exist, create an empty directory.
786 # When uninstalling, we will move it back to the cache and create an
        # entry.
        file_path.ensure_tree(path)
      except (IOError, OSError) as ex:
        raise NamedCacheError(
            'cannot install cache named %r at %r: %s' % (
              name, path, ex))
      finally:
        self._save()

  def uninstall(self, path, name):
    """Moves the cache directory back. Opposite to install().

    NamedCache must be open. path must be absolute and unicode.

    Raises NamedCacheError if the cache cannot be uninstalled.
802 """
803 logging.info('Uninstalling named cache %r from %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804 with self._lock:
805 try:
806 if not os.path.isdir(path):
807 logging.warning(
808 'Directory %r does not exist anymore. Cache lost.', path)
809 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000811 if name in self._lru:
812 # This shouldn't happen but just remove the preexisting one and move
813 # on.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400814 logging.warning('overwriting an existing named cache %r', name)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000815 self._remove(name)
816 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400817
818 # Move the dir and create an entry for the named cache.
819 abs_cache = os.path.join(self.cache_dir, rel_cache)
820 logging.info('Moving %r to %r', path, abs_cache)
821 file_path.ensure_tree(os.path.dirname(abs_cache))
822 fs.rename(path, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000824 # That succeeded, calculate its new size.
825 size = _get_recursive_size(abs_cache)
826 if not size:
827 # Do not save empty named cache.
828 return
829 self._lru.add(name, (rel_cache, size))
830
831 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
832 # for user convenience.
833 named_path = self._get_named_path(name)
834 if os.path.exists(named_path):
835 file_path.remove(named_path)
836 else:
837 file_path.ensure_tree(os.path.dirname(named_path))
838
839 try:
840 fs.symlink(abs_cache, named_path)
841 logging.info('Created symlink %r to %r', named_path, abs_cache)
842 except OSError:
843 # Ignore on Windows. It happens when running as a normal user or when
844 # UAC is enabled and the user is a filtered administrator account.
845 if sys.platform != 'win32':
846 raise
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 except (IOError, OSError) as ex:
848 raise NamedCacheError(
849 'cannot uninstall cache named %r at %r: %s' % (
850 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000851 finally:
852 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400853
  def trim(self):
    """Purges cache entries that do not comply with the cache policies.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    with self._lock:
      if not os.path.isdir(self.cache_dir):
        return 0

      removed = []

      def _remove_lru_file():
        """Removes the oldest LRU entry. LRU must not be empty."""
        name, _data = self._lru.get_oldest()
        logging.info('Removing named cache %r', name)
        self._remove(name)
        removed.append(name)

      # Trim according to maximum number of items.
      while len(self._lru) > self._policies.max_items:
        _remove_lru_file()

      # Trim according to maximum age.
      if self._policies.max_age_secs:
        cutoff = self._lru.time_fn() - self._policies.max_age_secs
        while self._lru:
          _name, (_content, timestamp) = self._lru.get_oldest()
          if timestamp >= cutoff:
            break
          _remove_lru_file()

      # Trim according to minimum free space.
      if self._policies.min_free_space:
        while self._lru:
          free_space = file_path.get_free_space(self.cache_dir)
          if free_space >= self._policies.min_free_space:
            break
          _remove_lru_file()

      # Trim according to maximum total size.
      if self._policies.max_cache_size:
        while self._lru:
          total = sum(size for _rel_cache, size in self._lru.itervalues())
          if total <= self._policies.max_cache_size:
            break
          _remove_lru_file()

      self._save()
      return len(removed)

  def cleanup(self):
    # TODO(maruel): Implement. In particular, remove the unexpected files and
    # directories!
    pass

  def _allocate_dir(self):
    """Creates and returns relative path of a new cache directory."""
    # We randomly generate directory names that have two lower/upper case
    # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
    abc_len = len(self._DIR_ALPHABET)
    tried = set()
    while len(tried) < 1000:
      i = random.randint(0, abc_len * abc_len - 1)
      rel_path = (
          self._DIR_ALPHABET[i / abc_len] +
          self._DIR_ALPHABET[i % abc_len])
      if rel_path in tried:
        continue
      abs_path = os.path.join(self.cache_dir, rel_path)
      if not fs.exists(abs_path):
        return rel_path
      tried.add(rel_path)
    raise NamedCacheError(
        'could not allocate a new cache dir, too many cache dirs')

  def _remove(self, name):
    """Removes a cache directory and entry.

    NamedCache must be open.

    Returns:
      Number of caches deleted.
    """
    self._lock.assert_locked()
    # First try to remove the alias if it exists.
    named_dir = self._get_named_path(name)
    if fs.islink(named_dir):
      fs.unlink(named_dir)

    # Then remove the actual data.
    if name not in self._lru:
      return
    rel_path, _size = self._lru.get(name)
    abs_path = os.path.join(self.cache_dir, rel_path)
    if os.path.isdir(abs_path):
      file_path.rmtree(abs_path)
    self._lru.pop(name)

  def _save(self):
    self._lock.assert_locked()
    file_path.ensure_tree(self.cache_dir)
    self._lru.save(self.state_file)

  def _get_named_path(self, name):
    return os.path.join(self.cache_dir, 'named', name)
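

# A minimal sketch (not part of the original module): the expected
# install/uninstall cycle for a named cache around a task run. 'policies', the
# directories and the cache name are placeholder values.
#
#   caches = NamedCache(u'/b/swarming/c/named', policies)
#   caches.install(u'/b/swarming/w/task/cache/builder', 'builder')
#   # ... run the task ...
#   caches.uninstall(u'/b/swarming/w/task/cache/builder', 'builder')
#   caches.trim()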