blob: fce241f539d65f66381ba4ffd6693082b32748ea [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
22
23
24# The file size to be used when we don't know the correct file size,
25# generally used for .isolated files.
26UNKNOWN_FILE_SIZE = None
27
28
29def file_write(path, content_generator):
30 """Writes file content as generated by content_generator.
31
32 Creates the intermediary directory as needed.
33
34 Returns the number of bytes written.
35
36 Meant to be mocked out in unit tests.
37 """
38 file_path.ensure_tree(os.path.dirname(path))
39 total = 0
40 with fs.open(path, 'wb') as f:
41 for d in content_generator:
42 total += len(d)
43 f.write(d)
44 return total
45
46
47def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000048 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040049
50 Currently it just checks the file exists and its size matches the expectation.
51 """
52 if size == UNKNOWN_FILE_SIZE:
53 return fs.isfile(path)
54 try:
55 actual_size = fs.stat(path).st_size
56 except OSError as e:
57 logging.warning(
58 'Can\'t read item %s, assuming it\'s invalid: %s',
59 os.path.basename(path), e)
60 return False
61 if size != actual_size:
62 logging.warning(
63 'Found invalid item %s; %d != %d',
64 os.path.basename(path), actual_size, size)
65 return False
66 return True
67
68
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000069def trim_caches(caches, path, min_free_space, max_age_secs):
70 """Trims multiple caches.
71
72 The goal here is to coherently trim all caches in a coherent LRU fashion,
73 deleting older items independent of which container they belong to.
74
75 Two policies are enforced first:
76 - max_age_secs
77 - min_free_space
78
79 Once that's done, then we enforce each cache's own policies.
80
81 Returns:
82 Slice containing the size of all items evicted.
83 """
84 min_ts = time.time() - max_age_secs if max_age_secs else 0
85 free_disk = file_path.get_free_space(path) if min_free_space else 0
86 total = []
87 if min_ts or free_disk:
88 while True:
89 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
90 if not oldest:
91 break
92 oldest.sort(key=lambda (_, ts): ts)
93 c, ts = oldest[0]
94 if ts >= min_ts and free_disk >= min_free_space:
95 break
96 total.append(c.remove_oldest())
97 if min_free_space:
98 free_disk = file_path.get_free_space(path)
99 # Evaluate each cache's own policies.
100 for c in caches:
101 total.extend(c.trim())
102 return total
103
104
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000105def _get_recursive_size(path):
106 """Returns the total data size for the specified path.
107
108 This function can be surprisingly slow on OSX, so its output should be cached.
109 """
110 try:
111 total = 0
112 for root, _, files in fs.walk(path):
113 for f in files:
114 total += fs.lstat(os.path.join(root, f)).st_size
115 return total
116 except (IOError, OSError, UnicodeEncodeError) as exc:
117 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
118 return None
119
120
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400121class NamedCacheError(Exception):
122 """Named cache specific error."""
123
124
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400125class NoMoreSpace(Exception):
126 """Not enough space to map the whole directory."""
127 pass
128
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400129
130class CachePolicies(object):
131 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
132 """Common caching policies for the multiple caches (isolated, named, cipd).
133
134 Arguments:
135 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
136 cache is effectively a leak.
137 - min_free_space: Trim if disk free space becomes lower than this value. If
138 0, it will unconditionally fill the disk.
139 - max_items: Maximum number of items to keep in the cache. If 0, do not
140 enforce a limit.
141 - max_age_secs: Maximum age an item is kept in the cache until it is
142 automatically evicted. Having a lot of dead luggage slows
143 everything down.
144 """
145 self.max_cache_size = max_cache_size
146 self.min_free_space = min_free_space
147 self.max_items = max_items
148 self.max_age_secs = max_age_secs
149
150 def __str__(self):
151 return (
152 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
153 'max_age_secs=%s)') % (
154 self.max_cache_size, self.max_items, self.min_free_space,
155 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400156
157
158class CacheMiss(Exception):
159 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400160 def __init__(self, digest):
161 self.digest = digest
162 super(CacheMiss, self).__init__(
163 'Item with digest %r is not found in cache' % digest)
164
165
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400166class Cache(object):
167 def __init__(self, cache_dir):
168 if cache_dir is not None:
169 assert isinstance(cache_dir, unicode), cache_dir
170 assert file_path.isabs(cache_dir), cache_dir
171 self.cache_dir = cache_dir
172 self._lock = threading_utils.LockWithAssert()
173 # Profiling values.
174 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400175 self._used = []
176
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000177 def __len__(self):
178 """Returns the number of entries in the cache."""
179 raise NotImplementedError()
180
181 def __iter__(self):
182 """Iterates over all the entries names."""
183 raise NotImplementedError()
184
185 def __contains__(self, name):
186 """Returns if an entry is in the cache."""
187 raise NotImplementedError()
188
189 @property
190 def total_size(self):
191 """Returns the total size of the cache in bytes."""
192 raise NotImplementedError()
193
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400194 @property
195 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000196 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400197 with self._lock:
198 return self._added[:]
199
200 @property
201 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000202 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203 with self._lock:
204 return self._used[:]
205
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000206 def get_oldest(self):
207 """Returns timestamp of oldest cache entry or None.
208
209 Returns:
210 Timestamp of the oldest item.
211
212 Used for manual trimming.
213 """
214 raise NotImplementedError()
215
216 def remove_oldest(self):
217 """Removes the oldest item from the cache.
218
219 Returns:
220 Size of the oldest item.
221
222 Used for manual trimming.
223 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400224 raise NotImplementedError()
225
226 def trim(self):
227 """Enforces cache policies.
228
229 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000230 Slice with the size of evicted items.
231 """
232 raise NotImplementedError()
233
234 def cleanup(self):
235 """Deletes any corrupted item from the cache and trims it if necessary.
236
237 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 """
239 raise NotImplementedError()
240
241
242class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400243 """Content addressed cache that stores objects temporarily.
244
245 It can be accessed concurrently from multiple threads, so it should protect
246 its internal state with some lock.
247 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400248
249 def __enter__(self):
250 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000251 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400252 return self
253
254 def __exit__(self, _exc_type, _exec_value, _traceback):
255 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000256 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400257 return False
258
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400259 def touch(self, digest, size):
260 """Ensures item is not corrupted and updates its LRU position.
261
262 Arguments:
263 digest: hash digest of item to check.
264 size: expected size of this item.
265
266 Returns:
267 True if item is in cache and not corrupted.
268 """
269 raise NotImplementedError()
270
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400271 def getfileobj(self, digest):
272 """Returns a readable file like object.
273
274 If file exists on the file system it will have a .name attribute with an
275 absolute path to the file.
276 """
277 raise NotImplementedError()
278
279 def write(self, digest, content):
280 """Reads data from |content| generator and stores it in cache.
281
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000282 It is possible to write to an object that already exists. It may be
283 ignored (sent to /dev/null) but the timestamp is still updated.
284
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400285 Returns digest to simplify chaining.
286 """
287 raise NotImplementedError()
288
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400289
290class MemoryContentAddressedCache(ContentAddressedCache):
291 """ContentAddressedCache implementation that stores everything in memory."""
292
293 def __init__(self, file_mode_mask=0500):
294 """Args:
295 file_mode_mask: bit mask to AND file mode with. Default value will make
296 all mapped files to be read only.
297 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400298 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400299 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # Items in a LRU lookup dict(digest: size).
301 self._lru = lru.LRUDict()
302
303 # Cache interface implementation.
304
305 def __len__(self):
306 with self._lock:
307 return len(self._lru)
308
309 def __iter__(self):
310 # This is not thread-safe.
311 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400312
313 def __contains__(self, digest):
314 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000315 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400316
317 @property
318 def total_size(self):
319 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000320 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000322 def get_oldest(self):
323 with self._lock:
324 try:
325 # (key, (value, ts))
326 return self._lru.get_oldest()[1][1]
327 except KeyError:
328 return None
329
330 def remove_oldest(self):
331 with self._lock:
332 # TODO(maruel): Update self._added.
333 # (key, (value, ts))
334 return len(self._lru.pop_oldest()[1][0])
335
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000336 def trim(self):
337 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000338 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400339
340 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000341 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400342 pass
343
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000344 # ContentAddressedCache interface implementation.
345
346 def __contains__(self, digest):
347 with self._lock:
348 return digest in self._lru
349
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400350 def touch(self, digest, size):
351 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000352 try:
353 self._lru.touch(digest)
354 except KeyError:
355 return False
356 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400357
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400358 def getfileobj(self, digest):
359 with self._lock:
360 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000361 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400362 except KeyError:
363 raise CacheMiss(digest)
364 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000365 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400366 return io.BytesIO(d)
367
368 def write(self, digest, content):
369 # Assemble whole stream before taking the lock.
370 data = ''.join(content)
371 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000372 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400373 self._added.append(len(data))
374 return digest
375
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400376
377class DiskContentAddressedCache(ContentAddressedCache):
378 """Stateful LRU cache in a flat hash table in a directory.
379
380 Saves its state as json file.
381 """
382 STATE_FILE = u'state.json'
383
384 def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
385 """
386 Arguments:
387 cache_dir: directory where to place the cache.
388 policies: CachePolicies instance, cache retention policies.
389 algo: hashing algorithm used.
390 trim: if True to enforce |policies| right away.
391 It can be done later by calling trim() explicitly.
392 """
393 # All protected methods (starting with '_') except _path should be called
394 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400395 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396 self.policies = policies
397 self.hash_algo = hash_algo
398 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
399 # Items in a LRU lookup dict(digest: size).
400 self._lru = lru.LRUDict()
401 # Current cached free disk space. It is updated by self._trim().
402 file_path.ensure_tree(self.cache_dir)
403 self._free_disk = file_path.get_free_space(self.cache_dir)
404 # The first item in the LRU cache that must not be evicted during this run
405 # since it was referenced. All items more recent that _protected in the LRU
406 # cache are also inherently protected. It could be a set() of all items
407 # referenced but this increases memory usage without a use case.
408 self._protected = None
409 # Cleanup operations done by self._load(), if any.
410 self._operations = []
411 with tools.Profiler('Setup'):
412 with self._lock:
413 self._load(trim, time_fn)
414
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000415 # Cache interface implementation.
416
417 def __len__(self):
418 with self._lock:
419 return len(self._lru)
420
421 def __iter__(self):
422 # This is not thread-safe.
423 return self._lru.__iter__()
424
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 def __contains__(self, digest):
426 with self._lock:
427 return digest in self._lru
428
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400429 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400430 def total_size(self):
431 with self._lock:
432 return sum(self._lru.itervalues())
433
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000434 def get_oldest(self):
435 with self._lock:
436 try:
437 # (key, (value, ts))
438 return self._lru.get_oldest()[1][1]
439 except KeyError:
440 return None
441
442 def remove_oldest(self):
443 with self._lock:
444 # TODO(maruel): Update self._added.
445 return self._remove_lru_file(True)
446
447 def trim(self):
448 """Forces retention policies."""
449 with self._lock:
450 return self._trim()
451
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400452 def cleanup(self):
453 """Cleans up the cache directory.
454
455 Ensures there is no unknown files in cache_dir.
456 Ensures the read-only bits are set correctly.
457
458 At that point, the cache was already loaded, trimmed to respect cache
459 policies.
460 """
461 with self._lock:
462 fs.chmod(self.cache_dir, 0700)
463 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000464 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400465 # It'd be faster if there were a readdir() function.
466 for filename in fs.listdir(self.cache_dir):
467 if filename == self.STATE_FILE:
468 fs.chmod(os.path.join(self.cache_dir, filename), 0600)
469 continue
470 if filename in previous:
471 fs.chmod(os.path.join(self.cache_dir, filename), 0400)
472 previous.remove(filename)
473 continue
474
475 # An untracked file. Delete it.
476 logging.warning('Removing unknown file %s from cache', filename)
477 p = self._path(filename)
478 if fs.isdir(p):
479 try:
480 file_path.rmtree(p)
481 except OSError:
482 pass
483 else:
484 file_path.try_remove(p)
485 continue
486
487 if previous:
488 # Filter out entries that were not found.
489 logging.warning('Removed %d lost files', len(previous))
490 for filename in previous:
491 self._lru.pop(filename)
492 self._save()
493
494 # What remains to be done is to hash every single item to
495 # detect corruption, then save to ensure state.json is up to date.
496 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
497 # TODO(maruel): Let's revisit once directory metadata is stored in
498 # state.json so only the files that had been mapped since the last cleanup()
499 # call are manually verified.
500 #
501 #with self._lock:
502 # for digest in self._lru:
503 # if not isolated_format.is_valid_hash(
504 # self._path(digest), self.hash_algo):
505 # self.evict(digest)
506 # logging.info('Deleted corrupted item: %s', digest)
507
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000508 # ContentAddressedCache interface implementation.
509
510 def __contains__(self, digest):
511 with self._lock:
512 return digest in self._lru
513
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400514 def touch(self, digest, size):
515 """Verifies an actual file is valid and bumps its LRU position.
516
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000517 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400518
519 Note that is doesn't compute the hash so it could still be corrupted if the
520 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400521 """
522 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000523 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400524
525 # Update its LRU position.
526 with self._lock:
527 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000528 if looks_valid:
529 # Exists but not in the LRU anymore.
530 self._delete_file(digest, size)
531 return False
532 if not looks_valid:
533 self._lru.pop(digest)
534 # Exists but not in the LRU anymore.
535 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536 return False
537 self._lru.touch(digest)
538 self._protected = self._protected or digest
539 return True
540
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400541 def getfileobj(self, digest):
542 try:
543 f = fs.open(self._path(digest), 'rb')
544 with self._lock:
545 self._used.append(self._lru[digest])
546 return f
547 except IOError:
548 raise CacheMiss(digest)
549
550 def write(self, digest, content):
551 assert content is not None
552 with self._lock:
553 self._protected = self._protected or digest
554 path = self._path(digest)
555 # A stale broken file may remain. It is possible for the file to have write
556 # access bit removed which would cause the file_write() call to fail to open
557 # in write mode. Take no chance here.
558 file_path.try_remove(path)
559 try:
560 size = file_write(path, content)
561 except:
562 # There are two possible places were an exception can occur:
563 # 1) Inside |content| generator in case of network or unzipping errors.
564 # 2) Inside file_write itself in case of disk IO errors.
565 # In any case delete an incomplete file and propagate the exception to
566 # caller, it will be logged there.
567 file_path.try_remove(path)
568 raise
569 # Make the file read-only in the cache. This has a few side-effects since
570 # the file node is modified, so every directory entries to this file becomes
571 # read-only. It's fine here because it is a new file.
572 file_path.set_read_only(path, True)
573 with self._lock:
574 self._add(digest, size)
575 return digest
576
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000577 # Internal functions.
578
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400579 def _load(self, trim, time_fn):
580 """Loads state of the cache from json file.
581
582 If cache_dir does not exist on disk, it is created.
583 """
584 self._lock.assert_locked()
585
586 if not fs.isfile(self.state_file):
587 if not fs.isdir(self.cache_dir):
588 fs.makedirs(self.cache_dir)
589 else:
590 # Load state of the cache.
591 try:
592 self._lru = lru.LRUDict.load(self.state_file)
593 except ValueError as err:
594 logging.error('Failed to load cache state: %s' % (err,))
595 # Don't want to keep broken state file.
596 file_path.try_remove(self.state_file)
597 if time_fn:
598 self._lru.time_fn = time_fn
599 if trim:
600 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400601
602 def _save(self):
603 """Saves the LRU ordering."""
604 self._lock.assert_locked()
605 if sys.platform != 'win32':
606 d = os.path.dirname(self.state_file)
607 if fs.isdir(d):
608 # Necessary otherwise the file can't be created.
609 file_path.set_read_only(d, False)
610 if fs.isfile(self.state_file):
611 file_path.set_read_only(self.state_file, False)
612 self._lru.save(self.state_file)
613
614 def _trim(self):
615 """Trims anything we don't know, make sure enough free space exists."""
616 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000617 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400618
619 # Trim old items.
620 if self.policies.max_age_secs:
621 cutoff = self._lru.time_fn() - self.policies.max_age_secs
622 while self._lru:
623 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000624 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400625 if oldest[1][1] >= cutoff:
626 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000627 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 # Ensure maximum cache size.
630 if self.policies.max_cache_size:
631 total_size = sum(self._lru.itervalues())
632 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000633 e = self._remove_lru_file(True)
634 evicted.append(e)
635 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400636
637 # Ensure maximum number of items in the cache.
638 if self.policies.max_items and len(self._lru) > self.policies.max_items:
639 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000640 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400641
642 # Ensure enough free space.
643 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400644 while (
645 self.policies.min_free_space and
646 self._lru and
647 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000648 # self._free_disk is updated by this call.
649 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400650
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000651 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400652 total_usage = sum(self._lru.itervalues())
653 usage_percent = 0.
654 if total_usage:
655 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
656
657 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000658 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
659 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
660 '%.1fkb)',
661 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662 self._free_disk / 1024.,
663 total_usage / 1024.,
664 usage_percent,
665 self.policies.max_cache_size / 1024.)
666 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000667 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400668
669 def _path(self, digest):
670 """Returns the path to one item."""
671 return os.path.join(self.cache_dir, digest)
672
673 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000674 """Removes the lastest recently used file and returns its size.
675
676 Updates self._free_disk.
677 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 self._lock.assert_locked()
679 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000680 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400681 if not allow_protected and digest == self._protected:
682 total_size = sum(self._lru.itervalues())+size
683 msg = (
684 'Not enough space to fetch the whole isolated tree.\n'
685 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
686 self.policies, total_size, len(self._lru)+1, self._free_disk)
687 raise NoMoreSpace(msg)
688 except KeyError:
689 # That means an internal error.
690 raise NoMoreSpace('Nothing to remove, can\'t happend')
691 digest, (size, _) = self._lru.pop_oldest()
692 logging.debug('Removing LRU file %s', digest)
693 self._delete_file(digest, size)
694 return size
695
696 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
697 """Adds an item into LRU cache marking it as a newest one."""
698 self._lock.assert_locked()
699 if size == UNKNOWN_FILE_SIZE:
700 size = fs.stat(self._path(digest)).st_size
701 self._added.append(size)
702 self._lru.add(digest, size)
703 self._free_disk -= size
704 # Do a quicker version of self._trim(). It only enforces free disk space,
705 # not cache size limits. It doesn't actually look at real free disk space,
706 # only uses its cache values. self._trim() will be called later to enforce
707 # real trimming but doing this quick version here makes it possible to map
708 # an isolated that is larger than the current amount of free disk space when
709 # the cache size is already large.
710 while (
711 self.policies.min_free_space and
712 self._lru and
713 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400715 if self._remove_lru_file(False) == -1:
716 break
717
718 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000719 """Deletes cache file from the file system.
720
721 Updates self._free_disk.
722 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400723 self._lock.assert_locked()
724 try:
725 if size == UNKNOWN_FILE_SIZE:
726 try:
727 size = fs.stat(self._path(digest)).st_size
728 except OSError:
729 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000730 if file_path.try_remove(self._path(digest)):
731 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400732 except OSError as e:
733 if e.errno != errno.ENOENT:
734 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400735
736
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400737class NamedCache(Cache):
738 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400739
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400740 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400741 name is a short identifier that describes the contents of the cache, e.g.
742 "git_v8" could be all git repositories required by v8 builds, or
743 "build_chromium" could be build artefacts of the Chromium.
744 path is a directory path relative to the task run dir. Cache installation
745 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400746 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400747 _DIR_ALPHABET = string.ascii_letters + string.digits
748 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000749 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400750
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400751 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400752 """Initializes NamedCaches.
753
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400754 Arguments:
755 - cache_dir is a directory for persistent cache storage.
756 - policies is a CachePolicies instance.
757 - time_fn is a function that returns timestamp (float) and used to take
758 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400759 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400760 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400761 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000762 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400763 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
764 self._lru = lru.LRUDict()
765 if not fs.isdir(self.cache_dir):
766 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000767 elif os.path.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000768 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400769 self._lru = lru.LRUDict.load(self.state_file)
770 except ValueError:
771 logging.exception('failed to load named cache state file')
772 logging.warning('deleting named caches')
773 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000774 with self._lock:
775 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400776 if time_fn:
777 self._lru.time_fn = time_fn
778
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400779 @property
780 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000781 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400782 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000783 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400784
785 def install(self, path, name):
786 """Moves the directory for the specified named cache to |path|.
787
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000788 path must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400789
790 Raises NamedCacheError if cannot install the cache.
791 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400792 logging.info('Installing named cache %r to %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400793 with self._lock:
794 try:
795 if os.path.isdir(path):
796 raise NamedCacheError(
797 'installation directory %r already exists' % path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400798
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000799 link_name = os.path.join(self.cache_dir, name)
800 if fs.exists(link_name):
801 fs.rmtree(link_name)
802
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000803 if name in self._lru:
804 rel_cache, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400805 abs_cache = os.path.join(self.cache_dir, rel_cache)
806 if os.path.isdir(abs_cache):
807 logging.info('Moving %r to %r', abs_cache, path)
808 file_path.ensure_tree(os.path.dirname(path))
809 fs.rename(abs_cache, path)
810 self._remove(name)
811 return
812
813 logging.warning(
814 'directory for named cache %r does not exist at %s', name,
815 rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400816 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400818 # The named cache does not exist, create an empty directory.
819 # When uninstalling, we will move it back to the cache and create an
820 # an entry.
821 file_path.ensure_tree(path)
822 except (IOError, OSError) as ex:
823 raise NamedCacheError(
824 'cannot install cache named %r at %r: %s' % (
825 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000826 finally:
827 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
829 def uninstall(self, path, name):
830 """Moves the cache directory back. Opposite to install().
831
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000832 path must be absolute and unicode.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400833
834 Raises NamedCacheError if cannot uninstall the cache.
835 """
836 logging.info('Uninstalling named cache %r from %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 with self._lock:
838 try:
839 if not os.path.isdir(path):
840 logging.warning(
841 'Directory %r does not exist anymore. Cache lost.', path)
842 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400843
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000844 if name in self._lru:
845 # This shouldn't happen but just remove the preexisting one and move
846 # on.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 logging.warning('overwriting an existing named cache %r', name)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000848 self._remove(name)
849 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400850
851 # Move the dir and create an entry for the named cache.
852 abs_cache = os.path.join(self.cache_dir, rel_cache)
853 logging.info('Moving %r to %r', path, abs_cache)
854 file_path.ensure_tree(os.path.dirname(abs_cache))
855 fs.rename(path, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400856
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000857 # That succeeded, calculate its new size.
858 size = _get_recursive_size(abs_cache)
859 if not size:
860 # Do not save empty named cache.
861 return
862 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000863 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000864
865 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
866 # for user convenience.
867 named_path = self._get_named_path(name)
868 if os.path.exists(named_path):
869 file_path.remove(named_path)
870 else:
871 file_path.ensure_tree(os.path.dirname(named_path))
872
873 try:
874 fs.symlink(abs_cache, named_path)
875 logging.info('Created symlink %r to %r', named_path, abs_cache)
876 except OSError:
877 # Ignore on Windows. It happens when running as a normal user or when
878 # UAC is enabled and the user is a filtered administrator account.
879 if sys.platform != 'win32':
880 raise
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400881 except (IOError, OSError) as ex:
882 raise NamedCacheError(
883 'cannot uninstall cache named %r at %r: %s' % (
884 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000885 finally:
886 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400887
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000888 # Cache interface implementation.
889
890 def __len__(self):
891 with self._lock:
892 return len(self._lru)
893
894 def __iter__(self):
895 # This is not thread-safe.
896 return self._lru.__iter__()
897
898 def __contains__(self, digest):
899 with self._lock:
900 return digest in self._lru
901
902 @property
903 def total_size(self):
904 with self._lock:
905 return sum(size for _rel_path, size in self._lru.itervalues())
906
907 def get_oldest(self):
908 with self._lock:
909 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000910 # (key, (value, ts))
911 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000912 except KeyError:
913 return None
914
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000915 def remove_oldest(self):
916 with self._lock:
917 # TODO(maruel): Update self._added.
918 return self._remove_lru_item()
919
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400920 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000921 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400922 with self._lock:
923 if not os.path.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000924 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400925
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400926 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000927 if self._policies.max_items:
928 while len(self._lru) > self._policies.max_items:
929 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400930
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400931 # Trim according to maximum age.
932 if self._policies.max_age_secs:
933 cutoff = self._lru.time_fn() - self._policies.max_age_secs
934 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000935 _name, (_data, ts) = self._lru.get_oldest()
936 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400937 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000938 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400939
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400940 # Trim according to minimum free space.
941 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000942 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400943 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000944 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400945 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000946 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400947
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000948 # Trim according to maximum total size.
949 if self._policies.max_cache_size:
950 while self._lru:
951 total = sum(size for _rel_cache, size in self._lru.itervalues())
952 if total <= self._policies.max_cache_size:
953 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000954 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400955
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +0000956 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000957 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400958
959 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000960 """Removes unknown directories.
961
962 Does not recalculate the cache size since it's surprisingly slow on some
963 OSes.
964 """
965 success = True
966 with self._lock:
967 try:
968 actual = set(fs.listdir(self.cache_dir))
969 actual.discard(self.NAMED_DIR)
970 actual.discard(self.STATE_FILE)
971 expected = {v[0]: k for k, v in self._lru.iteritems()}
972 # First, handle the actual cache content.
973 # Remove missing entries.
974 for missing in (set(expected) - actual):
975 self._lru.pop(expected[missing])
976 # Remove unexpected items.
977 for unexpected in (actual - set(expected)):
978 try:
979 p = os.path.join(self.cache_dir, unexpected)
980 if fs.isdir(p):
981 file_path.rmtree(p)
982 else:
983 fs.remove(p)
984 except (IOError, OSError) as e:
985 logging.error('Failed to remove %s: %s', unexpected, e)
986 success = False
987
988 # Second, fix named cache links.
989 named = os.path.join(self.cache_dir, self.NAMED_DIR)
990 if os.path.isdir(named):
991 actual = set(fs.listdir(named))
992 expected = set(self._lru)
993 # Confirm entries. Do not add missing ones for now.
994 for name in expected.intersection(actual):
995 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
996 expected_link = os.path.join(self.cache_dir, self._lru[name][0])
997 if fs.islink(p):
Marc-Antoine Ruel5d5ec6d2018-06-18 13:31:34 +0000998 if sys.platform == 'win32':
999 # TODO(maruel): Implement readlink() on Windows in fs.py, then
1000 # remove this condition.
1001 # https://crbug.com/853721
1002 continue
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001003 link = fs.readlink(p)
1004 if expected_link == link:
1005 continue
1006 logging.warning(
1007 'Unexpected symlink for cache %s: %s, expected %s',
1008 name, link, expected_link)
1009 else:
1010 logging.warning('Unexpected non symlink for cache %s', name)
1011 if fs.isdir(p):
1012 file_path.rmtree(p)
1013 else:
1014 fs.remove(p)
1015 # Remove unexpected items.
1016 for unexpected in (actual - expected):
1017 try:
1018 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1019 if fs.isdir(p):
1020 file_path.rmtree(p)
1021 else:
1022 fs.remove(p)
1023 except (IOError, OSError) as e:
1024 logging.error('Failed to remove %s: %s', unexpected, e)
1025 success = False
1026 finally:
1027 self._save()
1028 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001029
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001030 # Internal functions.
1031
1032 def _try_upgrade(self):
1033 """Upgrades from the old format to the new one if necessary.
1034
1035 This code can be removed so all bots are known to have the right new format.
1036 """
1037 if not self._lru:
1038 return
1039 _name, (data, _ts) = self._lru.get_oldest()
1040 if isinstance(data, (list, tuple)):
1041 return
1042 # Update to v2.
1043 def upgrade(_name, rel_cache):
1044 abs_cache = os.path.join(self.cache_dir, rel_cache)
1045 return rel_cache, _get_recursive_size(abs_cache)
1046 self._lru.transform(upgrade)
1047 self._save()
1048
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001049 def _remove_lru_item(self):
1050 """Removes the oldest LRU entry. LRU must not be empty."""
1051 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1052 logging.info('Removing named cache %r', name)
1053 self._remove(name)
1054 return size
1055
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001056 def _allocate_dir(self):
1057 """Creates and returns relative path of a new cache directory."""
1058 # We randomly generate directory names that have two lower/upper case
1059 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1060 abc_len = len(self._DIR_ALPHABET)
1061 tried = set()
1062 while len(tried) < 1000:
1063 i = random.randint(0, abc_len * abc_len - 1)
1064 rel_path = (
1065 self._DIR_ALPHABET[i / abc_len] +
1066 self._DIR_ALPHABET[i % abc_len])
1067 if rel_path in tried:
1068 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001069 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001070 if not fs.exists(abs_path):
1071 return rel_path
1072 tried.add(rel_path)
1073 raise NamedCacheError(
1074 'could not allocate a new cache dir, too many cache dirs')
1075
1076 def _remove(self, name):
1077 """Removes a cache directory and entry.
1078
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001079 Returns:
1080 Number of caches deleted.
1081 """
1082 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001083 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001084 named_dir = self._get_named_path(name)
1085 if fs.islink(named_dir):
1086 fs.unlink(named_dir)
1087
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001088 # Then remove the actual data.
1089 if name not in self._lru:
1090 return
1091 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001092 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001093 if os.path.isdir(abs_path):
1094 file_path.rmtree(abs_path)
1095 self._lru.pop(name)
1096
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001097 def _save(self):
1098 self._lock.assert_locked()
1099 file_path.ensure_tree(self.cache_dir)
1100 self._lru.save(self.state_file)
1101
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001102 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001103 return os.path.join(self.cache_dir, self.NAMED_DIR, name)