blob: 0049e402f8f1c4a702aa442bc2d21c6560f28dc6 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
22
23
24# The file size to be used when we don't know the correct file size,
25# generally used for .isolated files.
26UNKNOWN_FILE_SIZE = None
27
28
29def file_write(path, content_generator):
30 """Writes file content as generated by content_generator.
31
32 Creates the intermediary directory as needed.
33
34 Returns the number of bytes written.
35
36 Meant to be mocked out in unit tests.
37 """
38 file_path.ensure_tree(os.path.dirname(path))
39 total = 0
40 with fs.open(path, 'wb') as f:
41 for d in content_generator:
42 total += len(d)
43 f.write(d)
44 return total
45
46
47def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000048 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040049
50 Currently it just checks the file exists and its size matches the expectation.
51 """
52 if size == UNKNOWN_FILE_SIZE:
53 return fs.isfile(path)
54 try:
55 actual_size = fs.stat(path).st_size
56 except OSError as e:
57 logging.warning(
58 'Can\'t read item %s, assuming it\'s invalid: %s',
59 os.path.basename(path), e)
60 return False
61 if size != actual_size:
62 logging.warning(
63 'Found invalid item %s; %d != %d',
64 os.path.basename(path), actual_size, size)
65 return False
66 return True
67
68
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000069def trim_caches(caches, path, min_free_space, max_age_secs):
70 """Trims multiple caches.
71
72 The goal here is to coherently trim all caches in a coherent LRU fashion,
73 deleting older items independent of which container they belong to.
74
75 Two policies are enforced first:
76 - max_age_secs
77 - min_free_space
78
79 Once that's done, then we enforce each cache's own policies.
80
81 Returns:
82 Slice containing the size of all items evicted.
83 """
84 min_ts = time.time() - max_age_secs if max_age_secs else 0
85 free_disk = file_path.get_free_space(path) if min_free_space else 0
86 total = []
87 if min_ts or free_disk:
88 while True:
89 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
90 if not oldest:
91 break
92 oldest.sort(key=lambda (_, ts): ts)
93 c, ts = oldest[0]
94 if ts >= min_ts and free_disk >= min_free_space:
95 break
96 total.append(c.remove_oldest())
97 if min_free_space:
98 free_disk = file_path.get_free_space(path)
99 # Evaluate each cache's own policies.
100 for c in caches:
101 total.extend(c.trim())
102 return total
103
104
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000105def _get_recursive_size(path):
106 """Returns the total data size for the specified path.
107
108 This function can be surprisingly slow on OSX, so its output should be cached.
109 """
110 try:
111 total = 0
112 for root, _, files in fs.walk(path):
113 for f in files:
114 total += fs.lstat(os.path.join(root, f)).st_size
115 return total
116 except (IOError, OSError, UnicodeEncodeError) as exc:
117 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
118 return None
119
120
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400121class NamedCacheError(Exception):
122 """Named cache specific error."""
123
124
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400125class NoMoreSpace(Exception):
126 """Not enough space to map the whole directory."""
127 pass
128
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400129
130class CachePolicies(object):
131 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
132 """Common caching policies for the multiple caches (isolated, named, cipd).
133
134 Arguments:
135 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
136 cache is effectively a leak.
137 - min_free_space: Trim if disk free space becomes lower than this value. If
138 0, it will unconditionally fill the disk.
139 - max_items: Maximum number of items to keep in the cache. If 0, do not
140 enforce a limit.
141 - max_age_secs: Maximum age an item is kept in the cache until it is
142 automatically evicted. Having a lot of dead luggage slows
143 everything down.
144 """
145 self.max_cache_size = max_cache_size
146 self.min_free_space = min_free_space
147 self.max_items = max_items
148 self.max_age_secs = max_age_secs
149
150 def __str__(self):
151 return (
152 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
153 'max_age_secs=%s)') % (
154 self.max_cache_size, self.max_items, self.min_free_space,
155 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400156
157
158class CacheMiss(Exception):
159 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400160 def __init__(self, digest):
161 self.digest = digest
162 super(CacheMiss, self).__init__(
163 'Item with digest %r is not found in cache' % digest)
164
165
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400166class Cache(object):
167 def __init__(self, cache_dir):
168 if cache_dir is not None:
169 assert isinstance(cache_dir, unicode), cache_dir
170 assert file_path.isabs(cache_dir), cache_dir
171 self.cache_dir = cache_dir
172 self._lock = threading_utils.LockWithAssert()
173 # Profiling values.
174 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400175 self._used = []
176
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000177 def __len__(self):
178 """Returns the number of entries in the cache."""
179 raise NotImplementedError()
180
181 def __iter__(self):
182 """Iterates over all the entries names."""
183 raise NotImplementedError()
184
185 def __contains__(self, name):
186 """Returns if an entry is in the cache."""
187 raise NotImplementedError()
188
189 @property
190 def total_size(self):
191 """Returns the total size of the cache in bytes."""
192 raise NotImplementedError()
193
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400194 @property
195 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000196 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400197 with self._lock:
198 return self._added[:]
199
200 @property
201 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000202 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203 with self._lock:
204 return self._used[:]
205
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000206 def get_oldest(self):
207 """Returns timestamp of oldest cache entry or None.
208
209 Returns:
210 Timestamp of the oldest item.
211
212 Used for manual trimming.
213 """
214 raise NotImplementedError()
215
216 def remove_oldest(self):
217 """Removes the oldest item from the cache.
218
219 Returns:
220 Size of the oldest item.
221
222 Used for manual trimming.
223 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400224 raise NotImplementedError()
225
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000226 def save(self):
227 """Saves the current cache to disk."""
228 raise NotImplementedError()
229
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400230 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000231 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400232
233 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000234 Slice with the size of evicted items.
235 """
236 raise NotImplementedError()
237
238 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000239 """Deletes any corrupted item from the cache, then calls trim(), then
240 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000241
242 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400243 """
244 raise NotImplementedError()
245
246
247class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400248 """Content addressed cache that stores objects temporarily.
249
250 It can be accessed concurrently from multiple threads, so it should protect
251 its internal state with some lock.
252 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400253
254 def __enter__(self):
255 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000256 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400257 return self
258
259 def __exit__(self, _exc_type, _exec_value, _traceback):
260 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000261 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400262 return False
263
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400264 def touch(self, digest, size):
265 """Ensures item is not corrupted and updates its LRU position.
266
267 Arguments:
268 digest: hash digest of item to check.
269 size: expected size of this item.
270
271 Returns:
272 True if item is in cache and not corrupted.
273 """
274 raise NotImplementedError()
275
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400276 def getfileobj(self, digest):
277 """Returns a readable file like object.
278
279 If file exists on the file system it will have a .name attribute with an
280 absolute path to the file.
281 """
282 raise NotImplementedError()
283
284 def write(self, digest, content):
285 """Reads data from |content| generator and stores it in cache.
286
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000287 It is possible to write to an object that already exists. It may be
288 ignored (sent to /dev/null) but the timestamp is still updated.
289
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400290 Returns digest to simplify chaining.
291 """
292 raise NotImplementedError()
293
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400294
295class MemoryContentAddressedCache(ContentAddressedCache):
296 """ContentAddressedCache implementation that stores everything in memory."""
297
298 def __init__(self, file_mode_mask=0500):
299 """Args:
300 file_mode_mask: bit mask to AND file mode with. Default value will make
301 all mapped files to be read only.
302 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400303 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # Items in a LRU lookup dict(digest: size).
306 self._lru = lru.LRUDict()
307
308 # Cache interface implementation.
309
310 def __len__(self):
311 with self._lock:
312 return len(self._lru)
313
314 def __iter__(self):
315 # This is not thread-safe.
316 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400317
318 def __contains__(self, digest):
319 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000320 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321
322 @property
323 def total_size(self):
324 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000325 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400326
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000327 def get_oldest(self):
328 with self._lock:
329 try:
330 # (key, (value, ts))
331 return self._lru.get_oldest()[1][1]
332 except KeyError:
333 return None
334
335 def remove_oldest(self):
336 with self._lock:
337 # TODO(maruel): Update self._added.
338 # (key, (value, ts))
339 return len(self._lru.pop_oldest()[1][0])
340
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000341 def save(self):
342 pass
343
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000344 def trim(self):
345 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000346 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400347
348 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400350 pass
351
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000352 # ContentAddressedCache interface implementation.
353
354 def __contains__(self, digest):
355 with self._lock:
356 return digest in self._lru
357
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400358 def touch(self, digest, size):
359 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000360 try:
361 self._lru.touch(digest)
362 except KeyError:
363 return False
364 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400366 def getfileobj(self, digest):
367 with self._lock:
368 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000369 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370 except KeyError:
371 raise CacheMiss(digest)
372 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000373 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374 return io.BytesIO(d)
375
376 def write(self, digest, content):
377 # Assemble whole stream before taking the lock.
378 data = ''.join(content)
379 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000380 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400381 self._added.append(len(data))
382 return digest
383
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400384
385class DiskContentAddressedCache(ContentAddressedCache):
386 """Stateful LRU cache in a flat hash table in a directory.
387
388 Saves its state as json file.
389 """
390 STATE_FILE = u'state.json'
391
392 def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
393 """
394 Arguments:
395 cache_dir: directory where to place the cache.
396 policies: CachePolicies instance, cache retention policies.
397 algo: hashing algorithm used.
398 trim: if True to enforce |policies| right away.
399 It can be done later by calling trim() explicitly.
400 """
401 # All protected methods (starting with '_') except _path should be called
402 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400403 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 self.policies = policies
405 self.hash_algo = hash_algo
406 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
407 # Items in a LRU lookup dict(digest: size).
408 self._lru = lru.LRUDict()
409 # Current cached free disk space. It is updated by self._trim().
410 file_path.ensure_tree(self.cache_dir)
411 self._free_disk = file_path.get_free_space(self.cache_dir)
412 # The first item in the LRU cache that must not be evicted during this run
413 # since it was referenced. All items more recent that _protected in the LRU
414 # cache are also inherently protected. It could be a set() of all items
415 # referenced but this increases memory usage without a use case.
416 self._protected = None
417 # Cleanup operations done by self._load(), if any.
418 self._operations = []
419 with tools.Profiler('Setup'):
420 with self._lock:
421 self._load(trim, time_fn)
422
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000423 # Cache interface implementation.
424
425 def __len__(self):
426 with self._lock:
427 return len(self._lru)
428
429 def __iter__(self):
430 # This is not thread-safe.
431 return self._lru.__iter__()
432
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400433 def __contains__(self, digest):
434 with self._lock:
435 return digest in self._lru
436
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 def total_size(self):
439 with self._lock:
440 return sum(self._lru.itervalues())
441
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000442 def get_oldest(self):
443 with self._lock:
444 try:
445 # (key, (value, ts))
446 return self._lru.get_oldest()[1][1]
447 except KeyError:
448 return None
449
450 def remove_oldest(self):
451 with self._lock:
452 # TODO(maruel): Update self._added.
453 return self._remove_lru_file(True)
454
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000455 def save(self):
456 with self._lock:
457 return self._save()
458
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000459 def trim(self):
460 """Forces retention policies."""
461 with self._lock:
462 return self._trim()
463
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400464 def cleanup(self):
465 """Cleans up the cache directory.
466
467 Ensures there is no unknown files in cache_dir.
468 Ensures the read-only bits are set correctly.
469
470 At that point, the cache was already loaded, trimmed to respect cache
471 policies.
472 """
473 with self._lock:
474 fs.chmod(self.cache_dir, 0700)
475 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000476 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400477 # It'd be faster if there were a readdir() function.
478 for filename in fs.listdir(self.cache_dir):
479 if filename == self.STATE_FILE:
480 fs.chmod(os.path.join(self.cache_dir, filename), 0600)
481 continue
482 if filename in previous:
483 fs.chmod(os.path.join(self.cache_dir, filename), 0400)
484 previous.remove(filename)
485 continue
486
487 # An untracked file. Delete it.
488 logging.warning('Removing unknown file %s from cache', filename)
489 p = self._path(filename)
490 if fs.isdir(p):
491 try:
492 file_path.rmtree(p)
493 except OSError:
494 pass
495 else:
496 file_path.try_remove(p)
497 continue
498
499 if previous:
500 # Filter out entries that were not found.
501 logging.warning('Removed %d lost files', len(previous))
502 for filename in previous:
503 self._lru.pop(filename)
504 self._save()
505
506 # What remains to be done is to hash every single item to
507 # detect corruption, then save to ensure state.json is up to date.
508 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
509 # TODO(maruel): Let's revisit once directory metadata is stored in
510 # state.json so only the files that had been mapped since the last cleanup()
511 # call are manually verified.
512 #
513 #with self._lock:
514 # for digest in self._lru:
515 # if not isolated_format.is_valid_hash(
516 # self._path(digest), self.hash_algo):
517 # self.evict(digest)
518 # logging.info('Deleted corrupted item: %s', digest)
519
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000520 # ContentAddressedCache interface implementation.
521
522 def __contains__(self, digest):
523 with self._lock:
524 return digest in self._lru
525
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 def touch(self, digest, size):
527 """Verifies an actual file is valid and bumps its LRU position.
528
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000529 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400530
531 Note that is doesn't compute the hash so it could still be corrupted if the
532 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400533 """
534 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000535 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536
537 # Update its LRU position.
538 with self._lock:
539 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000540 if looks_valid:
541 # Exists but not in the LRU anymore.
542 self._delete_file(digest, size)
543 return False
544 if not looks_valid:
545 self._lru.pop(digest)
546 # Exists but not in the LRU anymore.
547 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400548 return False
549 self._lru.touch(digest)
550 self._protected = self._protected or digest
551 return True
552
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400553 def getfileobj(self, digest):
554 try:
555 f = fs.open(self._path(digest), 'rb')
556 with self._lock:
557 self._used.append(self._lru[digest])
558 return f
559 except IOError:
560 raise CacheMiss(digest)
561
562 def write(self, digest, content):
563 assert content is not None
564 with self._lock:
565 self._protected = self._protected or digest
566 path = self._path(digest)
567 # A stale broken file may remain. It is possible for the file to have write
568 # access bit removed which would cause the file_write() call to fail to open
569 # in write mode. Take no chance here.
570 file_path.try_remove(path)
571 try:
572 size = file_write(path, content)
573 except:
574 # There are two possible places were an exception can occur:
575 # 1) Inside |content| generator in case of network or unzipping errors.
576 # 2) Inside file_write itself in case of disk IO errors.
577 # In any case delete an incomplete file and propagate the exception to
578 # caller, it will be logged there.
579 file_path.try_remove(path)
580 raise
581 # Make the file read-only in the cache. This has a few side-effects since
582 # the file node is modified, so every directory entries to this file becomes
583 # read-only. It's fine here because it is a new file.
584 file_path.set_read_only(path, True)
585 with self._lock:
586 self._add(digest, size)
587 return digest
588
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000589 # Internal functions.
590
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400591 def _load(self, trim, time_fn):
592 """Loads state of the cache from json file.
593
594 If cache_dir does not exist on disk, it is created.
595 """
596 self._lock.assert_locked()
597
598 if not fs.isfile(self.state_file):
599 if not fs.isdir(self.cache_dir):
600 fs.makedirs(self.cache_dir)
601 else:
602 # Load state of the cache.
603 try:
604 self._lru = lru.LRUDict.load(self.state_file)
605 except ValueError as err:
606 logging.error('Failed to load cache state: %s' % (err,))
607 # Don't want to keep broken state file.
608 file_path.try_remove(self.state_file)
609 if time_fn:
610 self._lru.time_fn = time_fn
611 if trim:
612 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400613
614 def _save(self):
615 """Saves the LRU ordering."""
616 self._lock.assert_locked()
617 if sys.platform != 'win32':
618 d = os.path.dirname(self.state_file)
619 if fs.isdir(d):
620 # Necessary otherwise the file can't be created.
621 file_path.set_read_only(d, False)
622 if fs.isfile(self.state_file):
623 file_path.set_read_only(self.state_file, False)
624 self._lru.save(self.state_file)
625
626 def _trim(self):
627 """Trims anything we don't know, make sure enough free space exists."""
628 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000629 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400630
631 # Trim old items.
632 if self.policies.max_age_secs:
633 cutoff = self._lru.time_fn() - self.policies.max_age_secs
634 while self._lru:
635 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000636 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400637 if oldest[1][1] >= cutoff:
638 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000639 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640
641 # Ensure maximum cache size.
642 if self.policies.max_cache_size:
643 total_size = sum(self._lru.itervalues())
644 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000645 e = self._remove_lru_file(True)
646 evicted.append(e)
647 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400648
649 # Ensure maximum number of items in the cache.
650 if self.policies.max_items and len(self._lru) > self.policies.max_items:
651 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000652 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400653
654 # Ensure enough free space.
655 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400656 while (
657 self.policies.min_free_space and
658 self._lru and
659 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000660 # self._free_disk is updated by this call.
661 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000663 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400664 total_usage = sum(self._lru.itervalues())
665 usage_percent = 0.
666 if total_usage:
667 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
668
669 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000670 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
671 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
672 '%.1fkb)',
673 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400674 self._free_disk / 1024.,
675 total_usage / 1024.,
676 usage_percent,
677 self.policies.max_cache_size / 1024.)
678 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000679 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680
681 def _path(self, digest):
682 """Returns the path to one item."""
683 return os.path.join(self.cache_dir, digest)
684
685 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000686 """Removes the lastest recently used file and returns its size.
687
688 Updates self._free_disk.
689 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690 self._lock.assert_locked()
691 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000692 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400693 if not allow_protected and digest == self._protected:
694 total_size = sum(self._lru.itervalues())+size
695 msg = (
696 'Not enough space to fetch the whole isolated tree.\n'
697 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
698 self.policies, total_size, len(self._lru)+1, self._free_disk)
699 raise NoMoreSpace(msg)
700 except KeyError:
701 # That means an internal error.
702 raise NoMoreSpace('Nothing to remove, can\'t happend')
703 digest, (size, _) = self._lru.pop_oldest()
704 logging.debug('Removing LRU file %s', digest)
705 self._delete_file(digest, size)
706 return size
707
708 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
709 """Adds an item into LRU cache marking it as a newest one."""
710 self._lock.assert_locked()
711 if size == UNKNOWN_FILE_SIZE:
712 size = fs.stat(self._path(digest)).st_size
713 self._added.append(size)
714 self._lru.add(digest, size)
715 self._free_disk -= size
716 # Do a quicker version of self._trim(). It only enforces free disk space,
717 # not cache size limits. It doesn't actually look at real free disk space,
718 # only uses its cache values. self._trim() will be called later to enforce
719 # real trimming but doing this quick version here makes it possible to map
720 # an isolated that is larger than the current amount of free disk space when
721 # the cache size is already large.
722 while (
723 self.policies.min_free_space and
724 self._lru and
725 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000726 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400727 if self._remove_lru_file(False) == -1:
728 break
729
730 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000731 """Deletes cache file from the file system.
732
733 Updates self._free_disk.
734 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400735 self._lock.assert_locked()
736 try:
737 if size == UNKNOWN_FILE_SIZE:
738 try:
739 size = fs.stat(self._path(digest)).st_size
740 except OSError:
741 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000742 if file_path.try_remove(self._path(digest)):
743 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 except OSError as e:
745 if e.errno != errno.ENOENT:
746 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400747
748
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400749class NamedCache(Cache):
750 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400751
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400752 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400753 name is a short identifier that describes the contents of the cache, e.g.
754 "git_v8" could be all git repositories required by v8 builds, or
755 "build_chromium" could be build artefacts of the Chromium.
756 path is a directory path relative to the task run dir. Cache installation
757 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400758 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400759 _DIR_ALPHABET = string.ascii_letters + string.digits
760 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000761 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400762
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400763 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400764 """Initializes NamedCaches.
765
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400766 Arguments:
767 - cache_dir is a directory for persistent cache storage.
768 - policies is a CachePolicies instance.
769 - time_fn is a function that returns timestamp (float) and used to take
770 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400771 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400772 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400773 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000774 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400775 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
776 self._lru = lru.LRUDict()
777 if not fs.isdir(self.cache_dir):
778 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000779 elif os.path.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000780 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400781 self._lru = lru.LRUDict.load(self.state_file)
782 except ValueError:
783 logging.exception('failed to load named cache state file')
784 logging.warning('deleting named caches')
785 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000786 with self._lock:
787 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400788 if time_fn:
789 self._lru.time_fn = time_fn
790
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400791 @property
792 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000793 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400794 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000795 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400796
797 def install(self, path, name):
798 """Moves the directory for the specified named cache to |path|.
799
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000800 path must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400801
802 Raises NamedCacheError if cannot install the cache.
803 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400804 logging.info('Installing named cache %r to %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400805 with self._lock:
806 try:
807 if os.path.isdir(path):
808 raise NamedCacheError(
809 'installation directory %r already exists' % path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000811 link_name = os.path.join(self.cache_dir, name)
812 if fs.exists(link_name):
813 fs.rmtree(link_name)
814
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000815 if name in self._lru:
816 rel_cache, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400817 abs_cache = os.path.join(self.cache_dir, rel_cache)
818 if os.path.isdir(abs_cache):
819 logging.info('Moving %r to %r', abs_cache, path)
820 file_path.ensure_tree(os.path.dirname(path))
821 fs.rename(abs_cache, path)
822 self._remove(name)
823 return
824
825 logging.warning(
826 'directory for named cache %r does not exist at %s', name,
827 rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400829
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 # The named cache does not exist, create an empty directory.
831 # When uninstalling, we will move it back to the cache and create an
832 # an entry.
833 file_path.ensure_tree(path)
834 except (IOError, OSError) as ex:
835 raise NamedCacheError(
836 'cannot install cache named %r at %r: %s' % (
837 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000838 finally:
839 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840
841 def uninstall(self, path, name):
842 """Moves the cache directory back. Opposite to install().
843
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000844 path must be absolute and unicode.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400845
846 Raises NamedCacheError if cannot uninstall the cache.
847 """
848 logging.info('Uninstalling named cache %r from %r', name, path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 with self._lock:
850 try:
851 if not os.path.isdir(path):
852 logging.warning(
853 'Directory %r does not exist anymore. Cache lost.', path)
854 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400855
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000856 if name in self._lru:
857 # This shouldn't happen but just remove the preexisting one and move
858 # on.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400859 logging.warning('overwriting an existing named cache %r', name)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000860 self._remove(name)
861 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400862
863 # Move the dir and create an entry for the named cache.
864 abs_cache = os.path.join(self.cache_dir, rel_cache)
865 logging.info('Moving %r to %r', path, abs_cache)
866 file_path.ensure_tree(os.path.dirname(abs_cache))
867 fs.rename(path, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400868
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000869 # That succeeded, calculate its new size.
870 size = _get_recursive_size(abs_cache)
871 if not size:
872 # Do not save empty named cache.
873 return
874 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000875 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000876
877 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
878 # for user convenience.
879 named_path = self._get_named_path(name)
880 if os.path.exists(named_path):
881 file_path.remove(named_path)
882 else:
883 file_path.ensure_tree(os.path.dirname(named_path))
884
885 try:
886 fs.symlink(abs_cache, named_path)
887 logging.info('Created symlink %r to %r', named_path, abs_cache)
888 except OSError:
889 # Ignore on Windows. It happens when running as a normal user or when
890 # UAC is enabled and the user is a filtered administrator account.
891 if sys.platform != 'win32':
892 raise
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400893 except (IOError, OSError) as ex:
894 raise NamedCacheError(
895 'cannot uninstall cache named %r at %r: %s' % (
896 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000897 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000898 # Call save() at every uninstall. The assumptions are:
899 # - The total the number of named caches is low, so the state.json file
900 # is small, so the time it takes to write it to disk is short.
901 # - The number of mapped named caches per task is low, so the number of
902 # times save() is called on tear-down isn't high enough to be
903 # significant.
904 # - uninstall() sometimes throws due to file locking on Windows or
905 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000906 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400907
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000908 # Cache interface implementation.
909
910 def __len__(self):
911 with self._lock:
912 return len(self._lru)
913
914 def __iter__(self):
915 # This is not thread-safe.
916 return self._lru.__iter__()
917
918 def __contains__(self, digest):
919 with self._lock:
920 return digest in self._lru
921
922 @property
923 def total_size(self):
924 with self._lock:
925 return sum(size for _rel_path, size in self._lru.itervalues())
926
927 def get_oldest(self):
928 with self._lock:
929 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000930 # (key, (value, ts))
931 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000932 except KeyError:
933 return None
934
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000935 def remove_oldest(self):
936 with self._lock:
937 # TODO(maruel): Update self._added.
938 return self._remove_lru_item()
939
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000940 def save(self):
941 with self._lock:
942 return self._save()
943
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400944 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000945 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 with self._lock:
947 if not os.path.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000948 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400949
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400950 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000951 if self._policies.max_items:
952 while len(self._lru) > self._policies.max_items:
953 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400954
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400955 # Trim according to maximum age.
956 if self._policies.max_age_secs:
957 cutoff = self._lru.time_fn() - self._policies.max_age_secs
958 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000959 _name, (_data, ts) = self._lru.get_oldest()
960 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400961 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000962 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400963
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400964 # Trim according to minimum free space.
965 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000966 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400967 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000968 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400969 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000970 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400971
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000972 # Trim according to maximum total size.
973 if self._policies.max_cache_size:
974 while self._lru:
975 total = sum(size for _rel_cache, size in self._lru.itervalues())
976 if total <= self._policies.max_cache_size:
977 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000978 evicted.append(self._remove_lru_item())
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400979
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +0000980 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000981 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400982
983 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000984 """Removes unknown directories.
985
986 Does not recalculate the cache size since it's surprisingly slow on some
987 OSes.
988 """
989 success = True
990 with self._lock:
991 try:
992 actual = set(fs.listdir(self.cache_dir))
993 actual.discard(self.NAMED_DIR)
994 actual.discard(self.STATE_FILE)
995 expected = {v[0]: k for k, v in self._lru.iteritems()}
996 # First, handle the actual cache content.
997 # Remove missing entries.
998 for missing in (set(expected) - actual):
999 self._lru.pop(expected[missing])
1000 # Remove unexpected items.
1001 for unexpected in (actual - set(expected)):
1002 try:
1003 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001004 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001005 file_path.rmtree(p)
1006 else:
1007 fs.remove(p)
1008 except (IOError, OSError) as e:
1009 logging.error('Failed to remove %s: %s', unexpected, e)
1010 success = False
1011
1012 # Second, fix named cache links.
1013 named = os.path.join(self.cache_dir, self.NAMED_DIR)
1014 if os.path.isdir(named):
1015 actual = set(fs.listdir(named))
1016 expected = set(self._lru)
1017 # Confirm entries. Do not add missing ones for now.
1018 for name in expected.intersection(actual):
1019 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
1020 expected_link = os.path.join(self.cache_dir, self._lru[name][0])
1021 if fs.islink(p):
Marc-Antoine Ruel5d5ec6d2018-06-18 13:31:34 +00001022 if sys.platform == 'win32':
1023 # TODO(maruel): Implement readlink() on Windows in fs.py, then
1024 # remove this condition.
1025 # https://crbug.com/853721
1026 continue
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001027 link = fs.readlink(p)
1028 if expected_link == link:
1029 continue
1030 logging.warning(
1031 'Unexpected symlink for cache %s: %s, expected %s',
1032 name, link, expected_link)
1033 else:
1034 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001035 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001036 file_path.rmtree(p)
1037 else:
1038 fs.remove(p)
1039 # Remove unexpected items.
1040 for unexpected in (actual - expected):
1041 try:
1042 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1043 if fs.isdir(p):
1044 file_path.rmtree(p)
1045 else:
1046 fs.remove(p)
1047 except (IOError, OSError) as e:
1048 logging.error('Failed to remove %s: %s', unexpected, e)
1049 success = False
1050 finally:
1051 self._save()
1052 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001053
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001054 # Internal functions.
1055
1056 def _try_upgrade(self):
1057 """Upgrades from the old format to the new one if necessary.
1058
1059 This code can be removed so all bots are known to have the right new format.
1060 """
1061 if not self._lru:
1062 return
1063 _name, (data, _ts) = self._lru.get_oldest()
1064 if isinstance(data, (list, tuple)):
1065 return
1066 # Update to v2.
1067 def upgrade(_name, rel_cache):
1068 abs_cache = os.path.join(self.cache_dir, rel_cache)
1069 return rel_cache, _get_recursive_size(abs_cache)
1070 self._lru.transform(upgrade)
1071 self._save()
1072
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001073 def _remove_lru_item(self):
1074 """Removes the oldest LRU entry. LRU must not be empty."""
1075 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1076 logging.info('Removing named cache %r', name)
1077 self._remove(name)
1078 return size
1079
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001080 def _allocate_dir(self):
1081 """Creates and returns relative path of a new cache directory."""
1082 # We randomly generate directory names that have two lower/upper case
1083 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1084 abc_len = len(self._DIR_ALPHABET)
1085 tried = set()
1086 while len(tried) < 1000:
1087 i = random.randint(0, abc_len * abc_len - 1)
1088 rel_path = (
1089 self._DIR_ALPHABET[i / abc_len] +
1090 self._DIR_ALPHABET[i % abc_len])
1091 if rel_path in tried:
1092 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001093 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001094 if not fs.exists(abs_path):
1095 return rel_path
1096 tried.add(rel_path)
1097 raise NamedCacheError(
1098 'could not allocate a new cache dir, too many cache dirs')
1099
1100 def _remove(self, name):
1101 """Removes a cache directory and entry.
1102
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001103 Returns:
1104 Number of caches deleted.
1105 """
1106 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001107 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001108 named_dir = self._get_named_path(name)
1109 if fs.islink(named_dir):
1110 fs.unlink(named_dir)
1111
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001112 # Then remove the actual data.
1113 if name not in self._lru:
1114 return
1115 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001116 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001117 if os.path.isdir(abs_path):
1118 file_path.rmtree(abs_path)
1119 self._lru.pop(name)
1120
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001121 def _save(self):
1122 self._lock.assert_locked()
1123 file_path.ensure_tree(self.cache_dir)
1124 self._lru.save(self.state_file)
1125
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001126 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001127 return os.path.join(self.cache_dir, self.NAMED_DIR, name)