blob: 6cb0d6baa58ef82bac203413de4de214d62ffe34 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
22
23
24# The file size to be used when we don't know the correct file size,
25# generally used for .isolated files.
26UNKNOWN_FILE_SIZE = None
27
28
29def file_write(path, content_generator):
30 """Writes file content as generated by content_generator.
31
32 Creates the intermediary directory as needed.
33
34 Returns the number of bytes written.
35
36 Meant to be mocked out in unit tests.
37 """
38 file_path.ensure_tree(os.path.dirname(path))
39 total = 0
40 with fs.open(path, 'wb') as f:
41 for d in content_generator:
42 total += len(d)
43 f.write(d)
44 return total
45
46
47def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000048 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040049
50 Currently it just checks the file exists and its size matches the expectation.
51 """
52 if size == UNKNOWN_FILE_SIZE:
53 return fs.isfile(path)
54 try:
55 actual_size = fs.stat(path).st_size
56 except OSError as e:
57 logging.warning(
58 'Can\'t read item %s, assuming it\'s invalid: %s',
59 os.path.basename(path), e)
60 return False
61 if size != actual_size:
62 logging.warning(
63 'Found invalid item %s; %d != %d',
64 os.path.basename(path), actual_size, size)
65 return False
66 return True
67
68
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000069def trim_caches(caches, path, min_free_space, max_age_secs):
70 """Trims multiple caches.
71
72 The goal here is to coherently trim all caches in a coherent LRU fashion,
73 deleting older items independent of which container they belong to.
74
75 Two policies are enforced first:
76 - max_age_secs
77 - min_free_space
78
79 Once that's done, then we enforce each cache's own policies.
80
81 Returns:
82 Slice containing the size of all items evicted.
83 """
84 min_ts = time.time() - max_age_secs if max_age_secs else 0
85 free_disk = file_path.get_free_space(path) if min_free_space else 0
86 total = []
87 if min_ts or free_disk:
88 while True:
89 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
90 if not oldest:
91 break
92 oldest.sort(key=lambda (_, ts): ts)
93 c, ts = oldest[0]
94 if ts >= min_ts and free_disk >= min_free_space:
95 break
96 total.append(c.remove_oldest())
97 if min_free_space:
98 free_disk = file_path.get_free_space(path)
99 # Evaluate each cache's own policies.
100 for c in caches:
101 total.extend(c.trim())
102 return total
103
104
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000105def _get_recursive_size(path):
106 """Returns the total data size for the specified path.
107
108 This function can be surprisingly slow on OSX, so its output should be cached.
109 """
110 try:
111 total = 0
112 for root, _, files in fs.walk(path):
113 for f in files:
114 total += fs.lstat(os.path.join(root, f)).st_size
115 return total
116 except (IOError, OSError, UnicodeEncodeError) as exc:
117 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
118 return None
119
120
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400121class NamedCacheError(Exception):
122 """Named cache specific error."""
123
124
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400125class NoMoreSpace(Exception):
126 """Not enough space to map the whole directory."""
127 pass
128
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400129
130class CachePolicies(object):
131 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
132 """Common caching policies for the multiple caches (isolated, named, cipd).
133
134 Arguments:
135 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
136 cache is effectively a leak.
137 - min_free_space: Trim if disk free space becomes lower than this value. If
138 0, it will unconditionally fill the disk.
139 - max_items: Maximum number of items to keep in the cache. If 0, do not
140 enforce a limit.
141 - max_age_secs: Maximum age an item is kept in the cache until it is
142 automatically evicted. Having a lot of dead luggage slows
143 everything down.
144 """
145 self.max_cache_size = max_cache_size
146 self.min_free_space = min_free_space
147 self.max_items = max_items
148 self.max_age_secs = max_age_secs
149
150 def __str__(self):
151 return (
152 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
153 'max_age_secs=%s)') % (
154 self.max_cache_size, self.max_items, self.min_free_space,
155 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400156
157
158class CacheMiss(Exception):
159 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400160 def __init__(self, digest):
161 self.digest = digest
162 super(CacheMiss, self).__init__(
163 'Item with digest %r is not found in cache' % digest)
164
165
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400166class Cache(object):
167 def __init__(self, cache_dir):
168 if cache_dir is not None:
169 assert isinstance(cache_dir, unicode), cache_dir
170 assert file_path.isabs(cache_dir), cache_dir
171 self.cache_dir = cache_dir
172 self._lock = threading_utils.LockWithAssert()
173 # Profiling values.
174 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400175 self._used = []
176
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000177 def __nonzero__(self):
178 """A cache is always True.
179
180 Otherwise it falls back to __len__, which is surprising.
181 """
182 return True
183
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000184 def __len__(self):
185 """Returns the number of entries in the cache."""
186 raise NotImplementedError()
187
188 def __iter__(self):
189 """Iterates over all the entries names."""
190 raise NotImplementedError()
191
192 def __contains__(self, name):
193 """Returns if an entry is in the cache."""
194 raise NotImplementedError()
195
196 @property
197 def total_size(self):
198 """Returns the total size of the cache in bytes."""
199 raise NotImplementedError()
200
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400201 @property
202 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000203 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400204 with self._lock:
205 return self._added[:]
206
207 @property
208 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000209 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400210 with self._lock:
211 return self._used[:]
212
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000213 def get_oldest(self):
214 """Returns timestamp of oldest cache entry or None.
215
216 Returns:
217 Timestamp of the oldest item.
218
219 Used for manual trimming.
220 """
221 raise NotImplementedError()
222
223 def remove_oldest(self):
224 """Removes the oldest item from the cache.
225
226 Returns:
227 Size of the oldest item.
228
229 Used for manual trimming.
230 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400231 raise NotImplementedError()
232
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000233 def save(self):
234 """Saves the current cache to disk."""
235 raise NotImplementedError()
236
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400237 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000238 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400239
240 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000241 Slice with the size of evicted items.
242 """
243 raise NotImplementedError()
244
245 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000246 """Deletes any corrupted item from the cache, then calls trim(), then
247 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000248
249 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400250 """
251 raise NotImplementedError()
252
253
254class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400255 """Content addressed cache that stores objects temporarily.
256
257 It can be accessed concurrently from multiple threads, so it should protect
258 its internal state with some lock.
259 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400260
261 def __enter__(self):
262 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000263 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400264 return self
265
266 def __exit__(self, _exc_type, _exec_value, _traceback):
267 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000268 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400269 return False
270
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400271 def touch(self, digest, size):
272 """Ensures item is not corrupted and updates its LRU position.
273
274 Arguments:
275 digest: hash digest of item to check.
276 size: expected size of this item.
277
278 Returns:
279 True if item is in cache and not corrupted.
280 """
281 raise NotImplementedError()
282
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400283 def getfileobj(self, digest):
284 """Returns a readable file like object.
285
286 If file exists on the file system it will have a .name attribute with an
287 absolute path to the file.
288 """
289 raise NotImplementedError()
290
291 def write(self, digest, content):
292 """Reads data from |content| generator and stores it in cache.
293
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000294 It is possible to write to an object that already exists. It may be
295 ignored (sent to /dev/null) but the timestamp is still updated.
296
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297 Returns digest to simplify chaining.
298 """
299 raise NotImplementedError()
300
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301
302class MemoryContentAddressedCache(ContentAddressedCache):
303 """ContentAddressedCache implementation that stores everything in memory."""
304
305 def __init__(self, file_mode_mask=0500):
306 """Args:
307 file_mode_mask: bit mask to AND file mode with. Default value will make
308 all mapped files to be read only.
309 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400310 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400311 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000312 # Items in a LRU lookup dict(digest: size).
313 self._lru = lru.LRUDict()
314
315 # Cache interface implementation.
316
317 def __len__(self):
318 with self._lock:
319 return len(self._lru)
320
321 def __iter__(self):
322 # This is not thread-safe.
323 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400324
325 def __contains__(self, digest):
326 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000327 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400328
329 @property
330 def total_size(self):
331 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000332 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400333
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000334 def get_oldest(self):
335 with self._lock:
336 try:
337 # (key, (value, ts))
338 return self._lru.get_oldest()[1][1]
339 except KeyError:
340 return None
341
342 def remove_oldest(self):
343 with self._lock:
344 # TODO(maruel): Update self._added.
345 # (key, (value, ts))
346 return len(self._lru.pop_oldest()[1][0])
347
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000348 def save(self):
349 pass
350
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000351 def trim(self):
352 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000353 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400354
355 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000356 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400357 pass
358
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000359 # ContentAddressedCache interface implementation.
360
361 def __contains__(self, digest):
362 with self._lock:
363 return digest in self._lru
364
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365 def touch(self, digest, size):
366 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000367 try:
368 self._lru.touch(digest)
369 except KeyError:
370 return False
371 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400372
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400373 def getfileobj(self, digest):
374 with self._lock:
375 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000376 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400377 except KeyError:
378 raise CacheMiss(digest)
379 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000380 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400381 return io.BytesIO(d)
382
383 def write(self, digest, content):
384 # Assemble whole stream before taking the lock.
385 data = ''.join(content)
386 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000387 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400388 self._added.append(len(data))
389 return digest
390
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392class DiskContentAddressedCache(ContentAddressedCache):
393 """Stateful LRU cache in a flat hash table in a directory.
394
395 Saves its state as json file.
396 """
397 STATE_FILE = u'state.json'
398
399 def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
400 """
401 Arguments:
402 cache_dir: directory where to place the cache.
403 policies: CachePolicies instance, cache retention policies.
404 algo: hashing algorithm used.
405 trim: if True to enforce |policies| right away.
406 It can be done later by calling trim() explicitly.
407 """
408 # All protected methods (starting with '_') except _path should be called
409 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400410 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400411 self.policies = policies
412 self.hash_algo = hash_algo
413 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
414 # Items in a LRU lookup dict(digest: size).
415 self._lru = lru.LRUDict()
416 # Current cached free disk space. It is updated by self._trim().
417 file_path.ensure_tree(self.cache_dir)
418 self._free_disk = file_path.get_free_space(self.cache_dir)
419 # The first item in the LRU cache that must not be evicted during this run
420 # since it was referenced. All items more recent that _protected in the LRU
421 # cache are also inherently protected. It could be a set() of all items
422 # referenced but this increases memory usage without a use case.
423 self._protected = None
424 # Cleanup operations done by self._load(), if any.
425 self._operations = []
426 with tools.Profiler('Setup'):
427 with self._lock:
428 self._load(trim, time_fn)
429
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000430 # Cache interface implementation.
431
432 def __len__(self):
433 with self._lock:
434 return len(self._lru)
435
436 def __iter__(self):
437 # This is not thread-safe.
438 return self._lru.__iter__()
439
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400440 def __contains__(self, digest):
441 with self._lock:
442 return digest in self._lru
443
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400444 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445 def total_size(self):
446 with self._lock:
447 return sum(self._lru.itervalues())
448
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000449 def get_oldest(self):
450 with self._lock:
451 try:
452 # (key, (value, ts))
453 return self._lru.get_oldest()[1][1]
454 except KeyError:
455 return None
456
457 def remove_oldest(self):
458 with self._lock:
459 # TODO(maruel): Update self._added.
460 return self._remove_lru_file(True)
461
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000462 def save(self):
463 with self._lock:
464 return self._save()
465
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000466 def trim(self):
467 """Forces retention policies."""
468 with self._lock:
469 return self._trim()
470
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400471 def cleanup(self):
472 """Cleans up the cache directory.
473
474 Ensures there is no unknown files in cache_dir.
475 Ensures the read-only bits are set correctly.
476
477 At that point, the cache was already loaded, trimmed to respect cache
478 policies.
479 """
480 with self._lock:
481 fs.chmod(self.cache_dir, 0700)
482 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000483 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400484 # It'd be faster if there were a readdir() function.
485 for filename in fs.listdir(self.cache_dir):
486 if filename == self.STATE_FILE:
487 fs.chmod(os.path.join(self.cache_dir, filename), 0600)
488 continue
489 if filename in previous:
490 fs.chmod(os.path.join(self.cache_dir, filename), 0400)
491 previous.remove(filename)
492 continue
493
494 # An untracked file. Delete it.
495 logging.warning('Removing unknown file %s from cache', filename)
496 p = self._path(filename)
497 if fs.isdir(p):
498 try:
499 file_path.rmtree(p)
500 except OSError:
501 pass
502 else:
503 file_path.try_remove(p)
504 continue
505
506 if previous:
507 # Filter out entries that were not found.
508 logging.warning('Removed %d lost files', len(previous))
509 for filename in previous:
510 self._lru.pop(filename)
511 self._save()
512
513 # What remains to be done is to hash every single item to
514 # detect corruption, then save to ensure state.json is up to date.
515 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
516 # TODO(maruel): Let's revisit once directory metadata is stored in
517 # state.json so only the files that had been mapped since the last cleanup()
518 # call are manually verified.
519 #
520 #with self._lock:
521 # for digest in self._lru:
522 # if not isolated_format.is_valid_hash(
523 # self._path(digest), self.hash_algo):
524 # self.evict(digest)
525 # logging.info('Deleted corrupted item: %s', digest)
526
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000527 # ContentAddressedCache interface implementation.
528
529 def __contains__(self, digest):
530 with self._lock:
531 return digest in self._lru
532
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400533 def touch(self, digest, size):
534 """Verifies an actual file is valid and bumps its LRU position.
535
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000536 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400537
538 Note that is doesn't compute the hash so it could still be corrupted if the
539 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400540 """
541 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000542 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400543
544 # Update its LRU position.
545 with self._lock:
546 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000547 if looks_valid:
548 # Exists but not in the LRU anymore.
549 self._delete_file(digest, size)
550 return False
551 if not looks_valid:
552 self._lru.pop(digest)
553 # Exists but not in the LRU anymore.
554 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400555 return False
556 self._lru.touch(digest)
557 self._protected = self._protected or digest
558 return True
559
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400560 def getfileobj(self, digest):
561 try:
562 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400563 except IOError:
564 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000565 with self._lock:
566 try:
567 self._used.append(self._lru[digest])
568 except KeyError:
569 # If the digest is not actually in _lru, assume it is a cache miss.
570 # Existing file will be overwritten by whoever uses the cache and added
571 # to _lru.
572 f.close()
573 raise CacheMiss(digest)
574 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575
576 def write(self, digest, content):
577 assert content is not None
578 with self._lock:
579 self._protected = self._protected or digest
580 path = self._path(digest)
581 # A stale broken file may remain. It is possible for the file to have write
582 # access bit removed which would cause the file_write() call to fail to open
583 # in write mode. Take no chance here.
584 file_path.try_remove(path)
585 try:
586 size = file_write(path, content)
587 except:
588 # There are two possible places were an exception can occur:
589 # 1) Inside |content| generator in case of network or unzipping errors.
590 # 2) Inside file_write itself in case of disk IO errors.
591 # In any case delete an incomplete file and propagate the exception to
592 # caller, it will be logged there.
593 file_path.try_remove(path)
594 raise
595 # Make the file read-only in the cache. This has a few side-effects since
596 # the file node is modified, so every directory entries to this file becomes
597 # read-only. It's fine here because it is a new file.
598 file_path.set_read_only(path, True)
599 with self._lock:
600 self._add(digest, size)
601 return digest
602
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000603 # Internal functions.
604
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400605 def _load(self, trim, time_fn):
606 """Loads state of the cache from json file.
607
608 If cache_dir does not exist on disk, it is created.
609 """
610 self._lock.assert_locked()
611
612 if not fs.isfile(self.state_file):
613 if not fs.isdir(self.cache_dir):
614 fs.makedirs(self.cache_dir)
615 else:
616 # Load state of the cache.
617 try:
618 self._lru = lru.LRUDict.load(self.state_file)
619 except ValueError as err:
620 logging.error('Failed to load cache state: %s' % (err,))
621 # Don't want to keep broken state file.
622 file_path.try_remove(self.state_file)
623 if time_fn:
624 self._lru.time_fn = time_fn
625 if trim:
626 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400627
628 def _save(self):
629 """Saves the LRU ordering."""
630 self._lock.assert_locked()
631 if sys.platform != 'win32':
632 d = os.path.dirname(self.state_file)
633 if fs.isdir(d):
634 # Necessary otherwise the file can't be created.
635 file_path.set_read_only(d, False)
636 if fs.isfile(self.state_file):
637 file_path.set_read_only(self.state_file, False)
638 self._lru.save(self.state_file)
639
640 def _trim(self):
641 """Trims anything we don't know, make sure enough free space exists."""
642 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000643 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400644
645 # Trim old items.
646 if self.policies.max_age_secs:
647 cutoff = self._lru.time_fn() - self.policies.max_age_secs
648 while self._lru:
649 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000650 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400651 if oldest[1][1] >= cutoff:
652 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000653 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400654
655 # Ensure maximum cache size.
656 if self.policies.max_cache_size:
657 total_size = sum(self._lru.itervalues())
658 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000659 e = self._remove_lru_file(True)
660 evicted.append(e)
661 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662
663 # Ensure maximum number of items in the cache.
664 if self.policies.max_items and len(self._lru) > self.policies.max_items:
665 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000666 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400667
668 # Ensure enough free space.
669 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400670 while (
671 self.policies.min_free_space and
672 self._lru and
673 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000674 # self._free_disk is updated by this call.
675 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400676
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000677 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 total_usage = sum(self._lru.itervalues())
679 usage_percent = 0.
680 if total_usage:
681 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
682
683 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000684 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
685 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
686 '%.1fkb)',
687 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400688 self._free_disk / 1024.,
689 total_usage / 1024.,
690 usage_percent,
691 self.policies.max_cache_size / 1024.)
692 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000693 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694
695 def _path(self, digest):
696 """Returns the path to one item."""
697 return os.path.join(self.cache_dir, digest)
698
699 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000700 """Removes the lastest recently used file and returns its size.
701
702 Updates self._free_disk.
703 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400704 self._lock.assert_locked()
705 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000706 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400707 if not allow_protected and digest == self._protected:
708 total_size = sum(self._lru.itervalues())+size
709 msg = (
710 'Not enough space to fetch the whole isolated tree.\n'
711 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
712 self.policies, total_size, len(self._lru)+1, self._free_disk)
713 raise NoMoreSpace(msg)
714 except KeyError:
715 # That means an internal error.
716 raise NoMoreSpace('Nothing to remove, can\'t happend')
717 digest, (size, _) = self._lru.pop_oldest()
718 logging.debug('Removing LRU file %s', digest)
719 self._delete_file(digest, size)
720 return size
721
722 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
723 """Adds an item into LRU cache marking it as a newest one."""
724 self._lock.assert_locked()
725 if size == UNKNOWN_FILE_SIZE:
726 size = fs.stat(self._path(digest)).st_size
727 self._added.append(size)
728 self._lru.add(digest, size)
729 self._free_disk -= size
730 # Do a quicker version of self._trim(). It only enforces free disk space,
731 # not cache size limits. It doesn't actually look at real free disk space,
732 # only uses its cache values. self._trim() will be called later to enforce
733 # real trimming but doing this quick version here makes it possible to map
734 # an isolated that is larger than the current amount of free disk space when
735 # the cache size is already large.
736 while (
737 self.policies.min_free_space and
738 self._lru and
739 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000740 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400741 if self._remove_lru_file(False) == -1:
742 break
743
744 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745 """Deletes cache file from the file system.
746
747 Updates self._free_disk.
748 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400749 self._lock.assert_locked()
750 try:
751 if size == UNKNOWN_FILE_SIZE:
752 try:
753 size = fs.stat(self._path(digest)).st_size
754 except OSError:
755 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000756 if file_path.try_remove(self._path(digest)):
757 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400758 except OSError as e:
759 if e.errno != errno.ENOENT:
760 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400761
762
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400763class NamedCache(Cache):
764 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400765
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400766 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400767 name is a short identifier that describes the contents of the cache, e.g.
768 "git_v8" could be all git repositories required by v8 builds, or
769 "build_chromium" could be build artefacts of the Chromium.
770 path is a directory path relative to the task run dir. Cache installation
771 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400772 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400773 _DIR_ALPHABET = string.ascii_letters + string.digits
774 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000775 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400776
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400777 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400778 """Initializes NamedCaches.
779
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400780 Arguments:
781 - cache_dir is a directory for persistent cache storage.
782 - policies is a CachePolicies instance.
783 - time_fn is a function that returns timestamp (float) and used to take
784 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400785 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400786 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400787 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000788 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400789 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
790 self._lru = lru.LRUDict()
791 if not fs.isdir(self.cache_dir):
792 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000793 elif os.path.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000794 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400795 self._lru = lru.LRUDict.load(self.state_file)
796 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000797 logging.exception(
798 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400799 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000800 with self._lock:
801 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802 if time_fn:
803 self._lru.time_fn = time_fn
804
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805 @property
806 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000807 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400808 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000809 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810
811 def install(self, path, name):
812 """Moves the directory for the specified named cache to |path|.
813
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000814 path must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815
816 Raises NamedCacheError if cannot install the cache.
817 """
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000818 logging.info('NamedCache.install(%r, %r)', path, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400819 with self._lock:
820 try:
821 if os.path.isdir(path):
822 raise NamedCacheError(
823 'installation directory %r already exists' % path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000825 link_name = os.path.join(self.cache_dir, name)
826 if fs.exists(link_name):
827 fs.rmtree(link_name)
828
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000829 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000830 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400831 abs_cache = os.path.join(self.cache_dir, rel_cache)
832 if os.path.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000833 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 file_path.ensure_tree(os.path.dirname(path))
835 fs.rename(abs_cache, path)
836 self._remove(name)
837 return
838
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000839 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000842 # The named cache does not exist, create an empty directory. When
843 # uninstalling, we will move it back to the cache and create an an
844 # entry.
845 logging.info('- creating new directory')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400846 file_path.ensure_tree(path)
847 except (IOError, OSError) as ex:
848 raise NamedCacheError(
849 'cannot install cache named %r at %r: %s' % (
850 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000851 finally:
852 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400853
854 def uninstall(self, path, name):
855 """Moves the cache directory back. Opposite to install().
856
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000857 path must be absolute and unicode.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400858
859 Raises NamedCacheError if cannot uninstall the cache.
860 """
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000861 logging.info('NamedCache.uninstall(%r, %r)', path, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400862 with self._lock:
863 try:
864 if not os.path.isdir(path):
865 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000866 'NamedCache: Directory %r does not exist anymore. Cache lost.',
867 path)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400868 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400869
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000870 if name in self._lru:
871 # This shouldn't happen but just remove the preexisting one and move
872 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000873 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000874 self._remove(name)
875 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876
877 # Move the dir and create an entry for the named cache.
878 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000879 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400880 file_path.ensure_tree(os.path.dirname(abs_cache))
881 fs.rename(path, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400882
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000883 # That succeeded, calculate its new size.
884 size = _get_recursive_size(abs_cache)
885 if not size:
886 # Do not save empty named cache.
887 return
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000888 logging.info('- Size is %d', size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000889 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000890 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000891
892 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
893 # for user convenience.
894 named_path = self._get_named_path(name)
895 if os.path.exists(named_path):
896 file_path.remove(named_path)
897 else:
898 file_path.ensure_tree(os.path.dirname(named_path))
899
900 try:
901 fs.symlink(abs_cache, named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000902 logging.info(
903 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000904 except OSError:
905 # Ignore on Windows. It happens when running as a normal user or when
906 # UAC is enabled and the user is a filtered administrator account.
907 if sys.platform != 'win32':
908 raise
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400909 except (IOError, OSError) as ex:
910 raise NamedCacheError(
911 'cannot uninstall cache named %r at %r: %s' % (
912 name, path, ex))
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000913 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000914 # Call save() at every uninstall. The assumptions are:
915 # - The total the number of named caches is low, so the state.json file
916 # is small, so the time it takes to write it to disk is short.
917 # - The number of mapped named caches per task is low, so the number of
918 # times save() is called on tear-down isn't high enough to be
919 # significant.
920 # - uninstall() sometimes throws due to file locking on Windows or
921 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000922 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400923
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000924 # Cache interface implementation.
925
926 def __len__(self):
927 with self._lock:
928 return len(self._lru)
929
930 def __iter__(self):
931 # This is not thread-safe.
932 return self._lru.__iter__()
933
934 def __contains__(self, digest):
935 with self._lock:
936 return digest in self._lru
937
938 @property
939 def total_size(self):
940 with self._lock:
941 return sum(size for _rel_path, size in self._lru.itervalues())
942
943 def get_oldest(self):
944 with self._lock:
945 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000946 # (key, (value, ts))
947 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000948 except KeyError:
949 return None
950
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000951 def remove_oldest(self):
952 with self._lock:
953 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000954 _name, size = self._remove_lru_item()
955 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000956
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000957 def save(self):
958 with self._lock:
959 return self._save()
960
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400961 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000962 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400963 with self._lock:
964 if not os.path.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000965 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400966
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400967 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000968 if self._policies.max_items:
969 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000970 name, size = self._remove_lru_item()
971 evicted.append(size)
972 logging.info(
973 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
974 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400975
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400976 # Trim according to maximum age.
977 if self._policies.max_age_secs:
978 cutoff = self._lru.time_fn() - self._policies.max_age_secs
979 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000980 _name, (_data, ts) = self._lru.get_oldest()
981 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400982 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000983 name, size = self._remove_lru_item()
984 evicted.append(size)
985 logging.info(
986 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
987 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400988
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400989 # Trim according to minimum free space.
990 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000991 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400992 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000993 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400994 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000995 name, size = self._remove_lru_item()
996 evicted.append(size)
997 logging.info(
998 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
999 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001000
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001001 # Trim according to maximum total size.
1002 if self._policies.max_cache_size:
1003 while self._lru:
1004 total = sum(size for _rel_cache, size in self._lru.itervalues())
1005 if total <= self._policies.max_cache_size:
1006 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001007 name, size = self._remove_lru_item()
1008 evicted.append(size)
1009 logging.info(
1010 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1011 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001012
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001013 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001014 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001015
1016 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001017 """Removes unknown directories.
1018
1019 Does not recalculate the cache size since it's surprisingly slow on some
1020 OSes.
1021 """
1022 success = True
1023 with self._lock:
1024 try:
1025 actual = set(fs.listdir(self.cache_dir))
1026 actual.discard(self.NAMED_DIR)
1027 actual.discard(self.STATE_FILE)
1028 expected = {v[0]: k for k, v in self._lru.iteritems()}
1029 # First, handle the actual cache content.
1030 # Remove missing entries.
1031 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001032 name, size = self._lru.pop(expected[missing])
1033 logging.warning(
1034 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001035 # Remove unexpected items.
1036 for unexpected in (actual - set(expected)):
1037 try:
1038 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001039 logging.warning(
1040 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001041 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001042 file_path.rmtree(p)
1043 else:
1044 fs.remove(p)
1045 except (IOError, OSError) as e:
1046 logging.error('Failed to remove %s: %s', unexpected, e)
1047 success = False
1048
1049 # Second, fix named cache links.
1050 named = os.path.join(self.cache_dir, self.NAMED_DIR)
1051 if os.path.isdir(named):
1052 actual = set(fs.listdir(named))
1053 expected = set(self._lru)
1054 # Confirm entries. Do not add missing ones for now.
1055 for name in expected.intersection(actual):
1056 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
1057 expected_link = os.path.join(self.cache_dir, self._lru[name][0])
1058 if fs.islink(p):
Marc-Antoine Ruel5d5ec6d2018-06-18 13:31:34 +00001059 if sys.platform == 'win32':
1060 # TODO(maruel): Implement readlink() on Windows in fs.py, then
1061 # remove this condition.
1062 # https://crbug.com/853721
1063 continue
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001064 link = fs.readlink(p)
1065 if expected_link == link:
1066 continue
1067 logging.warning(
1068 'Unexpected symlink for cache %s: %s, expected %s',
1069 name, link, expected_link)
1070 else:
1071 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001072 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001073 file_path.rmtree(p)
1074 else:
1075 fs.remove(p)
1076 # Remove unexpected items.
1077 for unexpected in (actual - expected):
1078 try:
1079 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1080 if fs.isdir(p):
1081 file_path.rmtree(p)
1082 else:
1083 fs.remove(p)
1084 except (IOError, OSError) as e:
1085 logging.error('Failed to remove %s: %s', unexpected, e)
1086 success = False
1087 finally:
1088 self._save()
1089 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001090
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001091 # Internal functions.
1092
1093 def _try_upgrade(self):
1094 """Upgrades from the old format to the new one if necessary.
1095
1096 This code can be removed so all bots are known to have the right new format.
1097 """
1098 if not self._lru:
1099 return
1100 _name, (data, _ts) = self._lru.get_oldest()
1101 if isinstance(data, (list, tuple)):
1102 return
1103 # Update to v2.
1104 def upgrade(_name, rel_cache):
1105 abs_cache = os.path.join(self.cache_dir, rel_cache)
1106 return rel_cache, _get_recursive_size(abs_cache)
1107 self._lru.transform(upgrade)
1108 self._save()
1109
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001110 def _remove_lru_item(self):
1111 """Removes the oldest LRU entry. LRU must not be empty."""
1112 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1113 logging.info('Removing named cache %r', name)
1114 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001115 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001116
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001117 def _allocate_dir(self):
1118 """Creates and returns relative path of a new cache directory."""
1119 # We randomly generate directory names that have two lower/upper case
1120 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1121 abc_len = len(self._DIR_ALPHABET)
1122 tried = set()
1123 while len(tried) < 1000:
1124 i = random.randint(0, abc_len * abc_len - 1)
1125 rel_path = (
1126 self._DIR_ALPHABET[i / abc_len] +
1127 self._DIR_ALPHABET[i % abc_len])
1128 if rel_path in tried:
1129 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001130 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001131 if not fs.exists(abs_path):
1132 return rel_path
1133 tried.add(rel_path)
1134 raise NamedCacheError(
1135 'could not allocate a new cache dir, too many cache dirs')
1136
1137 def _remove(self, name):
1138 """Removes a cache directory and entry.
1139
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001140 Returns:
1141 Number of caches deleted.
1142 """
1143 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001144 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001145 named_dir = self._get_named_path(name)
1146 if fs.islink(named_dir):
1147 fs.unlink(named_dir)
1148
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001149 # Then remove the actual data.
1150 if name not in self._lru:
1151 return
1152 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001153 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001154 if os.path.isdir(abs_path):
1155 file_path.rmtree(abs_path)
1156 self._lru.pop(name)
1157
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001158 def _save(self):
1159 self._lock.assert_locked()
1160 file_path.ensure_tree(self.cache_dir)
1161 self._lru.save(self.state_file)
1162
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001163 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001164 return os.path.join(self.cache_dir, self.NAMED_DIR, name)