blob: f5e7213d0ce9b189c11475fc24c4ac1c040b17c3 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
22
23
24# The file size to be used when we don't know the correct file size,
25# generally used for .isolated files.
26UNKNOWN_FILE_SIZE = None
27
28
29def file_write(path, content_generator):
30 """Writes file content as generated by content_generator.
31
32 Creates the intermediary directory as needed.
33
34 Returns the number of bytes written.
35
36 Meant to be mocked out in unit tests.
37 """
38 file_path.ensure_tree(os.path.dirname(path))
39 total = 0
40 with fs.open(path, 'wb') as f:
41 for d in content_generator:
42 total += len(d)
43 f.write(d)
44 return total
45
46
47def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000048 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040049
50 Currently it just checks the file exists and its size matches the expectation.
51 """
52 if size == UNKNOWN_FILE_SIZE:
53 return fs.isfile(path)
54 try:
55 actual_size = fs.stat(path).st_size
56 except OSError as e:
57 logging.warning(
58 'Can\'t read item %s, assuming it\'s invalid: %s',
59 os.path.basename(path), e)
60 return False
61 if size != actual_size:
62 logging.warning(
63 'Found invalid item %s; %d != %d',
64 os.path.basename(path), actual_size, size)
65 return False
66 return True
67
68
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000069def trim_caches(caches, path, min_free_space, max_age_secs):
70 """Trims multiple caches.
71
72 The goal here is to coherently trim all caches in a coherent LRU fashion,
73 deleting older items independent of which container they belong to.
74
75 Two policies are enforced first:
76 - max_age_secs
77 - min_free_space
78
79 Once that's done, then we enforce each cache's own policies.
80
81 Returns:
82 Slice containing the size of all items evicted.
83 """
84 min_ts = time.time() - max_age_secs if max_age_secs else 0
85 free_disk = file_path.get_free_space(path) if min_free_space else 0
86 total = []
87 if min_ts or free_disk:
88 while True:
89 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
90 if not oldest:
91 break
92 oldest.sort(key=lambda (_, ts): ts)
93 c, ts = oldest[0]
94 if ts >= min_ts and free_disk >= min_free_space:
95 break
96 total.append(c.remove_oldest())
97 if min_free_space:
98 free_disk = file_path.get_free_space(path)
99 # Evaluate each cache's own policies.
100 for c in caches:
101 total.extend(c.trim())
102 return total
103
104
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000105def _get_recursive_size(path):
106 """Returns the total data size for the specified path.
107
108 This function can be surprisingly slow on OSX, so its output should be cached.
109 """
110 try:
111 total = 0
112 for root, _, files in fs.walk(path):
113 for f in files:
114 total += fs.lstat(os.path.join(root, f)).st_size
115 return total
116 except (IOError, OSError, UnicodeEncodeError) as exc:
117 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
118 return None
119
120
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400121class NamedCacheError(Exception):
122 """Named cache specific error."""
123
124
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400125class NoMoreSpace(Exception):
126 """Not enough space to map the whole directory."""
127 pass
128
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400129
130class CachePolicies(object):
131 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
132 """Common caching policies for the multiple caches (isolated, named, cipd).
133
134 Arguments:
135 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
136 cache is effectively a leak.
137 - min_free_space: Trim if disk free space becomes lower than this value. If
138 0, it will unconditionally fill the disk.
139 - max_items: Maximum number of items to keep in the cache. If 0, do not
140 enforce a limit.
141 - max_age_secs: Maximum age an item is kept in the cache until it is
142 automatically evicted. Having a lot of dead luggage slows
143 everything down.
144 """
145 self.max_cache_size = max_cache_size
146 self.min_free_space = min_free_space
147 self.max_items = max_items
148 self.max_age_secs = max_age_secs
149
150 def __str__(self):
151 return (
152 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
153 'max_age_secs=%s)') % (
154 self.max_cache_size, self.max_items, self.min_free_space,
155 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400156
157
158class CacheMiss(Exception):
159 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400160 def __init__(self, digest):
161 self.digest = digest
162 super(CacheMiss, self).__init__(
163 'Item with digest %r is not found in cache' % digest)
164
165
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400166class Cache(object):
167 def __init__(self, cache_dir):
168 if cache_dir is not None:
169 assert isinstance(cache_dir, unicode), cache_dir
170 assert file_path.isabs(cache_dir), cache_dir
171 self.cache_dir = cache_dir
172 self._lock = threading_utils.LockWithAssert()
173 # Profiling values.
174 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400175 self._used = []
176
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000177 def __nonzero__(self):
178 """A cache is always True.
179
180 Otherwise it falls back to __len__, which is surprising.
181 """
182 return True
183
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000184 def __len__(self):
185 """Returns the number of entries in the cache."""
186 raise NotImplementedError()
187
188 def __iter__(self):
189 """Iterates over all the entries names."""
190 raise NotImplementedError()
191
192 def __contains__(self, name):
193 """Returns if an entry is in the cache."""
194 raise NotImplementedError()
195
196 @property
197 def total_size(self):
198 """Returns the total size of the cache in bytes."""
199 raise NotImplementedError()
200
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400201 @property
202 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000203 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400204 with self._lock:
205 return self._added[:]
206
207 @property
208 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000209 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400210 with self._lock:
211 return self._used[:]
212
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000213 def get_oldest(self):
214 """Returns timestamp of oldest cache entry or None.
215
216 Returns:
217 Timestamp of the oldest item.
218
219 Used for manual trimming.
220 """
221 raise NotImplementedError()
222
223 def remove_oldest(self):
224 """Removes the oldest item from the cache.
225
226 Returns:
227 Size of the oldest item.
228
229 Used for manual trimming.
230 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400231 raise NotImplementedError()
232
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000233 def save(self):
234 """Saves the current cache to disk."""
235 raise NotImplementedError()
236
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400237 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000238 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400239
240 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000241 Slice with the size of evicted items.
242 """
243 raise NotImplementedError()
244
245 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000246 """Deletes any corrupted item from the cache, then calls trim(), then
247 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000248
249 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400250 """
251 raise NotImplementedError()
252
253
254class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400255 """Content addressed cache that stores objects temporarily.
256
257 It can be accessed concurrently from multiple threads, so it should protect
258 its internal state with some lock.
259 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400260
261 def __enter__(self):
262 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000263 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400264 return self
265
266 def __exit__(self, _exc_type, _exec_value, _traceback):
267 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000268 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400269 return False
270
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400271 def touch(self, digest, size):
272 """Ensures item is not corrupted and updates its LRU position.
273
274 Arguments:
275 digest: hash digest of item to check.
276 size: expected size of this item.
277
278 Returns:
279 True if item is in cache and not corrupted.
280 """
281 raise NotImplementedError()
282
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400283 def getfileobj(self, digest):
284 """Returns a readable file like object.
285
286 If file exists on the file system it will have a .name attribute with an
287 absolute path to the file.
288 """
289 raise NotImplementedError()
290
291 def write(self, digest, content):
292 """Reads data from |content| generator and stores it in cache.
293
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000294 It is possible to write to an object that already exists. It may be
295 ignored (sent to /dev/null) but the timestamp is still updated.
296
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297 Returns digest to simplify chaining.
298 """
299 raise NotImplementedError()
300
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301
302class MemoryContentAddressedCache(ContentAddressedCache):
303 """ContentAddressedCache implementation that stores everything in memory."""
304
305 def __init__(self, file_mode_mask=0500):
306 """Args:
307 file_mode_mask: bit mask to AND file mode with. Default value will make
308 all mapped files to be read only.
309 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400310 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400311 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000312 # Items in a LRU lookup dict(digest: size).
313 self._lru = lru.LRUDict()
314
315 # Cache interface implementation.
316
317 def __len__(self):
318 with self._lock:
319 return len(self._lru)
320
321 def __iter__(self):
322 # This is not thread-safe.
323 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400324
325 def __contains__(self, digest):
326 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000327 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400328
329 @property
330 def total_size(self):
331 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000332 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400333
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000334 def get_oldest(self):
335 with self._lock:
336 try:
337 # (key, (value, ts))
338 return self._lru.get_oldest()[1][1]
339 except KeyError:
340 return None
341
342 def remove_oldest(self):
343 with self._lock:
344 # TODO(maruel): Update self._added.
345 # (key, (value, ts))
346 return len(self._lru.pop_oldest()[1][0])
347
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000348 def save(self):
349 pass
350
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000351 def trim(self):
352 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000353 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400354
355 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000356 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400357 pass
358
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000359 # ContentAddressedCache interface implementation.
360
361 def __contains__(self, digest):
362 with self._lock:
363 return digest in self._lru
364
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365 def touch(self, digest, size):
366 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000367 try:
368 self._lru.touch(digest)
369 except KeyError:
370 return False
371 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400372
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400373 def getfileobj(self, digest):
374 with self._lock:
375 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000376 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400377 except KeyError:
378 raise CacheMiss(digest)
379 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000380 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400381 return io.BytesIO(d)
382
383 def write(self, digest, content):
384 # Assemble whole stream before taking the lock.
385 data = ''.join(content)
386 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000387 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400388 self._added.append(len(data))
389 return digest
390
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392class DiskContentAddressedCache(ContentAddressedCache):
393 """Stateful LRU cache in a flat hash table in a directory.
394
395 Saves its state as json file.
396 """
397 STATE_FILE = u'state.json'
398
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000399 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400400 """
401 Arguments:
402 cache_dir: directory where to place the cache.
403 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000405 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400406 """
407 # All protected methods (starting with '_') except _path should be called
408 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400409 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400411 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
412 # Items in a LRU lookup dict(digest: size).
413 self._lru = lru.LRUDict()
414 # Current cached free disk space. It is updated by self._trim().
415 file_path.ensure_tree(self.cache_dir)
416 self._free_disk = file_path.get_free_space(self.cache_dir)
417 # The first item in the LRU cache that must not be evicted during this run
418 # since it was referenced. All items more recent that _protected in the LRU
419 # cache are also inherently protected. It could be a set() of all items
420 # referenced but this increases memory usage without a use case.
421 self._protected = None
422 # Cleanup operations done by self._load(), if any.
423 self._operations = []
424 with tools.Profiler('Setup'):
425 with self._lock:
426 self._load(trim, time_fn)
427
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000428 # Cache interface implementation.
429
430 def __len__(self):
431 with self._lock:
432 return len(self._lru)
433
434 def __iter__(self):
435 # This is not thread-safe.
436 return self._lru.__iter__()
437
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 def __contains__(self, digest):
439 with self._lock:
440 return digest in self._lru
441
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400442 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 def total_size(self):
444 with self._lock:
445 return sum(self._lru.itervalues())
446
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000447 def get_oldest(self):
448 with self._lock:
449 try:
450 # (key, (value, ts))
451 return self._lru.get_oldest()[1][1]
452 except KeyError:
453 return None
454
455 def remove_oldest(self):
456 with self._lock:
457 # TODO(maruel): Update self._added.
458 return self._remove_lru_file(True)
459
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000460 def save(self):
461 with self._lock:
462 return self._save()
463
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000464 def trim(self):
465 """Forces retention policies."""
466 with self._lock:
467 return self._trim()
468
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400469 def cleanup(self):
470 """Cleans up the cache directory.
471
472 Ensures there is no unknown files in cache_dir.
473 Ensures the read-only bits are set correctly.
474
475 At that point, the cache was already loaded, trimmed to respect cache
476 policies.
477 """
478 with self._lock:
479 fs.chmod(self.cache_dir, 0700)
480 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000481 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400482 # It'd be faster if there were a readdir() function.
483 for filename in fs.listdir(self.cache_dir):
484 if filename == self.STATE_FILE:
485 fs.chmod(os.path.join(self.cache_dir, filename), 0600)
486 continue
487 if filename in previous:
488 fs.chmod(os.path.join(self.cache_dir, filename), 0400)
489 previous.remove(filename)
490 continue
491
492 # An untracked file. Delete it.
493 logging.warning('Removing unknown file %s from cache', filename)
494 p = self._path(filename)
495 if fs.isdir(p):
496 try:
497 file_path.rmtree(p)
498 except OSError:
499 pass
500 else:
501 file_path.try_remove(p)
502 continue
503
504 if previous:
505 # Filter out entries that were not found.
506 logging.warning('Removed %d lost files', len(previous))
507 for filename in previous:
508 self._lru.pop(filename)
509 self._save()
510
511 # What remains to be done is to hash every single item to
512 # detect corruption, then save to ensure state.json is up to date.
513 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
514 # TODO(maruel): Let's revisit once directory metadata is stored in
515 # state.json so only the files that had been mapped since the last cleanup()
516 # call are manually verified.
517 #
518 #with self._lock:
519 # for digest in self._lru:
520 # if not isolated_format.is_valid_hash(
521 # self._path(digest), self.hash_algo):
522 # self.evict(digest)
523 # logging.info('Deleted corrupted item: %s', digest)
524
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000525 # ContentAddressedCache interface implementation.
526
527 def __contains__(self, digest):
528 with self._lock:
529 return digest in self._lru
530
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400531 def touch(self, digest, size):
532 """Verifies an actual file is valid and bumps its LRU position.
533
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000534 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400535
536 Note that is doesn't compute the hash so it could still be corrupted if the
537 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400538 """
539 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000540 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400541
542 # Update its LRU position.
543 with self._lock:
544 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000545 if looks_valid:
546 # Exists but not in the LRU anymore.
547 self._delete_file(digest, size)
548 return False
549 if not looks_valid:
550 self._lru.pop(digest)
551 # Exists but not in the LRU anymore.
552 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400553 return False
554 self._lru.touch(digest)
555 self._protected = self._protected or digest
556 return True
557
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400558 def getfileobj(self, digest):
559 try:
560 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400561 except IOError:
562 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000563 with self._lock:
564 try:
565 self._used.append(self._lru[digest])
566 except KeyError:
567 # If the digest is not actually in _lru, assume it is a cache miss.
568 # Existing file will be overwritten by whoever uses the cache and added
569 # to _lru.
570 f.close()
571 raise CacheMiss(digest)
572 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400573
574 def write(self, digest, content):
575 assert content is not None
576 with self._lock:
577 self._protected = self._protected or digest
578 path = self._path(digest)
579 # A stale broken file may remain. It is possible for the file to have write
580 # access bit removed which would cause the file_write() call to fail to open
581 # in write mode. Take no chance here.
582 file_path.try_remove(path)
583 try:
584 size = file_write(path, content)
585 except:
586 # There are two possible places were an exception can occur:
587 # 1) Inside |content| generator in case of network or unzipping errors.
588 # 2) Inside file_write itself in case of disk IO errors.
589 # In any case delete an incomplete file and propagate the exception to
590 # caller, it will be logged there.
591 file_path.try_remove(path)
592 raise
593 # Make the file read-only in the cache. This has a few side-effects since
594 # the file node is modified, so every directory entries to this file becomes
595 # read-only. It's fine here because it is a new file.
596 file_path.set_read_only(path, True)
597 with self._lock:
598 self._add(digest, size)
599 return digest
600
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000601 # Internal functions.
602
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400603 def _load(self, trim, time_fn):
604 """Loads state of the cache from json file.
605
606 If cache_dir does not exist on disk, it is created.
607 """
608 self._lock.assert_locked()
609
610 if not fs.isfile(self.state_file):
611 if not fs.isdir(self.cache_dir):
612 fs.makedirs(self.cache_dir)
613 else:
614 # Load state of the cache.
615 try:
616 self._lru = lru.LRUDict.load(self.state_file)
617 except ValueError as err:
618 logging.error('Failed to load cache state: %s' % (err,))
619 # Don't want to keep broken state file.
620 file_path.try_remove(self.state_file)
621 if time_fn:
622 self._lru.time_fn = time_fn
623 if trim:
624 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400625
626 def _save(self):
627 """Saves the LRU ordering."""
628 self._lock.assert_locked()
629 if sys.platform != 'win32':
630 d = os.path.dirname(self.state_file)
631 if fs.isdir(d):
632 # Necessary otherwise the file can't be created.
633 file_path.set_read_only(d, False)
634 if fs.isfile(self.state_file):
635 file_path.set_read_only(self.state_file, False)
636 self._lru.save(self.state_file)
637
638 def _trim(self):
639 """Trims anything we don't know, make sure enough free space exists."""
640 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000641 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400642
643 # Trim old items.
644 if self.policies.max_age_secs:
645 cutoff = self._lru.time_fn() - self.policies.max_age_secs
646 while self._lru:
647 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000648 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400649 if oldest[1][1] >= cutoff:
650 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000651 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400652
653 # Ensure maximum cache size.
654 if self.policies.max_cache_size:
655 total_size = sum(self._lru.itervalues())
656 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000657 e = self._remove_lru_file(True)
658 evicted.append(e)
659 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400660
661 # Ensure maximum number of items in the cache.
662 if self.policies.max_items and len(self._lru) > self.policies.max_items:
663 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000664 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400665
666 # Ensure enough free space.
667 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400668 while (
669 self.policies.min_free_space and
670 self._lru and
671 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000672 # self._free_disk is updated by this call.
673 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400674
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000675 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400676 total_usage = sum(self._lru.itervalues())
677 usage_percent = 0.
678 if total_usage:
679 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
680
681 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000682 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
683 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
684 '%.1fkb)',
685 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400686 self._free_disk / 1024.,
687 total_usage / 1024.,
688 usage_percent,
689 self.policies.max_cache_size / 1024.)
690 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000691 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400692
693 def _path(self, digest):
694 """Returns the path to one item."""
695 return os.path.join(self.cache_dir, digest)
696
697 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 """Removes the lastest recently used file and returns its size.
699
700 Updates self._free_disk.
701 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400702 self._lock.assert_locked()
703 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000704 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 if not allow_protected and digest == self._protected:
706 total_size = sum(self._lru.itervalues())+size
707 msg = (
708 'Not enough space to fetch the whole isolated tree.\n'
709 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
710 self.policies, total_size, len(self._lru)+1, self._free_disk)
711 raise NoMoreSpace(msg)
712 except KeyError:
713 # That means an internal error.
714 raise NoMoreSpace('Nothing to remove, can\'t happend')
715 digest, (size, _) = self._lru.pop_oldest()
716 logging.debug('Removing LRU file %s', digest)
717 self._delete_file(digest, size)
718 return size
719
720 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
721 """Adds an item into LRU cache marking it as a newest one."""
722 self._lock.assert_locked()
723 if size == UNKNOWN_FILE_SIZE:
724 size = fs.stat(self._path(digest)).st_size
725 self._added.append(size)
726 self._lru.add(digest, size)
727 self._free_disk -= size
728 # Do a quicker version of self._trim(). It only enforces free disk space,
729 # not cache size limits. It doesn't actually look at real free disk space,
730 # only uses its cache values. self._trim() will be called later to enforce
731 # real trimming but doing this quick version here makes it possible to map
732 # an isolated that is larger than the current amount of free disk space when
733 # the cache size is already large.
734 while (
735 self.policies.min_free_space and
736 self._lru and
737 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000738 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400739 if self._remove_lru_file(False) == -1:
740 break
741
742 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000743 """Deletes cache file from the file system.
744
745 Updates self._free_disk.
746 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400747 self._lock.assert_locked()
748 try:
749 if size == UNKNOWN_FILE_SIZE:
750 try:
751 size = fs.stat(self._path(digest)).st_size
752 except OSError:
753 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000754 if file_path.try_remove(self._path(digest)):
755 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400756 except OSError as e:
757 if e.errno != errno.ENOENT:
758 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400759
760
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400761class NamedCache(Cache):
762 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400763
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400764 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400765 name is a short identifier that describes the contents of the cache, e.g.
766 "git_v8" could be all git repositories required by v8 builds, or
767 "build_chromium" could be build artefacts of the Chromium.
768 path is a directory path relative to the task run dir. Cache installation
769 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400770 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400771 _DIR_ALPHABET = string.ascii_letters + string.digits
772 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000773 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400774
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400775 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400776 """Initializes NamedCaches.
777
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400778 Arguments:
779 - cache_dir is a directory for persistent cache storage.
780 - policies is a CachePolicies instance.
781 - time_fn is a function that returns timestamp (float) and used to take
782 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400783 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400784 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400785 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000786 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400787 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
788 self._lru = lru.LRUDict()
789 if not fs.isdir(self.cache_dir):
790 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000791 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000792 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400793 self._lru = lru.LRUDict.load(self.state_file)
794 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000795 logging.exception(
796 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400797 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000798 with self._lock:
799 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400800 if time_fn:
801 self._lru.time_fn = time_fn
802
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803 @property
804 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000805 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400806 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000807 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000809 def install(self, dst, name):
810 """Creates the directory |dst| and moves a previous named cache |name| if it
811 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400812
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000813 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400814
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000815 Returns the reused named cache size in bytes, or 0 if none was present.
816
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817 Raises NamedCacheError if cannot install the cache.
818 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000819 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400820 with self._lock:
821 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000822 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000824 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000826 # Remove the named symlink if it exists.
827 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000828 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000829 # Remove the symlink itself, not its destination.
830 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000831
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000832 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000833 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000835 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000836 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000837 file_path.ensure_tree(os.path.dirname(dst))
838 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400839 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000840 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400841
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000842 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400843 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400844
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000845 # The named cache does not exist, create an empty directory. When
846 # uninstalling, we will move it back to the cache and create an an
847 # entry.
848 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000849 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000850 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400851 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000852 # Raise using the original traceback.
853 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000854 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000855 raise exc, None, sys.exc_info()[2]
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000856 finally:
857 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400858
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000859 def uninstall(self, src, name):
860 """Moves the cache directory back into the named cache hive for an eventual
861 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400862
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000863 The opposite of install().
864
865 src must be absolute and unicode. Its content is moved back into the local
866 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400867
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000868 Returns the named cache size in bytes.
869
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400870 Raises NamedCacheError if cannot uninstall the cache.
871 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000872 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400873 with self._lock:
874 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000875 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000877 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000878 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400880
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000881 if name in self._lru:
882 # This shouldn't happen but just remove the preexisting one and move
883 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000884 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000885 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000886
887 # Calculate the size of the named cache to keep. It's important because
888 # if size is zero (it's empty), we do not want to add it back to the
889 # named caches cache.
890 size = _get_recursive_size(src)
891 logging.info('- Size is %d', size)
892 if not size:
893 # Do not save empty named cache.
894 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400895
896 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000897 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400898 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000899 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400900 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000901 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400902
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000903 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000904 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000905
906 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
907 # for user convenience.
908 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000909 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000910 file_path.remove(named_path)
911 else:
912 file_path.ensure_tree(os.path.dirname(named_path))
913
914 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000915 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000916 logging.info(
917 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000918 except OSError:
919 # Ignore on Windows. It happens when running as a normal user or when
920 # UAC is enabled and the user is a filtered administrator account.
921 if sys.platform != 'win32':
922 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000923 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400924 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000925 # Raise using the original traceback.
926 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000927 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000928 raise exc, None, sys.exc_info()[2]
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000929 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000930 # Call save() at every uninstall. The assumptions are:
931 # - The total the number of named caches is low, so the state.json file
932 # is small, so the time it takes to write it to disk is short.
933 # - The number of mapped named caches per task is low, so the number of
934 # times save() is called on tear-down isn't high enough to be
935 # significant.
936 # - uninstall() sometimes throws due to file locking on Windows or
937 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000938 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400939
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000940 # Cache interface implementation.
941
942 def __len__(self):
943 with self._lock:
944 return len(self._lru)
945
946 def __iter__(self):
947 # This is not thread-safe.
948 return self._lru.__iter__()
949
950 def __contains__(self, digest):
951 with self._lock:
952 return digest in self._lru
953
954 @property
955 def total_size(self):
956 with self._lock:
957 return sum(size for _rel_path, size in self._lru.itervalues())
958
959 def get_oldest(self):
960 with self._lock:
961 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000962 # (key, (value, ts))
963 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000964 except KeyError:
965 return None
966
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000967 def remove_oldest(self):
968 with self._lock:
969 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000970 _name, size = self._remove_lru_item()
971 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000972
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000973 def save(self):
974 with self._lock:
975 return self._save()
976
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400977 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000978 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400979 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000980 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000981 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400982
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400983 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000984 if self._policies.max_items:
985 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000986 name, size = self._remove_lru_item()
987 evicted.append(size)
988 logging.info(
989 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
990 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400991
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400992 # Trim according to maximum age.
993 if self._policies.max_age_secs:
994 cutoff = self._lru.time_fn() - self._policies.max_age_secs
995 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000996 _name, (_data, ts) = self._lru.get_oldest()
997 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400998 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000999 name, size = self._remove_lru_item()
1000 evicted.append(size)
1001 logging.info(
1002 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1003 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001004
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001005 # Trim according to minimum free space.
1006 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001007 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001008 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001009 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001010 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001011 name, size = self._remove_lru_item()
1012 evicted.append(size)
1013 logging.info(
1014 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1015 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001016
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001017 # Trim according to maximum total size.
1018 if self._policies.max_cache_size:
1019 while self._lru:
1020 total = sum(size for _rel_cache, size in self._lru.itervalues())
1021 if total <= self._policies.max_cache_size:
1022 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001023 name, size = self._remove_lru_item()
1024 evicted.append(size)
1025 logging.info(
1026 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1027 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001028
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001029 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001030 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001031
1032 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001033 """Removes unknown directories.
1034
1035 Does not recalculate the cache size since it's surprisingly slow on some
1036 OSes.
1037 """
1038 success = True
1039 with self._lock:
1040 try:
1041 actual = set(fs.listdir(self.cache_dir))
1042 actual.discard(self.NAMED_DIR)
1043 actual.discard(self.STATE_FILE)
1044 expected = {v[0]: k for k, v in self._lru.iteritems()}
1045 # First, handle the actual cache content.
1046 # Remove missing entries.
1047 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001048 name, size = self._lru.pop(expected[missing])
1049 logging.warning(
1050 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001051 # Remove unexpected items.
1052 for unexpected in (actual - set(expected)):
1053 try:
1054 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001055 logging.warning(
1056 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001057 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001058 file_path.rmtree(p)
1059 else:
1060 fs.remove(p)
1061 except (IOError, OSError) as e:
1062 logging.error('Failed to remove %s: %s', unexpected, e)
1063 success = False
1064
1065 # Second, fix named cache links.
1066 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001067 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001068 actual = set(fs.listdir(named))
1069 expected = set(self._lru)
1070 # Confirm entries. Do not add missing ones for now.
1071 for name in expected.intersection(actual):
1072 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001073 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001074 if fs.islink(p):
1075 link = fs.readlink(p)
1076 if expected_link == link:
1077 continue
1078 logging.warning(
1079 'Unexpected symlink for cache %s: %s, expected %s',
1080 name, link, expected_link)
1081 else:
1082 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001083 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001084 file_path.rmtree(p)
1085 else:
1086 fs.remove(p)
1087 # Remove unexpected items.
1088 for unexpected in (actual - expected):
1089 try:
1090 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1091 if fs.isdir(p):
1092 file_path.rmtree(p)
1093 else:
1094 fs.remove(p)
1095 except (IOError, OSError) as e:
1096 logging.error('Failed to remove %s: %s', unexpected, e)
1097 success = False
1098 finally:
1099 self._save()
1100 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001101
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001102 # Internal functions.
1103
1104 def _try_upgrade(self):
1105 """Upgrades from the old format to the new one if necessary.
1106
1107 This code can be removed so all bots are known to have the right new format.
1108 """
1109 if not self._lru:
1110 return
1111 _name, (data, _ts) = self._lru.get_oldest()
1112 if isinstance(data, (list, tuple)):
1113 return
1114 # Update to v2.
1115 def upgrade(_name, rel_cache):
1116 abs_cache = os.path.join(self.cache_dir, rel_cache)
1117 return rel_cache, _get_recursive_size(abs_cache)
1118 self._lru.transform(upgrade)
1119 self._save()
1120
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001121 def _remove_lru_item(self):
1122 """Removes the oldest LRU entry. LRU must not be empty."""
1123 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1124 logging.info('Removing named cache %r', name)
1125 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001126 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001127
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001128 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001129 """Creates and returns relative path of a new cache directory.
1130
1131 In practice, it is a 2-letter string.
1132 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001133 # We randomly generate directory names that have two lower/upper case
1134 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1135 abc_len = len(self._DIR_ALPHABET)
1136 tried = set()
1137 while len(tried) < 1000:
1138 i = random.randint(0, abc_len * abc_len - 1)
1139 rel_path = (
1140 self._DIR_ALPHABET[i / abc_len] +
1141 self._DIR_ALPHABET[i % abc_len])
1142 if rel_path in tried:
1143 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001144 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001145 if not fs.exists(abs_path):
1146 return rel_path
1147 tried.add(rel_path)
1148 raise NamedCacheError(
1149 'could not allocate a new cache dir, too many cache dirs')
1150
1151 def _remove(self, name):
1152 """Removes a cache directory and entry.
1153
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001154 Returns:
1155 Number of caches deleted.
1156 """
1157 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001158 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001159 named_dir = self._get_named_path(name)
1160 if fs.islink(named_dir):
1161 fs.unlink(named_dir)
1162
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001163 # Then remove the actual data.
1164 if name not in self._lru:
1165 return
1166 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001167 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001168 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001169 file_path.rmtree(abs_path)
1170 self._lru.pop(name)
1171
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001172 def _save(self):
1173 self._lock.assert_locked()
1174 file_path.ensure_tree(self.cache_dir)
1175 self._lru.save(self.state_file)
1176
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001177 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001178 return os.path.join(self.cache_dir, self.NAMED_DIR, name)