blob: 283c811929e794c7b2ce3cc4c35cc7e621ba410e [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
13import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040014import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000015import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016
17from utils import file_path
18from utils import fs
19from utils import lru
20from utils import threading_utils
21from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000022tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040023
Lei Leife202df2019-06-11 17:33:34 +000024# third_party/
25import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040026
27# The file size to be used when we don't know the correct file size,
28# generally used for .isolated files.
29UNKNOWN_FILE_SIZE = None
30
31
32def file_write(path, content_generator):
33 """Writes file content as generated by content_generator.
34
35 Creates the intermediary directory as needed.
36
37 Returns the number of bytes written.
38
39 Meant to be mocked out in unit tests.
40 """
41 file_path.ensure_tree(os.path.dirname(path))
42 total = 0
43 with fs.open(path, 'wb') as f:
44 for d in content_generator:
45 total += len(d)
46 f.write(d)
47 return total
48
49
50def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000051 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040052
53 Currently it just checks the file exists and its size matches the expectation.
54 """
55 if size == UNKNOWN_FILE_SIZE:
56 return fs.isfile(path)
57 try:
58 actual_size = fs.stat(path).st_size
59 except OSError as e:
60 logging.warning(
61 'Can\'t read item %s, assuming it\'s invalid: %s',
62 os.path.basename(path), e)
63 return False
64 if size != actual_size:
65 logging.warning(
66 'Found invalid item %s; %d != %d',
67 os.path.basename(path), actual_size, size)
68 return False
69 return True
70
71
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000072def trim_caches(caches, path, min_free_space, max_age_secs):
73 """Trims multiple caches.
74
75 The goal here is to coherently trim all caches in a coherent LRU fashion,
76 deleting older items independent of which container they belong to.
77
78 Two policies are enforced first:
79 - max_age_secs
80 - min_free_space
81
82 Once that's done, then we enforce each cache's own policies.
83
84 Returns:
85 Slice containing the size of all items evicted.
86 """
87 min_ts = time.time() - max_age_secs if max_age_secs else 0
88 free_disk = file_path.get_free_space(path) if min_free_space else 0
89 total = []
90 if min_ts or free_disk:
91 while True:
92 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
93 if not oldest:
94 break
Lei Leife202df2019-06-11 17:33:34 +000095 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000096 c, ts = oldest[0]
97 if ts >= min_ts and free_disk >= min_free_space:
98 break
99 total.append(c.remove_oldest())
100 if min_free_space:
101 free_disk = file_path.get_free_space(path)
102 # Evaluate each cache's own policies.
103 for c in caches:
104 total.extend(c.trim())
105 return total
106
107
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000108def _get_recursive_size(path):
109 """Returns the total data size for the specified path.
110
111 This function can be surprisingly slow on OSX, so its output should be cached.
112 """
113 try:
114 total = 0
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000115 for root, _, files in fs.walk(path):
116 for f in files:
117 total += fs.lstat(os.path.join(root, f)).st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000118 return total
119 except (IOError, OSError, UnicodeEncodeError) as exc:
120 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
121 return None
122
123
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400124class NamedCacheError(Exception):
125 """Named cache specific error."""
126
127
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400128class NoMoreSpace(Exception):
129 """Not enough space to map the whole directory."""
130 pass
131
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400132
133class CachePolicies(object):
134 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
135 """Common caching policies for the multiple caches (isolated, named, cipd).
136
137 Arguments:
138 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
139 cache is effectively a leak.
140 - min_free_space: Trim if disk free space becomes lower than this value. If
141 0, it will unconditionally fill the disk.
142 - max_items: Maximum number of items to keep in the cache. If 0, do not
143 enforce a limit.
144 - max_age_secs: Maximum age an item is kept in the cache until it is
145 automatically evicted. Having a lot of dead luggage slows
146 everything down.
147 """
148 self.max_cache_size = max_cache_size
149 self.min_free_space = min_free_space
150 self.max_items = max_items
151 self.max_age_secs = max_age_secs
152
153 def __str__(self):
154 return (
155 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
156 'max_age_secs=%s)') % (
157 self.max_cache_size, self.max_items, self.min_free_space,
158 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400159
160
161class CacheMiss(Exception):
162 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400163 def __init__(self, digest):
164 self.digest = digest
165 super(CacheMiss, self).__init__(
166 'Item with digest %r is not found in cache' % digest)
167
168
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400169class Cache(object):
170 def __init__(self, cache_dir):
171 if cache_dir is not None:
172 assert isinstance(cache_dir, unicode), cache_dir
173 assert file_path.isabs(cache_dir), cache_dir
174 self.cache_dir = cache_dir
175 self._lock = threading_utils.LockWithAssert()
176 # Profiling values.
177 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400178 self._used = []
179
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000180 def __nonzero__(self):
181 """A cache is always True.
182
183 Otherwise it falls back to __len__, which is surprising.
184 """
185 return True
186
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000187 def __len__(self):
188 """Returns the number of entries in the cache."""
189 raise NotImplementedError()
190
191 def __iter__(self):
192 """Iterates over all the entries names."""
193 raise NotImplementedError()
194
195 def __contains__(self, name):
196 """Returns if an entry is in the cache."""
197 raise NotImplementedError()
198
199 @property
200 def total_size(self):
201 """Returns the total size of the cache in bytes."""
202 raise NotImplementedError()
203
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400204 @property
205 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000206 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 with self._lock:
208 return self._added[:]
209
210 @property
211 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000212 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400213 with self._lock:
214 return self._used[:]
215
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000216 def get_oldest(self):
217 """Returns timestamp of oldest cache entry or None.
218
219 Returns:
220 Timestamp of the oldest item.
221
222 Used for manual trimming.
223 """
224 raise NotImplementedError()
225
226 def remove_oldest(self):
227 """Removes the oldest item from the cache.
228
229 Returns:
230 Size of the oldest item.
231
232 Used for manual trimming.
233 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400234 raise NotImplementedError()
235
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000236 def save(self):
237 """Saves the current cache to disk."""
238 raise NotImplementedError()
239
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400240 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000241 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400242
243 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000244 Slice with the size of evicted items.
245 """
246 raise NotImplementedError()
247
248 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000249 """Deletes any corrupted item from the cache, then calls trim(), then
250 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000251
252 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400253 """
254 raise NotImplementedError()
255
256
257class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400258 """Content addressed cache that stores objects temporarily.
259
260 It can be accessed concurrently from multiple threads, so it should protect
261 its internal state with some lock.
262 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400263
264 def __enter__(self):
265 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000266 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400267 return self
268
269 def __exit__(self, _exc_type, _exec_value, _traceback):
270 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000271 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400272 return False
273
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400274 def touch(self, digest, size):
275 """Ensures item is not corrupted and updates its LRU position.
276
277 Arguments:
278 digest: hash digest of item to check.
279 size: expected size of this item.
280
281 Returns:
282 True if item is in cache and not corrupted.
283 """
284 raise NotImplementedError()
285
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400286 def getfileobj(self, digest):
287 """Returns a readable file like object.
288
289 If file exists on the file system it will have a .name attribute with an
290 absolute path to the file.
291 """
292 raise NotImplementedError()
293
294 def write(self, digest, content):
295 """Reads data from |content| generator and stores it in cache.
296
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000297 It is possible to write to an object that already exists. It may be
298 ignored (sent to /dev/null) but the timestamp is still updated.
299
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400300 Returns digest to simplify chaining.
301 """
302 raise NotImplementedError()
303
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304
305class MemoryContentAddressedCache(ContentAddressedCache):
306 """ContentAddressedCache implementation that stores everything in memory."""
307
Lei Leife202df2019-06-11 17:33:34 +0000308 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400309 """Args:
310 file_mode_mask: bit mask to AND file mode with. Default value will make
311 all mapped files to be read only.
312 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400313 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400314 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000315 # Items in a LRU lookup dict(digest: size).
316 self._lru = lru.LRUDict()
317
318 # Cache interface implementation.
319
320 def __len__(self):
321 with self._lock:
322 return len(self._lru)
323
324 def __iter__(self):
325 # This is not thread-safe.
326 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400327
328 def __contains__(self, digest):
329 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000330 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400331
332 @property
333 def total_size(self):
334 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000335 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400336
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000337 def get_oldest(self):
338 with self._lock:
339 try:
340 # (key, (value, ts))
341 return self._lru.get_oldest()[1][1]
342 except KeyError:
343 return None
344
345 def remove_oldest(self):
346 with self._lock:
347 # TODO(maruel): Update self._added.
348 # (key, (value, ts))
349 return len(self._lru.pop_oldest()[1][0])
350
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000351 def save(self):
352 pass
353
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000354 def trim(self):
355 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000356 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400357
358 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000359 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400360 pass
361
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000362 # ContentAddressedCache interface implementation.
363
364 def __contains__(self, digest):
365 with self._lock:
366 return digest in self._lru
367
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400368 def touch(self, digest, size):
369 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000370 try:
371 self._lru.touch(digest)
372 except KeyError:
373 return False
374 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400375
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400376 def getfileobj(self, digest):
377 with self._lock:
378 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000379 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400380 except KeyError:
381 raise CacheMiss(digest)
382 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000383 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400384 return io.BytesIO(d)
385
386 def write(self, digest, content):
387 # Assemble whole stream before taking the lock.
388 data = ''.join(content)
389 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000390 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391 self._added.append(len(data))
392 return digest
393
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394
395class DiskContentAddressedCache(ContentAddressedCache):
396 """Stateful LRU cache in a flat hash table in a directory.
397
398 Saves its state as json file.
399 """
400 STATE_FILE = u'state.json'
401
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000402 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400403 """
404 Arguments:
405 cache_dir: directory where to place the cache.
406 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000408 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409 """
410 # All protected methods (starting with '_') except _path should be called
411 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400412 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400413 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
415 # Items in a LRU lookup dict(digest: size).
416 self._lru = lru.LRUDict()
417 # Current cached free disk space. It is updated by self._trim().
418 file_path.ensure_tree(self.cache_dir)
419 self._free_disk = file_path.get_free_space(self.cache_dir)
420 # The first item in the LRU cache that must not be evicted during this run
421 # since it was referenced. All items more recent that _protected in the LRU
422 # cache are also inherently protected. It could be a set() of all items
423 # referenced but this increases memory usage without a use case.
424 self._protected = None
425 # Cleanup operations done by self._load(), if any.
426 self._operations = []
427 with tools.Profiler('Setup'):
428 with self._lock:
429 self._load(trim, time_fn)
430
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000431 # Cache interface implementation.
432
433 def __len__(self):
434 with self._lock:
435 return len(self._lru)
436
437 def __iter__(self):
438 # This is not thread-safe.
439 return self._lru.__iter__()
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 def __contains__(self, digest):
442 with self._lock:
443 return digest in self._lru
444
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400446 def total_size(self):
447 with self._lock:
448 return sum(self._lru.itervalues())
449
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000450 def get_oldest(self):
451 with self._lock:
452 try:
453 # (key, (value, ts))
454 return self._lru.get_oldest()[1][1]
455 except KeyError:
456 return None
457
458 def remove_oldest(self):
459 with self._lock:
460 # TODO(maruel): Update self._added.
461 return self._remove_lru_file(True)
462
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000463 def save(self):
464 with self._lock:
465 return self._save()
466
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000467 def trim(self):
468 """Forces retention policies."""
469 with self._lock:
470 return self._trim()
471
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400472 def cleanup(self):
473 """Cleans up the cache directory.
474
475 Ensures there is no unknown files in cache_dir.
476 Ensures the read-only bits are set correctly.
477
478 At that point, the cache was already loaded, trimmed to respect cache
479 policies.
480 """
481 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000482 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000484 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485 # It'd be faster if there were a readdir() function.
486 for filename in fs.listdir(self.cache_dir):
487 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000488 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400489 continue
490 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000491 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 previous.remove(filename)
493 continue
494
495 # An untracked file. Delete it.
496 logging.warning('Removing unknown file %s from cache', filename)
497 p = self._path(filename)
498 if fs.isdir(p):
499 try:
500 file_path.rmtree(p)
501 except OSError:
502 pass
503 else:
504 file_path.try_remove(p)
505 continue
506
507 if previous:
508 # Filter out entries that were not found.
509 logging.warning('Removed %d lost files', len(previous))
510 for filename in previous:
511 self._lru.pop(filename)
512 self._save()
513
514 # What remains to be done is to hash every single item to
515 # detect corruption, then save to ensure state.json is up to date.
516 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
517 # TODO(maruel): Let's revisit once directory metadata is stored in
518 # state.json so only the files that had been mapped since the last cleanup()
519 # call are manually verified.
520 #
521 #with self._lock:
522 # for digest in self._lru:
523 # if not isolated_format.is_valid_hash(
524 # self._path(digest), self.hash_algo):
525 # self.evict(digest)
526 # logging.info('Deleted corrupted item: %s', digest)
527
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000528 # ContentAddressedCache interface implementation.
529
530 def __contains__(self, digest):
531 with self._lock:
532 return digest in self._lru
533
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400534 def touch(self, digest, size):
535 """Verifies an actual file is valid and bumps its LRU position.
536
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000537 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400538
539 Note that is doesn't compute the hash so it could still be corrupted if the
540 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400541 """
542 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000543 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400544
545 # Update its LRU position.
546 with self._lock:
547 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000548 if looks_valid:
549 # Exists but not in the LRU anymore.
550 self._delete_file(digest, size)
551 return False
552 if not looks_valid:
553 self._lru.pop(digest)
554 # Exists but not in the LRU anymore.
555 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400556 return False
557 self._lru.touch(digest)
558 self._protected = self._protected or digest
559 return True
560
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400561 def getfileobj(self, digest):
562 try:
563 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400564 except IOError:
565 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000566 with self._lock:
567 try:
568 self._used.append(self._lru[digest])
569 except KeyError:
570 # If the digest is not actually in _lru, assume it is a cache miss.
571 # Existing file will be overwritten by whoever uses the cache and added
572 # to _lru.
573 f.close()
574 raise CacheMiss(digest)
575 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400576
577 def write(self, digest, content):
578 assert content is not None
579 with self._lock:
580 self._protected = self._protected or digest
581 path = self._path(digest)
582 # A stale broken file may remain. It is possible for the file to have write
583 # access bit removed which would cause the file_write() call to fail to open
584 # in write mode. Take no chance here.
585 file_path.try_remove(path)
586 try:
587 size = file_write(path, content)
588 except:
589 # There are two possible places were an exception can occur:
590 # 1) Inside |content| generator in case of network or unzipping errors.
591 # 2) Inside file_write itself in case of disk IO errors.
592 # In any case delete an incomplete file and propagate the exception to
593 # caller, it will be logged there.
594 file_path.try_remove(path)
595 raise
596 # Make the file read-only in the cache. This has a few side-effects since
597 # the file node is modified, so every directory entries to this file becomes
598 # read-only. It's fine here because it is a new file.
599 file_path.set_read_only(path, True)
600 with self._lock:
601 self._add(digest, size)
602 return digest
603
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000604 # Internal functions.
605
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400606 def _load(self, trim, time_fn):
607 """Loads state of the cache from json file.
608
609 If cache_dir does not exist on disk, it is created.
610 """
611 self._lock.assert_locked()
612
613 if not fs.isfile(self.state_file):
614 if not fs.isdir(self.cache_dir):
615 fs.makedirs(self.cache_dir)
616 else:
617 # Load state of the cache.
618 try:
619 self._lru = lru.LRUDict.load(self.state_file)
620 except ValueError as err:
621 logging.error('Failed to load cache state: %s' % (err,))
622 # Don't want to keep broken state file.
623 file_path.try_remove(self.state_file)
624 if time_fn:
625 self._lru.time_fn = time_fn
626 if trim:
627 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 def _save(self):
630 """Saves the LRU ordering."""
631 self._lock.assert_locked()
632 if sys.platform != 'win32':
633 d = os.path.dirname(self.state_file)
634 if fs.isdir(d):
635 # Necessary otherwise the file can't be created.
636 file_path.set_read_only(d, False)
637 if fs.isfile(self.state_file):
638 file_path.set_read_only(self.state_file, False)
639 self._lru.save(self.state_file)
640
641 def _trim(self):
642 """Trims anything we don't know, make sure enough free space exists."""
643 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000644 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400645
646 # Trim old items.
647 if self.policies.max_age_secs:
648 cutoff = self._lru.time_fn() - self.policies.max_age_secs
649 while self._lru:
650 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000651 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400652 if oldest[1][1] >= cutoff:
653 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000654 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400655
656 # Ensure maximum cache size.
657 if self.policies.max_cache_size:
658 total_size = sum(self._lru.itervalues())
659 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000660 e = self._remove_lru_file(True)
661 evicted.append(e)
662 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400663
664 # Ensure maximum number of items in the cache.
665 if self.policies.max_items and len(self._lru) > self.policies.max_items:
666 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000667 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400668
669 # Ensure enough free space.
670 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400671 while (
672 self.policies.min_free_space and
673 self._lru and
674 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000675 # self._free_disk is updated by this call.
676 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400677
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400679 total_usage = sum(self._lru.itervalues())
680 usage_percent = 0.
681 if total_usage:
682 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
683
684 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000685 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
686 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
687 '%.1fkb)',
688 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400689 self._free_disk / 1024.,
690 total_usage / 1024.,
691 usage_percent,
692 self.policies.max_cache_size / 1024.)
693 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400695
696 def _path(self, digest):
697 """Returns the path to one item."""
698 return os.path.join(self.cache_dir, digest)
699
700 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000701 """Removes the lastest recently used file and returns its size.
702
703 Updates self._free_disk.
704 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 self._lock.assert_locked()
706 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000707 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400708 if not allow_protected and digest == self._protected:
709 total_size = sum(self._lru.itervalues())+size
710 msg = (
711 'Not enough space to fetch the whole isolated tree.\n'
712 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
713 self.policies, total_size, len(self._lru)+1, self._free_disk)
714 raise NoMoreSpace(msg)
715 except KeyError:
716 # That means an internal error.
717 raise NoMoreSpace('Nothing to remove, can\'t happend')
718 digest, (size, _) = self._lru.pop_oldest()
719 logging.debug('Removing LRU file %s', digest)
720 self._delete_file(digest, size)
721 return size
722
723 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
724 """Adds an item into LRU cache marking it as a newest one."""
725 self._lock.assert_locked()
726 if size == UNKNOWN_FILE_SIZE:
727 size = fs.stat(self._path(digest)).st_size
728 self._added.append(size)
729 self._lru.add(digest, size)
730 self._free_disk -= size
731 # Do a quicker version of self._trim(). It only enforces free disk space,
732 # not cache size limits. It doesn't actually look at real free disk space,
733 # only uses its cache values. self._trim() will be called later to enforce
734 # real trimming but doing this quick version here makes it possible to map
735 # an isolated that is larger than the current amount of free disk space when
736 # the cache size is already large.
737 while (
738 self.policies.min_free_space and
739 self._lru and
740 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000741 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400742 if self._remove_lru_file(False) == -1:
743 break
744
745 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000746 """Deletes cache file from the file system.
747
748 Updates self._free_disk.
749 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400750 self._lock.assert_locked()
751 try:
752 if size == UNKNOWN_FILE_SIZE:
753 try:
754 size = fs.stat(self._path(digest)).st_size
755 except OSError:
756 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000757 if file_path.try_remove(self._path(digest)):
758 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 except OSError as e:
760 if e.errno != errno.ENOENT:
761 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400762
763
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400764class NamedCache(Cache):
765 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400766
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400767 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400768 name is a short identifier that describes the contents of the cache, e.g.
769 "git_v8" could be all git repositories required by v8 builds, or
770 "build_chromium" could be build artefacts of the Chromium.
771 path is a directory path relative to the task run dir. Cache installation
772 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400773 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400774 _DIR_ALPHABET = string.ascii_letters + string.digits
775 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000776 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400777
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400778 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400779 """Initializes NamedCaches.
780
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400781 Arguments:
782 - cache_dir is a directory for persistent cache storage.
783 - policies is a CachePolicies instance.
784 - time_fn is a function that returns timestamp (float) and used to take
785 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400786 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400787 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400788 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000789 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400790 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
791 self._lru = lru.LRUDict()
792 if not fs.isdir(self.cache_dir):
793 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000794 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000795 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400796 self._lru = lru.LRUDict.load(self.state_file)
797 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000798 logging.exception(
799 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400800 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000801 with self._lock:
802 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400803 if time_fn:
804 self._lru.time_fn = time_fn
805
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400806 @property
807 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000808 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400809 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000810 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400811
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000812 def install(self, dst, name):
813 """Creates the directory |dst| and moves a previous named cache |name| if it
814 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000816 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000818 Returns the reused named cache size in bytes, or 0 if none was present.
819
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400820 Raises NamedCacheError if cannot install the cache.
821 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000822 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823 with self._lock:
824 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000825 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000827 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000829 # Remove the named symlink if it exists.
830 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000831 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000832 # Remove the symlink itself, not its destination.
833 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000834
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000835 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000836 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000838 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000839 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000840 file_path.ensure_tree(os.path.dirname(dst))
841 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400842 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000843 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400844
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000845 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400846 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400847
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000848 # The named cache does not exist, create an empty directory. When
849 # uninstalling, we will move it back to the cache and create an an
850 # entry.
851 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000852 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000853 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400854 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000855 # Raise using the original traceback.
856 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000857 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000858 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000859 finally:
860 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400861
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000862 def uninstall(self, src, name):
863 """Moves the cache directory back into the named cache hive for an eventual
864 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400865
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000866 The opposite of install().
867
868 src must be absolute and unicode. Its content is moved back into the local
869 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400870
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000871 Returns the named cache size in bytes.
872
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400873 Raises NamedCacheError if cannot uninstall the cache.
874 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000875 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 with self._lock:
877 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000878 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000880 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000881 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400882 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400883
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000884 if name in self._lru:
885 # This shouldn't happen but just remove the preexisting one and move
886 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000887 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000888 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000889
890 # Calculate the size of the named cache to keep. It's important because
891 # if size is zero (it's empty), we do not want to add it back to the
892 # named caches cache.
893 size = _get_recursive_size(src)
894 logging.info('- Size is %d', size)
895 if not size:
896 # Do not save empty named cache.
897 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400898
899 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000900 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400901 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000902 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400903 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000904 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400905
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000906 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000907 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000908
909 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
910 # for user convenience.
911 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000912 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000913 file_path.remove(named_path)
914 else:
915 file_path.ensure_tree(os.path.dirname(named_path))
916
917 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000918 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000919 logging.info(
920 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000921 except OSError:
922 # Ignore on Windows. It happens when running as a normal user or when
923 # UAC is enabled and the user is a filtered administrator account.
924 if sys.platform != 'win32':
925 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000926 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400927 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000928 # Raise using the original traceback.
929 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000930 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000931 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000932 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000933 # Call save() at every uninstall. The assumptions are:
934 # - The total the number of named caches is low, so the state.json file
935 # is small, so the time it takes to write it to disk is short.
936 # - The number of mapped named caches per task is low, so the number of
937 # times save() is called on tear-down isn't high enough to be
938 # significant.
939 # - uninstall() sometimes throws due to file locking on Windows or
940 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000941 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400942
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000943 # Cache interface implementation.
944
945 def __len__(self):
946 with self._lock:
947 return len(self._lru)
948
949 def __iter__(self):
950 # This is not thread-safe.
951 return self._lru.__iter__()
952
953 def __contains__(self, digest):
954 with self._lock:
955 return digest in self._lru
956
957 @property
958 def total_size(self):
959 with self._lock:
960 return sum(size for _rel_path, size in self._lru.itervalues())
961
962 def get_oldest(self):
963 with self._lock:
964 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000965 # (key, (value, ts))
966 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000967 except KeyError:
968 return None
969
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000970 def remove_oldest(self):
971 with self._lock:
972 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000973 _name, size = self._remove_lru_item()
974 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000975
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000976 def save(self):
977 with self._lock:
978 return self._save()
979
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400980 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000981 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400982 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000983 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000984 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400985
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400986 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000987 if self._policies.max_items:
988 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000989 name, size = self._remove_lru_item()
990 evicted.append(size)
991 logging.info(
992 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
993 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400994
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400995 # Trim according to maximum age.
996 if self._policies.max_age_secs:
997 cutoff = self._lru.time_fn() - self._policies.max_age_secs
998 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000999 _name, (_data, ts) = self._lru.get_oldest()
1000 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001001 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001002 name, size = self._remove_lru_item()
1003 evicted.append(size)
1004 logging.info(
1005 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1006 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001007
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001008 # Trim according to minimum free space.
1009 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001010 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001011 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001012 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001013 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001014 name, size = self._remove_lru_item()
1015 evicted.append(size)
1016 logging.info(
1017 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1018 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001019
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001020 # Trim according to maximum total size.
1021 if self._policies.max_cache_size:
1022 while self._lru:
1023 total = sum(size for _rel_cache, size in self._lru.itervalues())
1024 if total <= self._policies.max_cache_size:
1025 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001026 name, size = self._remove_lru_item()
1027 evicted.append(size)
1028 logging.info(
1029 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1030 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001031
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001032 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001033 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001034
1035 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001036 """Removes unknown directories.
1037
1038 Does not recalculate the cache size since it's surprisingly slow on some
1039 OSes.
1040 """
1041 success = True
1042 with self._lock:
1043 try:
1044 actual = set(fs.listdir(self.cache_dir))
1045 actual.discard(self.NAMED_DIR)
1046 actual.discard(self.STATE_FILE)
1047 expected = {v[0]: k for k, v in self._lru.iteritems()}
1048 # First, handle the actual cache content.
1049 # Remove missing entries.
1050 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001051 name, size = self._lru.pop(expected[missing])
1052 logging.warning(
1053 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001054 # Remove unexpected items.
1055 for unexpected in (actual - set(expected)):
1056 try:
1057 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001058 logging.warning(
1059 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001060 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001061 file_path.rmtree(p)
1062 else:
1063 fs.remove(p)
1064 except (IOError, OSError) as e:
1065 logging.error('Failed to remove %s: %s', unexpected, e)
1066 success = False
1067
1068 # Second, fix named cache links.
1069 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001070 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001071 actual = set(fs.listdir(named))
1072 expected = set(self._lru)
1073 # Confirm entries. Do not add missing ones for now.
1074 for name in expected.intersection(actual):
1075 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001076 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001077 if fs.islink(p):
1078 link = fs.readlink(p)
1079 if expected_link == link:
1080 continue
1081 logging.warning(
1082 'Unexpected symlink for cache %s: %s, expected %s',
1083 name, link, expected_link)
1084 else:
1085 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001086 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001087 file_path.rmtree(p)
1088 else:
1089 fs.remove(p)
1090 # Remove unexpected items.
1091 for unexpected in (actual - expected):
1092 try:
1093 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1094 if fs.isdir(p):
1095 file_path.rmtree(p)
1096 else:
1097 fs.remove(p)
1098 except (IOError, OSError) as e:
1099 logging.error('Failed to remove %s: %s', unexpected, e)
1100 success = False
1101 finally:
1102 self._save()
1103 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001104
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001105 # Internal functions.
1106
1107 def _try_upgrade(self):
1108 """Upgrades from the old format to the new one if necessary.
1109
1110 This code can be removed so all bots are known to have the right new format.
1111 """
1112 if not self._lru:
1113 return
1114 _name, (data, _ts) = self._lru.get_oldest()
1115 if isinstance(data, (list, tuple)):
1116 return
1117 # Update to v2.
1118 def upgrade(_name, rel_cache):
1119 abs_cache = os.path.join(self.cache_dir, rel_cache)
1120 return rel_cache, _get_recursive_size(abs_cache)
1121 self._lru.transform(upgrade)
1122 self._save()
1123
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001124 def _remove_lru_item(self):
1125 """Removes the oldest LRU entry. LRU must not be empty."""
1126 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1127 logging.info('Removing named cache %r', name)
1128 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001129 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001130
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001131 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001132 """Creates and returns relative path of a new cache directory.
1133
1134 In practice, it is a 2-letter string.
1135 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001136 # We randomly generate directory names that have two lower/upper case
1137 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1138 abc_len = len(self._DIR_ALPHABET)
1139 tried = set()
1140 while len(tried) < 1000:
1141 i = random.randint(0, abc_len * abc_len - 1)
1142 rel_path = (
1143 self._DIR_ALPHABET[i / abc_len] +
1144 self._DIR_ALPHABET[i % abc_len])
1145 if rel_path in tried:
1146 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001147 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001148 if not fs.exists(abs_path):
1149 return rel_path
1150 tried.add(rel_path)
1151 raise NamedCacheError(
1152 'could not allocate a new cache dir, too many cache dirs')
1153
1154 def _remove(self, name):
1155 """Removes a cache directory and entry.
1156
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001157 Returns:
1158 Number of caches deleted.
1159 """
1160 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001161 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001162 named_dir = self._get_named_path(name)
1163 if fs.islink(named_dir):
1164 fs.unlink(named_dir)
1165
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001166 # Then remove the actual data.
1167 if name not in self._lru:
1168 return
1169 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001170 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001171 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001172 file_path.rmtree(abs_path)
1173 self._lru.pop(name)
1174
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001175 def _save(self):
1176 self._lock.assert_locked()
1177 file_path.ensure_tree(self.cache_dir)
1178 self._lru.save(self.state_file)
1179
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001180 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001181 return os.path.join(self.cache_dir, self.NAMED_DIR, name)