blob: 9f3de70d3b433da0aa7dd67c7070a15bd11e44e1 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
26import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040027
28# The file size to be used when we don't know the correct file size,
29# generally used for .isolated files.
30UNKNOWN_FILE_SIZE = None
31
32
33def file_write(path, content_generator):
34 """Writes file content as generated by content_generator.
35
36 Creates the intermediary directory as needed.
37
38 Returns the number of bytes written.
39
40 Meant to be mocked out in unit tests.
41 """
42 file_path.ensure_tree(os.path.dirname(path))
43 total = 0
44 with fs.open(path, 'wb') as f:
45 for d in content_generator:
46 total += len(d)
47 f.write(d)
48 return total
49
50
51def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000052 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040053
54 Currently it just checks the file exists and its size matches the expectation.
55 """
56 if size == UNKNOWN_FILE_SIZE:
57 return fs.isfile(path)
58 try:
59 actual_size = fs.stat(path).st_size
60 except OSError as e:
61 logging.warning(
62 'Can\'t read item %s, assuming it\'s invalid: %s',
63 os.path.basename(path), e)
64 return False
65 if size != actual_size:
66 logging.warning(
67 'Found invalid item %s; %d != %d',
68 os.path.basename(path), actual_size, size)
69 return False
70 return True
71
72
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000073def trim_caches(caches, path, min_free_space, max_age_secs):
74 """Trims multiple caches.
75
76 The goal here is to coherently trim all caches in a coherent LRU fashion,
77 deleting older items independent of which container they belong to.
78
79 Two policies are enforced first:
80 - max_age_secs
81 - min_free_space
82
83 Once that's done, then we enforce each cache's own policies.
84
85 Returns:
86 Slice containing the size of all items evicted.
87 """
88 min_ts = time.time() - max_age_secs if max_age_secs else 0
89 free_disk = file_path.get_free_space(path) if min_free_space else 0
90 total = []
91 if min_ts or free_disk:
92 while True:
93 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
94 if not oldest:
95 break
Lei Leife202df2019-06-11 17:33:34 +000096 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000097 c, ts = oldest[0]
98 if ts >= min_ts and free_disk >= min_free_space:
99 break
100 total.append(c.remove_oldest())
101 if min_free_space:
102 free_disk = file_path.get_free_space(path)
103 # Evaluate each cache's own policies.
104 for c in caches:
105 total.extend(c.trim())
106 return total
107
108
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000109def _get_recursive_size(path):
110 """Returns the total data size for the specified path.
111
112 This function can be surprisingly slow on OSX, so its output should be cached.
113 """
114 try:
115 total = 0
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000116 for root, _, files in fs.walk(path):
117 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000118 st = fs.lstat(os.path.join(root, f))
119 if stat.S_ISLNK(st.st_mode):
120 continue
121 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000122 return total
123 except (IOError, OSError, UnicodeEncodeError) as exc:
124 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
125 return None
126
127
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400128class NamedCacheError(Exception):
129 """Named cache specific error."""
130
131
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400132class NoMoreSpace(Exception):
133 """Not enough space to map the whole directory."""
134 pass
135
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400136
137class CachePolicies(object):
138 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
139 """Common caching policies for the multiple caches (isolated, named, cipd).
140
141 Arguments:
142 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
143 cache is effectively a leak.
144 - min_free_space: Trim if disk free space becomes lower than this value. If
145 0, it will unconditionally fill the disk.
146 - max_items: Maximum number of items to keep in the cache. If 0, do not
147 enforce a limit.
148 - max_age_secs: Maximum age an item is kept in the cache until it is
149 automatically evicted. Having a lot of dead luggage slows
150 everything down.
151 """
152 self.max_cache_size = max_cache_size
153 self.min_free_space = min_free_space
154 self.max_items = max_items
155 self.max_age_secs = max_age_secs
156
157 def __str__(self):
158 return (
159 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
160 'max_age_secs=%s)') % (
161 self.max_cache_size, self.max_items, self.min_free_space,
162 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400163
164
165class CacheMiss(Exception):
166 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400167 def __init__(self, digest):
168 self.digest = digest
169 super(CacheMiss, self).__init__(
170 'Item with digest %r is not found in cache' % digest)
171
172
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400173class Cache(object):
174 def __init__(self, cache_dir):
175 if cache_dir is not None:
176 assert isinstance(cache_dir, unicode), cache_dir
177 assert file_path.isabs(cache_dir), cache_dir
178 self.cache_dir = cache_dir
179 self._lock = threading_utils.LockWithAssert()
180 # Profiling values.
181 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400182 self._used = []
183
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000184 def __nonzero__(self):
185 """A cache is always True.
186
187 Otherwise it falls back to __len__, which is surprising.
188 """
189 return True
190
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000191 def __len__(self):
192 """Returns the number of entries in the cache."""
193 raise NotImplementedError()
194
195 def __iter__(self):
196 """Iterates over all the entries names."""
197 raise NotImplementedError()
198
199 def __contains__(self, name):
200 """Returns if an entry is in the cache."""
201 raise NotImplementedError()
202
203 @property
204 def total_size(self):
205 """Returns the total size of the cache in bytes."""
206 raise NotImplementedError()
207
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208 @property
209 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000210 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400211 with self._lock:
212 return self._added[:]
213
214 @property
215 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000216 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400217 with self._lock:
218 return self._used[:]
219
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000220 def get_oldest(self):
221 """Returns timestamp of oldest cache entry or None.
222
223 Returns:
224 Timestamp of the oldest item.
225
226 Used for manual trimming.
227 """
228 raise NotImplementedError()
229
230 def remove_oldest(self):
231 """Removes the oldest item from the cache.
232
233 Returns:
234 Size of the oldest item.
235
236 Used for manual trimming.
237 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 raise NotImplementedError()
239
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000240 def save(self):
241 """Saves the current cache to disk."""
242 raise NotImplementedError()
243
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400244 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000245 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400246
247 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000248 Slice with the size of evicted items.
249 """
250 raise NotImplementedError()
251
252 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000253 """Deletes any corrupted item from the cache, then calls trim(), then
254 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000255
256 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400257 """
258 raise NotImplementedError()
259
260
261class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400262 """Content addressed cache that stores objects temporarily.
263
264 It can be accessed concurrently from multiple threads, so it should protect
265 its internal state with some lock.
266 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400267
268 def __enter__(self):
269 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000270 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400271 return self
272
273 def __exit__(self, _exc_type, _exec_value, _traceback):
274 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000275 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400276 return False
277
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400278 def touch(self, digest, size):
279 """Ensures item is not corrupted and updates its LRU position.
280
281 Arguments:
282 digest: hash digest of item to check.
283 size: expected size of this item.
284
285 Returns:
286 True if item is in cache and not corrupted.
287 """
288 raise NotImplementedError()
289
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400290 def getfileobj(self, digest):
291 """Returns a readable file like object.
292
293 If file exists on the file system it will have a .name attribute with an
294 absolute path to the file.
295 """
296 raise NotImplementedError()
297
298 def write(self, digest, content):
299 """Reads data from |content| generator and stores it in cache.
300
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000301 It is possible to write to an object that already exists. It may be
302 ignored (sent to /dev/null) but the timestamp is still updated.
303
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400304 Returns digest to simplify chaining.
305 """
306 raise NotImplementedError()
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308
309class MemoryContentAddressedCache(ContentAddressedCache):
310 """ContentAddressedCache implementation that stores everything in memory."""
311
Lei Leife202df2019-06-11 17:33:34 +0000312 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400313 """Args:
314 file_mode_mask: bit mask to AND file mode with. Default value will make
315 all mapped files to be read only.
316 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400317 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400318 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000319 # Items in a LRU lookup dict(digest: size).
320 self._lru = lru.LRUDict()
321
322 # Cache interface implementation.
323
324 def __len__(self):
325 with self._lock:
326 return len(self._lru)
327
328 def __iter__(self):
329 # This is not thread-safe.
330 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400331
332 def __contains__(self, digest):
333 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000334 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400335
336 @property
337 def total_size(self):
338 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000339 return sum(len(i) for i in self._lru.itervalues())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400340
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000341 def get_oldest(self):
342 with self._lock:
343 try:
344 # (key, (value, ts))
345 return self._lru.get_oldest()[1][1]
346 except KeyError:
347 return None
348
349 def remove_oldest(self):
350 with self._lock:
351 # TODO(maruel): Update self._added.
352 # (key, (value, ts))
353 return len(self._lru.pop_oldest()[1][0])
354
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000355 def save(self):
356 pass
357
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000358 def trim(self):
359 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000360 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000363 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400364 pass
365
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000366 # ContentAddressedCache interface implementation.
367
368 def __contains__(self, digest):
369 with self._lock:
370 return digest in self._lru
371
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400372 def touch(self, digest, size):
373 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000374 try:
375 self._lru.touch(digest)
376 except KeyError:
377 return False
378 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400379
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400380 def getfileobj(self, digest):
381 with self._lock:
382 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000383 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400384 except KeyError:
385 raise CacheMiss(digest)
386 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000387 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400388 return io.BytesIO(d)
389
390 def write(self, digest, content):
391 # Assemble whole stream before taking the lock.
392 data = ''.join(content)
393 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000394 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400395 self._added.append(len(data))
396 return digest
397
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400398
399class DiskContentAddressedCache(ContentAddressedCache):
400 """Stateful LRU cache in a flat hash table in a directory.
401
402 Saves its state as json file.
403 """
404 STATE_FILE = u'state.json'
405
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000406 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 """
408 Arguments:
409 cache_dir: directory where to place the cache.
410 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400411 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000412 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400413 """
414 # All protected methods (starting with '_') except _path should be called
415 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400416 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400417 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
419 # Items in a LRU lookup dict(digest: size).
420 self._lru = lru.LRUDict()
421 # Current cached free disk space. It is updated by self._trim().
422 file_path.ensure_tree(self.cache_dir)
423 self._free_disk = file_path.get_free_space(self.cache_dir)
424 # The first item in the LRU cache that must not be evicted during this run
425 # since it was referenced. All items more recent that _protected in the LRU
426 # cache are also inherently protected. It could be a set() of all items
427 # referenced but this increases memory usage without a use case.
428 self._protected = None
429 # Cleanup operations done by self._load(), if any.
430 self._operations = []
431 with tools.Profiler('Setup'):
432 with self._lock:
433 self._load(trim, time_fn)
434
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000435 # Cache interface implementation.
436
437 def __len__(self):
438 with self._lock:
439 return len(self._lru)
440
441 def __iter__(self):
442 # This is not thread-safe.
443 return self._lru.__iter__()
444
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445 def __contains__(self, digest):
446 with self._lock:
447 return digest in self._lru
448
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400449 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 def total_size(self):
451 with self._lock:
452 return sum(self._lru.itervalues())
453
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000454 def get_oldest(self):
455 with self._lock:
456 try:
457 # (key, (value, ts))
458 return self._lru.get_oldest()[1][1]
459 except KeyError:
460 return None
461
462 def remove_oldest(self):
463 with self._lock:
464 # TODO(maruel): Update self._added.
465 return self._remove_lru_file(True)
466
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000467 def save(self):
468 with self._lock:
469 return self._save()
470
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000471 def trim(self):
472 """Forces retention policies."""
473 with self._lock:
474 return self._trim()
475
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400476 def cleanup(self):
477 """Cleans up the cache directory.
478
479 Ensures there is no unknown files in cache_dir.
480 Ensures the read-only bits are set correctly.
481
482 At that point, the cache was already loaded, trimmed to respect cache
483 policies.
484 """
485 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000486 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400487 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000488 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400489 # It'd be faster if there were a readdir() function.
490 for filename in fs.listdir(self.cache_dir):
491 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000492 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400493 continue
494 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000495 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400496 previous.remove(filename)
497 continue
498
499 # An untracked file. Delete it.
500 logging.warning('Removing unknown file %s from cache', filename)
501 p = self._path(filename)
502 if fs.isdir(p):
503 try:
504 file_path.rmtree(p)
505 except OSError:
506 pass
507 else:
508 file_path.try_remove(p)
509 continue
510
511 if previous:
512 # Filter out entries that were not found.
513 logging.warning('Removed %d lost files', len(previous))
514 for filename in previous:
515 self._lru.pop(filename)
516 self._save()
517
518 # What remains to be done is to hash every single item to
519 # detect corruption, then save to ensure state.json is up to date.
520 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
521 # TODO(maruel): Let's revisit once directory metadata is stored in
522 # state.json so only the files that had been mapped since the last cleanup()
523 # call are manually verified.
524 #
525 #with self._lock:
526 # for digest in self._lru:
527 # if not isolated_format.is_valid_hash(
528 # self._path(digest), self.hash_algo):
529 # self.evict(digest)
530 # logging.info('Deleted corrupted item: %s', digest)
531
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000532 # ContentAddressedCache interface implementation.
533
534 def __contains__(self, digest):
535 with self._lock:
536 return digest in self._lru
537
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400538 def touch(self, digest, size):
539 """Verifies an actual file is valid and bumps its LRU position.
540
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000541 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400542
543 Note that is doesn't compute the hash so it could still be corrupted if the
544 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400545 """
546 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000547 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400548
549 # Update its LRU position.
550 with self._lock:
551 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000552 if looks_valid:
553 # Exists but not in the LRU anymore.
554 self._delete_file(digest, size)
555 return False
556 if not looks_valid:
557 self._lru.pop(digest)
558 # Exists but not in the LRU anymore.
559 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400560 return False
561 self._lru.touch(digest)
562 self._protected = self._protected or digest
563 return True
564
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400565 def getfileobj(self, digest):
566 try:
567 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 except IOError:
569 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000570 with self._lock:
571 try:
572 self._used.append(self._lru[digest])
573 except KeyError:
574 # If the digest is not actually in _lru, assume it is a cache miss.
575 # Existing file will be overwritten by whoever uses the cache and added
576 # to _lru.
577 f.close()
578 raise CacheMiss(digest)
579 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400580
581 def write(self, digest, content):
582 assert content is not None
583 with self._lock:
584 self._protected = self._protected or digest
585 path = self._path(digest)
586 # A stale broken file may remain. It is possible for the file to have write
587 # access bit removed which would cause the file_write() call to fail to open
588 # in write mode. Take no chance here.
589 file_path.try_remove(path)
590 try:
591 size = file_write(path, content)
592 except:
593 # There are two possible places were an exception can occur:
594 # 1) Inside |content| generator in case of network or unzipping errors.
595 # 2) Inside file_write itself in case of disk IO errors.
596 # In any case delete an incomplete file and propagate the exception to
597 # caller, it will be logged there.
598 file_path.try_remove(path)
599 raise
600 # Make the file read-only in the cache. This has a few side-effects since
601 # the file node is modified, so every directory entries to this file becomes
602 # read-only. It's fine here because it is a new file.
603 file_path.set_read_only(path, True)
604 with self._lock:
605 self._add(digest, size)
606 return digest
607
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000608 # Internal functions.
609
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610 def _load(self, trim, time_fn):
611 """Loads state of the cache from json file.
612
613 If cache_dir does not exist on disk, it is created.
614 """
615 self._lock.assert_locked()
616
617 if not fs.isfile(self.state_file):
618 if not fs.isdir(self.cache_dir):
619 fs.makedirs(self.cache_dir)
620 else:
621 # Load state of the cache.
622 try:
623 self._lru = lru.LRUDict.load(self.state_file)
624 except ValueError as err:
625 logging.error('Failed to load cache state: %s' % (err,))
626 # Don't want to keep broken state file.
627 file_path.try_remove(self.state_file)
628 if time_fn:
629 self._lru.time_fn = time_fn
630 if trim:
631 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400632
633 def _save(self):
634 """Saves the LRU ordering."""
635 self._lock.assert_locked()
636 if sys.platform != 'win32':
637 d = os.path.dirname(self.state_file)
638 if fs.isdir(d):
639 # Necessary otherwise the file can't be created.
640 file_path.set_read_only(d, False)
641 if fs.isfile(self.state_file):
642 file_path.set_read_only(self.state_file, False)
643 self._lru.save(self.state_file)
644
645 def _trim(self):
646 """Trims anything we don't know, make sure enough free space exists."""
647 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000648 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400649
650 # Trim old items.
651 if self.policies.max_age_secs:
652 cutoff = self._lru.time_fn() - self.policies.max_age_secs
653 while self._lru:
654 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000655 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400656 if oldest[1][1] >= cutoff:
657 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000658 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400659
660 # Ensure maximum cache size.
661 if self.policies.max_cache_size:
662 total_size = sum(self._lru.itervalues())
663 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000664 e = self._remove_lru_file(True)
665 evicted.append(e)
666 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400667
668 # Ensure maximum number of items in the cache.
669 if self.policies.max_items and len(self._lru) > self.policies.max_items:
670 for _ in xrange(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000671 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400672
673 # Ensure enough free space.
674 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400675 while (
676 self.policies.min_free_space and
677 self._lru and
678 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000679 # self._free_disk is updated by this call.
680 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400681
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000682 if evicted:
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400683 total_usage = sum(self._lru.itervalues())
684 usage_percent = 0.
685 if total_usage:
686 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
687
688 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000689 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
690 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
691 '%.1fkb)',
692 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400693 self._free_disk / 1024.,
694 total_usage / 1024.,
695 usage_percent,
696 self.policies.max_cache_size / 1024.)
697 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400699
700 def _path(self, digest):
701 """Returns the path to one item."""
702 return os.path.join(self.cache_dir, digest)
703
704 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000705 """Removes the lastest recently used file and returns its size.
706
707 Updates self._free_disk.
708 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709 self._lock.assert_locked()
710 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000711 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400712 if not allow_protected and digest == self._protected:
713 total_size = sum(self._lru.itervalues())+size
714 msg = (
715 'Not enough space to fetch the whole isolated tree.\n'
716 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
717 self.policies, total_size, len(self._lru)+1, self._free_disk)
718 raise NoMoreSpace(msg)
719 except KeyError:
720 # That means an internal error.
721 raise NoMoreSpace('Nothing to remove, can\'t happend')
722 digest, (size, _) = self._lru.pop_oldest()
723 logging.debug('Removing LRU file %s', digest)
724 self._delete_file(digest, size)
725 return size
726
727 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
728 """Adds an item into LRU cache marking it as a newest one."""
729 self._lock.assert_locked()
730 if size == UNKNOWN_FILE_SIZE:
731 size = fs.stat(self._path(digest)).st_size
732 self._added.append(size)
733 self._lru.add(digest, size)
734 self._free_disk -= size
735 # Do a quicker version of self._trim(). It only enforces free disk space,
736 # not cache size limits. It doesn't actually look at real free disk space,
737 # only uses its cache values. self._trim() will be called later to enforce
738 # real trimming but doing this quick version here makes it possible to map
739 # an isolated that is larger than the current amount of free disk space when
740 # the cache size is already large.
741 while (
742 self.policies.min_free_space and
743 self._lru and
744 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746 if self._remove_lru_file(False) == -1:
747 break
748
749 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000750 """Deletes cache file from the file system.
751
752 Updates self._free_disk.
753 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400754 self._lock.assert_locked()
755 try:
756 if size == UNKNOWN_FILE_SIZE:
757 try:
758 size = fs.stat(self._path(digest)).st_size
759 except OSError:
760 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000761 if file_path.try_remove(self._path(digest)):
762 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400763 except OSError as e:
764 if e.errno != errno.ENOENT:
765 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400766
767
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400768class NamedCache(Cache):
769 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400770
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400771 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400772 name is a short identifier that describes the contents of the cache, e.g.
773 "git_v8" could be all git repositories required by v8 builds, or
774 "build_chromium" could be build artefacts of the Chromium.
775 path is a directory path relative to the task run dir. Cache installation
776 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400777 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400778 _DIR_ALPHABET = string.ascii_letters + string.digits
779 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000780 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400781
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400782 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400783 """Initializes NamedCaches.
784
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400785 Arguments:
786 - cache_dir is a directory for persistent cache storage.
787 - policies is a CachePolicies instance.
788 - time_fn is a function that returns timestamp (float) and used to take
789 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400790 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400791 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400792 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000793 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400794 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
795 self._lru = lru.LRUDict()
796 if not fs.isdir(self.cache_dir):
797 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000798 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000799 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400800 self._lru = lru.LRUDict.load(self.state_file)
801 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000802 logging.exception(
803 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000805 with self._lock:
806 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400807 if time_fn:
808 self._lru.time_fn = time_fn
809
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810 @property
811 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000812 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400813 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000814 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000816 def install(self, dst, name):
817 """Creates the directory |dst| and moves a previous named cache |name| if it
818 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400819
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000820 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400821
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000822 Returns the reused named cache size in bytes, or 0 if none was present.
823
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824 Raises NamedCacheError if cannot install the cache.
825 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000826 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400827 with self._lock:
828 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000829 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000831 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400832
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000833 # Remove the named symlink if it exists.
834 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000835 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000836 # Remove the symlink itself, not its destination.
837 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000838
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000839 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000840 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400841 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000842 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000843 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000844 file_path.ensure_tree(os.path.dirname(dst))
845 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400846 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000847 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400848
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000849 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400851
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000852 # The named cache does not exist, create an empty directory. When
853 # uninstalling, we will move it back to the cache and create an an
854 # entry.
855 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000856 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000857 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000859 # Raise using the original traceback.
860 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000861 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000862 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000863 finally:
864 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400865
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000866 def uninstall(self, src, name):
867 """Moves the cache directory back into the named cache hive for an eventual
868 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400869
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000870 The opposite of install().
871
872 src must be absolute and unicode. Its content is moved back into the local
873 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400874
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000875 Returns the named cache size in bytes.
876
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877 Raises NamedCacheError if cannot uninstall the cache.
878 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000879 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400880 with self._lock:
881 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000882 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400883 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000884 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000885 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400886 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400887
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000888 if name in self._lru:
889 # This shouldn't happen but just remove the preexisting one and move
890 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000891 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000892 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000893
894 # Calculate the size of the named cache to keep. It's important because
895 # if size is zero (it's empty), we do not want to add it back to the
896 # named caches cache.
897 size = _get_recursive_size(src)
898 logging.info('- Size is %d', size)
899 if not size:
900 # Do not save empty named cache.
901 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400902
903 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000904 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400905 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000906 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400907 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000908 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400909
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000910 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000911 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000912
913 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
914 # for user convenience.
915 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000916 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000917 file_path.remove(named_path)
918 else:
919 file_path.ensure_tree(os.path.dirname(named_path))
920
921 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000922 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000923 logging.info(
924 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000925 except OSError:
926 # Ignore on Windows. It happens when running as a normal user or when
927 # UAC is enabled and the user is a filtered administrator account.
928 if sys.platform != 'win32':
929 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000930 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400931 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000932 # Raise using the original traceback.
933 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000934 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000935 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000936 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000937 # Call save() at every uninstall. The assumptions are:
938 # - The total the number of named caches is low, so the state.json file
939 # is small, so the time it takes to write it to disk is short.
940 # - The number of mapped named caches per task is low, so the number of
941 # times save() is called on tear-down isn't high enough to be
942 # significant.
943 # - uninstall() sometimes throws due to file locking on Windows or
944 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000945 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400946
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000947 # Cache interface implementation.
948
949 def __len__(self):
950 with self._lock:
951 return len(self._lru)
952
953 def __iter__(self):
954 # This is not thread-safe.
955 return self._lru.__iter__()
956
957 def __contains__(self, digest):
958 with self._lock:
959 return digest in self._lru
960
961 @property
962 def total_size(self):
963 with self._lock:
964 return sum(size for _rel_path, size in self._lru.itervalues())
965
966 def get_oldest(self):
967 with self._lock:
968 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000969 # (key, (value, ts))
970 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000971 except KeyError:
972 return None
973
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000974 def remove_oldest(self):
975 with self._lock:
976 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000977 _name, size = self._remove_lru_item()
978 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000979
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000980 def save(self):
981 with self._lock:
982 return self._save()
983
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400984 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000985 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400986 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000987 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000988 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400989
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400990 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000991 if self._policies.max_items:
992 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000993 name, size = self._remove_lru_item()
994 evicted.append(size)
995 logging.info(
996 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
997 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400998
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400999 # Trim according to maximum age.
1000 if self._policies.max_age_secs:
1001 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1002 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001003 _name, (_data, ts) = self._lru.get_oldest()
1004 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001005 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001006 name, size = self._remove_lru_item()
1007 evicted.append(size)
1008 logging.info(
1009 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1010 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001011
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001012 # Trim according to minimum free space.
1013 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001014 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001015 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001016 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001017 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001018 name, size = self._remove_lru_item()
1019 evicted.append(size)
1020 logging.info(
1021 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1022 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001023
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001024 # Trim according to maximum total size.
1025 if self._policies.max_cache_size:
1026 while self._lru:
1027 total = sum(size for _rel_cache, size in self._lru.itervalues())
1028 if total <= self._policies.max_cache_size:
1029 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001030 name, size = self._remove_lru_item()
1031 evicted.append(size)
1032 logging.info(
1033 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1034 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001035
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001036 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001037 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001038
1039 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001040 """Removes unknown directories.
1041
1042 Does not recalculate the cache size since it's surprisingly slow on some
1043 OSes.
1044 """
1045 success = True
1046 with self._lock:
1047 try:
1048 actual = set(fs.listdir(self.cache_dir))
1049 actual.discard(self.NAMED_DIR)
1050 actual.discard(self.STATE_FILE)
1051 expected = {v[0]: k for k, v in self._lru.iteritems()}
1052 # First, handle the actual cache content.
1053 # Remove missing entries.
1054 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001055 name, size = self._lru.pop(expected[missing])
1056 logging.warning(
1057 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001058 # Remove unexpected items.
1059 for unexpected in (actual - set(expected)):
1060 try:
1061 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001062 logging.warning(
1063 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001064 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001065 file_path.rmtree(p)
1066 else:
1067 fs.remove(p)
1068 except (IOError, OSError) as e:
1069 logging.error('Failed to remove %s: %s', unexpected, e)
1070 success = False
1071
1072 # Second, fix named cache links.
1073 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001074 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001075 actual = set(fs.listdir(named))
1076 expected = set(self._lru)
1077 # Confirm entries. Do not add missing ones for now.
1078 for name in expected.intersection(actual):
1079 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001080 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001081 if fs.islink(p):
1082 link = fs.readlink(p)
1083 if expected_link == link:
1084 continue
1085 logging.warning(
1086 'Unexpected symlink for cache %s: %s, expected %s',
1087 name, link, expected_link)
1088 else:
1089 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001090 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001091 file_path.rmtree(p)
1092 else:
1093 fs.remove(p)
1094 # Remove unexpected items.
1095 for unexpected in (actual - expected):
1096 try:
1097 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1098 if fs.isdir(p):
1099 file_path.rmtree(p)
1100 else:
1101 fs.remove(p)
1102 except (IOError, OSError) as e:
1103 logging.error('Failed to remove %s: %s', unexpected, e)
1104 success = False
1105 finally:
1106 self._save()
1107 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001108
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001109 # Internal functions.
1110
1111 def _try_upgrade(self):
1112 """Upgrades from the old format to the new one if necessary.
1113
1114 This code can be removed so all bots are known to have the right new format.
1115 """
1116 if not self._lru:
1117 return
1118 _name, (data, _ts) = self._lru.get_oldest()
1119 if isinstance(data, (list, tuple)):
1120 return
1121 # Update to v2.
1122 def upgrade(_name, rel_cache):
1123 abs_cache = os.path.join(self.cache_dir, rel_cache)
1124 return rel_cache, _get_recursive_size(abs_cache)
1125 self._lru.transform(upgrade)
1126 self._save()
1127
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001128 def _remove_lru_item(self):
1129 """Removes the oldest LRU entry. LRU must not be empty."""
1130 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1131 logging.info('Removing named cache %r', name)
1132 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001133 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001134
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001135 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001136 """Creates and returns relative path of a new cache directory.
1137
1138 In practice, it is a 2-letter string.
1139 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001140 # We randomly generate directory names that have two lower/upper case
1141 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1142 abc_len = len(self._DIR_ALPHABET)
1143 tried = set()
1144 while len(tried) < 1000:
1145 i = random.randint(0, abc_len * abc_len - 1)
1146 rel_path = (
1147 self._DIR_ALPHABET[i / abc_len] +
1148 self._DIR_ALPHABET[i % abc_len])
1149 if rel_path in tried:
1150 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001151 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001152 if not fs.exists(abs_path):
1153 return rel_path
1154 tried.add(rel_path)
1155 raise NamedCacheError(
1156 'could not allocate a new cache dir, too many cache dirs')
1157
1158 def _remove(self, name):
1159 """Removes a cache directory and entry.
1160
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001161 Returns:
1162 Number of caches deleted.
1163 """
1164 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001165 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001166 named_dir = self._get_named_path(name)
1167 if fs.islink(named_dir):
1168 fs.unlink(named_dir)
1169
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001170 # Then remove the actual data.
1171 if name not in self._lru:
1172 return
1173 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001174 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001175 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001176 file_path.rmtree(abs_path)
1177 self._lru.pop(name)
1178
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001179 def _save(self):
1180 self._lock.assert_locked()
1181 file_path.ensure_tree(self.cache_dir)
1182 self._lru.save(self.state_file)
1183
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001184 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001185 return os.path.join(self.cache_dir, self.NAMED_DIR, name)