blob: bacbab98e9d28c7abe37d6a5fe5204c4ba780b2a [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000109def _use_scandir():
110 # Use scandir in windows for faster execution.
111 # Do not use in other OS due to crbug.com/989409
112 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000113
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000114def _get_recursive_size(path):
115 """Returns the total data size for the specified path.
116
117 This function can be surprisingly slow on OSX, so its output should be cached.
118 """
119 try:
120 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000121 if _use_scandir():
122
123 if sys.platform == 'win32':
124 def direntIsJunction(entry):
125 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
126 # windows-only symbols.
127 return bool(entry.stat().st_file_attributes &
128 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
129 else:
130 def direntIsJunction(_entry):
131 return False
132
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000133 stack = [path]
134 while stack:
135 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000136 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 continue
138 if entry.is_file():
139 total += entry.stat().st_size
140 elif entry.is_dir():
141 stack.append(entry.path)
142 else:
143 logging.warning('non directory/file entry: %s', entry)
144 return total
145
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000146 for root, _, files in fs.walk(path):
147 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000148 st = fs.lstat(os.path.join(root, f))
149 if stat.S_ISLNK(st.st_mode):
150 continue
151 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000152 return total
153 except (IOError, OSError, UnicodeEncodeError) as exc:
154 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
155 return None
156
157
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400158class NamedCacheError(Exception):
159 """Named cache specific error."""
160
161
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400162class NoMoreSpace(Exception):
163 """Not enough space to map the whole directory."""
164 pass
165
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400166
167class CachePolicies(object):
168 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
169 """Common caching policies for the multiple caches (isolated, named, cipd).
170
171 Arguments:
172 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
173 cache is effectively a leak.
174 - min_free_space: Trim if disk free space becomes lower than this value. If
175 0, it will unconditionally fill the disk.
176 - max_items: Maximum number of items to keep in the cache. If 0, do not
177 enforce a limit.
178 - max_age_secs: Maximum age an item is kept in the cache until it is
179 automatically evicted. Having a lot of dead luggage slows
180 everything down.
181 """
182 self.max_cache_size = max_cache_size
183 self.min_free_space = min_free_space
184 self.max_items = max_items
185 self.max_age_secs = max_age_secs
186
187 def __str__(self):
188 return (
189 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
190 'max_age_secs=%s)') % (
191 self.max_cache_size, self.max_items, self.min_free_space,
192 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400193
194
195class CacheMiss(Exception):
196 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197 def __init__(self, digest):
198 self.digest = digest
199 super(CacheMiss, self).__init__(
200 'Item with digest %r is not found in cache' % digest)
201
202
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203class Cache(object):
204 def __init__(self, cache_dir):
205 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000206 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 assert file_path.isabs(cache_dir), cache_dir
208 self.cache_dir = cache_dir
209 self._lock = threading_utils.LockWithAssert()
210 # Profiling values.
211 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 self._used = []
213
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000214 def __nonzero__(self):
215 """A cache is always True.
216
217 Otherwise it falls back to __len__, which is surprising.
218 """
219 return True
220
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000221 def __len__(self):
222 """Returns the number of entries in the cache."""
223 raise NotImplementedError()
224
225 def __iter__(self):
226 """Iterates over all the entries names."""
227 raise NotImplementedError()
228
229 def __contains__(self, name):
230 """Returns if an entry is in the cache."""
231 raise NotImplementedError()
232
233 @property
234 def total_size(self):
235 """Returns the total size of the cache in bytes."""
236 raise NotImplementedError()
237
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 @property
239 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000240 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 with self._lock:
242 return self._added[:]
243
244 @property
245 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000246 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400247 with self._lock:
248 return self._used[:]
249
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000250 def get_oldest(self):
251 """Returns timestamp of oldest cache entry or None.
252
253 Returns:
254 Timestamp of the oldest item.
255
256 Used for manual trimming.
257 """
258 raise NotImplementedError()
259
260 def remove_oldest(self):
261 """Removes the oldest item from the cache.
262
263 Returns:
264 Size of the oldest item.
265
266 Used for manual trimming.
267 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400268 raise NotImplementedError()
269
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 def save(self):
271 """Saves the current cache to disk."""
272 raise NotImplementedError()
273
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276
277 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000278 Slice with the size of evicted items.
279 """
280 raise NotImplementedError()
281
282 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 """Deletes any corrupted item from the cache, then calls trim(), then
284 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000285
286 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 """
288 raise NotImplementedError()
289
290
291class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 """Content addressed cache that stores objects temporarily.
293
294 It can be accessed concurrently from multiple threads, so it should protect
295 its internal state with some lock.
296 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298 def __enter__(self):
299 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 return self
302
303 def __exit__(self, _exc_type, _exec_value, _traceback):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return False
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 def touch(self, digest, size):
309 """Ensures item is not corrupted and updates its LRU position.
310
311 Arguments:
312 digest: hash digest of item to check.
313 size: expected size of this item.
314
315 Returns:
316 True if item is in cache and not corrupted.
317 """
318 raise NotImplementedError()
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def getfileobj(self, digest):
321 """Returns a readable file like object.
322
323 If file exists on the file system it will have a .name attribute with an
324 absolute path to the file.
325 """
326 raise NotImplementedError()
327
328 def write(self, digest, content):
329 """Reads data from |content| generator and stores it in cache.
330
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 It is possible to write to an object that already exists. It may be
332 ignored (sent to /dev/null) but the timestamp is still updated.
333
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400334 Returns digest to simplify chaining.
335 """
336 raise NotImplementedError()
337
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400338
339class MemoryContentAddressedCache(ContentAddressedCache):
340 """ContentAddressedCache implementation that stores everything in memory."""
341
Lei Leife202df2019-06-11 17:33:34 +0000342 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343 """Args:
344 file_mode_mask: bit mask to AND file mode with. Default value will make
345 all mapped files to be read only.
346 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400347 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 # Items in a LRU lookup dict(digest: size).
350 self._lru = lru.LRUDict()
351
352 # Cache interface implementation.
353
354 def __len__(self):
355 with self._lock:
356 return len(self._lru)
357
358 def __iter__(self):
359 # This is not thread-safe.
360 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def __contains__(self, digest):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
366 @property
367 def total_size(self):
368 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000369 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000371 def get_oldest(self):
372 with self._lock:
373 try:
374 # (key, (value, ts))
375 return self._lru.get_oldest()[1][1]
376 except KeyError:
377 return None
378
379 def remove_oldest(self):
380 with self._lock:
381 # TODO(maruel): Update self._added.
382 # (key, (value, ts))
383 return len(self._lru.pop_oldest()[1][0])
384
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000385 def save(self):
386 pass
387
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000388 def trim(self):
389 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000390 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 pass
395
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000396 # ContentAddressedCache interface implementation.
397
398 def __contains__(self, digest):
399 with self._lock:
400 return digest in self._lru
401
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 def touch(self, digest, size):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 try:
405 self._lru.touch(digest)
406 except KeyError:
407 return False
408 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 def getfileobj(self, digest):
411 with self._lock:
412 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000413 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 except KeyError:
415 raise CacheMiss(digest)
416 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 return io.BytesIO(d)
419
420 def write(self, digest, content):
421 # Assemble whole stream before taking the lock.
422 data = ''.join(content)
423 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 self._added.append(len(data))
426 return digest
427
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428
429class DiskContentAddressedCache(ContentAddressedCache):
430 """Stateful LRU cache in a flat hash table in a directory.
431
432 Saves its state as json file.
433 """
434 STATE_FILE = u'state.json'
435
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000436 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 """
438 Arguments:
439 cache_dir: directory where to place the cache.
440 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000442 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 """
444 # All protected methods (starting with '_') except _path should be called
445 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400446 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
449 # Items in a LRU lookup dict(digest: size).
450 self._lru = lru.LRUDict()
451 # Current cached free disk space. It is updated by self._trim().
452 file_path.ensure_tree(self.cache_dir)
453 self._free_disk = file_path.get_free_space(self.cache_dir)
454 # The first item in the LRU cache that must not be evicted during this run
455 # since it was referenced. All items more recent that _protected in the LRU
456 # cache are also inherently protected. It could be a set() of all items
457 # referenced but this increases memory usage without a use case.
458 self._protected = None
459 # Cleanup operations done by self._load(), if any.
460 self._operations = []
461 with tools.Profiler('Setup'):
462 with self._lock:
463 self._load(trim, time_fn)
464
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000465 # Cache interface implementation.
466
467 def __len__(self):
468 with self._lock:
469 return len(self._lru)
470
471 def __iter__(self):
472 # This is not thread-safe.
473 return self._lru.__iter__()
474
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400475 def __contains__(self, digest):
476 with self._lock:
477 return digest in self._lru
478
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def total_size(self):
481 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000482 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000484 def get_oldest(self):
485 with self._lock:
486 try:
487 # (key, (value, ts))
488 return self._lru.get_oldest()[1][1]
489 except KeyError:
490 return None
491
492 def remove_oldest(self):
493 with self._lock:
494 # TODO(maruel): Update self._added.
495 return self._remove_lru_file(True)
496
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000497 def save(self):
498 with self._lock:
499 return self._save()
500
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000501 def trim(self):
502 """Forces retention policies."""
503 with self._lock:
504 return self._trim()
505
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 def cleanup(self):
507 """Cleans up the cache directory.
508
509 Ensures there is no unknown files in cache_dir.
510 Ensures the read-only bits are set correctly.
511
512 At that point, the cache was already loaded, trimmed to respect cache
513 policies.
514 """
515 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000516 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000518 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # It'd be faster if there were a readdir() function.
520 for filename in fs.listdir(self.cache_dir):
521 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000522 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523 continue
524 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000525 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 previous.remove(filename)
527 continue
528
529 # An untracked file. Delete it.
530 logging.warning('Removing unknown file %s from cache', filename)
531 p = self._path(filename)
532 if fs.isdir(p):
533 try:
534 file_path.rmtree(p)
535 except OSError:
536 pass
537 else:
538 file_path.try_remove(p)
539 continue
540
541 if previous:
542 # Filter out entries that were not found.
543 logging.warning('Removed %d lost files', len(previous))
544 for filename in previous:
545 self._lru.pop(filename)
546 self._save()
547
548 # What remains to be done is to hash every single item to
549 # detect corruption, then save to ensure state.json is up to date.
550 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
551 # TODO(maruel): Let's revisit once directory metadata is stored in
552 # state.json so only the files that had been mapped since the last cleanup()
553 # call are manually verified.
554 #
555 #with self._lock:
556 # for digest in self._lru:
557 # if not isolated_format.is_valid_hash(
558 # self._path(digest), self.hash_algo):
559 # self.evict(digest)
560 # logging.info('Deleted corrupted item: %s', digest)
561
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000562 # ContentAddressedCache interface implementation.
563
564 def __contains__(self, digest):
565 with self._lock:
566 return digest in self._lru
567
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 def touch(self, digest, size):
569 """Verifies an actual file is valid and bumps its LRU position.
570
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000571 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572
573 Note that is doesn't compute the hash so it could still be corrupted if the
574 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 """
576 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000577 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
579 # Update its LRU position.
580 with self._lock:
581 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000582 if looks_valid:
583 # Exists but not in the LRU anymore.
584 self._delete_file(digest, size)
585 return False
586 if not looks_valid:
587 self._lru.pop(digest)
588 # Exists but not in the LRU anymore.
589 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590 return False
591 self._lru.touch(digest)
592 self._protected = self._protected or digest
593 return True
594
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 def getfileobj(self, digest):
596 try:
597 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400598 except IOError:
599 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000600 with self._lock:
601 try:
602 self._used.append(self._lru[digest])
603 except KeyError:
604 # If the digest is not actually in _lru, assume it is a cache miss.
605 # Existing file will be overwritten by whoever uses the cache and added
606 # to _lru.
607 f.close()
608 raise CacheMiss(digest)
609 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610
611 def write(self, digest, content):
612 assert content is not None
613 with self._lock:
614 self._protected = self._protected or digest
615 path = self._path(digest)
616 # A stale broken file may remain. It is possible for the file to have write
617 # access bit removed which would cause the file_write() call to fail to open
618 # in write mode. Take no chance here.
619 file_path.try_remove(path)
620 try:
621 size = file_write(path, content)
622 except:
623 # There are two possible places were an exception can occur:
624 # 1) Inside |content| generator in case of network or unzipping errors.
625 # 2) Inside file_write itself in case of disk IO errors.
626 # In any case delete an incomplete file and propagate the exception to
627 # caller, it will be logged there.
628 file_path.try_remove(path)
629 raise
630 # Make the file read-only in the cache. This has a few side-effects since
631 # the file node is modified, so every directory entries to this file becomes
632 # read-only. It's fine here because it is a new file.
633 file_path.set_read_only(path, True)
634 with self._lock:
635 self._add(digest, size)
636 return digest
637
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000638 # Internal functions.
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def _load(self, trim, time_fn):
641 """Loads state of the cache from json file.
642
643 If cache_dir does not exist on disk, it is created.
644 """
645 self._lock.assert_locked()
646
647 if not fs.isfile(self.state_file):
648 if not fs.isdir(self.cache_dir):
649 fs.makedirs(self.cache_dir)
650 else:
651 # Load state of the cache.
652 try:
653 self._lru = lru.LRUDict.load(self.state_file)
654 except ValueError as err:
655 logging.error('Failed to load cache state: %s' % (err,))
656 # Don't want to keep broken state file.
657 file_path.try_remove(self.state_file)
658 if time_fn:
659 self._lru.time_fn = time_fn
660 if trim:
661 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400662
663 def _save(self):
664 """Saves the LRU ordering."""
665 self._lock.assert_locked()
666 if sys.platform != 'win32':
667 d = os.path.dirname(self.state_file)
668 if fs.isdir(d):
669 # Necessary otherwise the file can't be created.
670 file_path.set_read_only(d, False)
671 if fs.isfile(self.state_file):
672 file_path.set_read_only(self.state_file, False)
673 self._lru.save(self.state_file)
674
675 def _trim(self):
676 """Trims anything we don't know, make sure enough free space exists."""
677 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000678 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400679
680 # Trim old items.
681 if self.policies.max_age_secs:
682 cutoff = self._lru.time_fn() - self.policies.max_age_secs
683 while self._lru:
684 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000685 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400686 if oldest[1][1] >= cutoff:
687 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000688 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400689
690 # Ensure maximum cache size.
691 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000692 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400693 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 e = self._remove_lru_file(True)
695 evicted.append(e)
696 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400697
698 # Ensure maximum number of items in the cache.
699 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000700 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000701 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400702
703 # Ensure enough free space.
704 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400705 while (
706 self.policies.min_free_space and
707 self._lru and
708 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000709 # self._free_disk is updated by this call.
710 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400711
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000712 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000713 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400714 usage_percent = 0.
715 if total_usage:
716 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
717
718 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000719 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
720 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
721 '%.1fkb)',
722 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400723 self._free_disk / 1024.,
724 total_usage / 1024.,
725 usage_percent,
726 self.policies.max_cache_size / 1024.)
727 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000728 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400729
730 def _path(self, digest):
731 """Returns the path to one item."""
732 return os.path.join(self.cache_dir, digest)
733
734 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000735 """Removes the lastest recently used file and returns its size.
736
737 Updates self._free_disk.
738 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400739 self._lock.assert_locked()
740 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000741 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400742 if not allow_protected and digest == self._protected:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000743 total_size = sum(self._lru.values())+size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 msg = (
745 'Not enough space to fetch the whole isolated tree.\n'
746 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
747 self.policies, total_size, len(self._lru)+1, self._free_disk)
748 raise NoMoreSpace(msg)
749 except KeyError:
750 # That means an internal error.
751 raise NoMoreSpace('Nothing to remove, can\'t happend')
752 digest, (size, _) = self._lru.pop_oldest()
753 logging.debug('Removing LRU file %s', digest)
754 self._delete_file(digest, size)
755 return size
756
757 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
758 """Adds an item into LRU cache marking it as a newest one."""
759 self._lock.assert_locked()
760 if size == UNKNOWN_FILE_SIZE:
761 size = fs.stat(self._path(digest)).st_size
762 self._added.append(size)
763 self._lru.add(digest, size)
764 self._free_disk -= size
765 # Do a quicker version of self._trim(). It only enforces free disk space,
766 # not cache size limits. It doesn't actually look at real free disk space,
767 # only uses its cache values. self._trim() will be called later to enforce
768 # real trimming but doing this quick version here makes it possible to map
769 # an isolated that is larger than the current amount of free disk space when
770 # the cache size is already large.
771 while (
772 self.policies.min_free_space and
773 self._lru and
774 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000775 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400776 if self._remove_lru_file(False) == -1:
777 break
778
779 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000780 """Deletes cache file from the file system.
781
782 Updates self._free_disk.
783 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400784 self._lock.assert_locked()
785 try:
786 if size == UNKNOWN_FILE_SIZE:
787 try:
788 size = fs.stat(self._path(digest)).st_size
789 except OSError:
790 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000791 if file_path.try_remove(self._path(digest)):
792 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400793 except OSError as e:
794 if e.errno != errno.ENOENT:
795 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400796
797
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400798class NamedCache(Cache):
799 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400800
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400801 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400802 name is a short identifier that describes the contents of the cache, e.g.
803 "git_v8" could be all git repositories required by v8 builds, or
804 "build_chromium" could be build artefacts of the Chromium.
805 path is a directory path relative to the task run dir. Cache installation
806 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400807 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400808 _DIR_ALPHABET = string.ascii_letters + string.digits
809 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000810 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400811
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400812 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813 """Initializes NamedCaches.
814
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400815 Arguments:
816 - cache_dir is a directory for persistent cache storage.
817 - policies is a CachePolicies instance.
818 - time_fn is a function that returns timestamp (float) and used to take
819 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400820 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400821 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400822 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000823 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400824 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
825 self._lru = lru.LRUDict()
826 if not fs.isdir(self.cache_dir):
827 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000828 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000829 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 self._lru = lru.LRUDict.load(self.state_file)
831 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000832 logging.exception(
833 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000835 with self._lock:
836 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 if time_fn:
838 self._lru.time_fn = time_fn
839
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400840 @property
841 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000842 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400843 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000844 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400845
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000846 def install(self, dst, name):
847 """Creates the directory |dst| and moves a previous named cache |name| if it
848 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400849
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000850 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400851
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000852 Returns the reused named cache size in bytes, or 0 if none was present.
853
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400854 Raises NamedCacheError if cannot install the cache.
855 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000856 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400857 with self._lock:
858 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000859 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400860 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000861 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400862
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000863 # Remove the named symlink if it exists.
864 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000865 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000866 # Remove the symlink itself, not its destination.
867 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000868
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000869 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000870 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400871 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000872 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000873 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000874 file_path.ensure_tree(os.path.dirname(dst))
875 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000877 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400878
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000879 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400880 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400881
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000882 # The named cache does not exist, create an empty directory. When
883 # uninstalling, we will move it back to the cache and create an an
884 # entry.
885 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000886 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000887 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400888 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000889 # Raise using the original traceback.
890 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000891 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000892 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000893 finally:
894 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400895
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000896 def uninstall(self, src, name):
897 """Moves the cache directory back into the named cache hive for an eventual
898 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400899
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000900 The opposite of install().
901
902 src must be absolute and unicode. Its content is moved back into the local
903 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400904
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000905 Returns the named cache size in bytes.
906
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400907 Raises NamedCacheError if cannot uninstall the cache.
908 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000909 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400910 with self._lock:
911 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000912 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400913 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000914 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000915 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400916 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400917
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000918 if name in self._lru:
919 # This shouldn't happen but just remove the preexisting one and move
920 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000921 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000922 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000923
924 # Calculate the size of the named cache to keep. It's important because
925 # if size is zero (it's empty), we do not want to add it back to the
926 # named caches cache.
927 size = _get_recursive_size(src)
928 logging.info('- Size is %d', size)
929 if not size:
930 # Do not save empty named cache.
931 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400932
933 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000934 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400935 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000936 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400937 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000938 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400939
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000940 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000941 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000942
943 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
944 # for user convenience.
945 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000946 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000947 file_path.remove(named_path)
948 else:
949 file_path.ensure_tree(os.path.dirname(named_path))
950
951 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000952 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000953 logging.info(
954 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000955 except OSError:
956 # Ignore on Windows. It happens when running as a normal user or when
957 # UAC is enabled and the user is a filtered administrator account.
958 if sys.platform != 'win32':
959 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000960 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400961 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000962 # Raise using the original traceback.
963 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000964 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000965 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000966 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000967 # Call save() at every uninstall. The assumptions are:
968 # - The total the number of named caches is low, so the state.json file
969 # is small, so the time it takes to write it to disk is short.
970 # - The number of mapped named caches per task is low, so the number of
971 # times save() is called on tear-down isn't high enough to be
972 # significant.
973 # - uninstall() sometimes throws due to file locking on Windows or
974 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000975 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400976
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000977 # Cache interface implementation.
978
979 def __len__(self):
980 with self._lock:
981 return len(self._lru)
982
983 def __iter__(self):
984 # This is not thread-safe.
985 return self._lru.__iter__()
986
987 def __contains__(self, digest):
988 with self._lock:
989 return digest in self._lru
990
991 @property
992 def total_size(self):
993 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000994 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000995
996 def get_oldest(self):
997 with self._lock:
998 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000999 # (key, (value, ts))
1000 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001001 except KeyError:
1002 return None
1003
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001004 def remove_oldest(self):
1005 with self._lock:
1006 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001007 _name, size = self._remove_lru_item()
1008 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001009
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001010 def save(self):
1011 with self._lock:
1012 return self._save()
1013
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001014 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001015 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001016 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001017 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001018 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001019
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001020 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001021 if self._policies.max_items:
1022 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001023 name, size = self._remove_lru_item()
1024 evicted.append(size)
1025 logging.info(
1026 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1027 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001028
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001029 # Trim according to maximum age.
1030 if self._policies.max_age_secs:
1031 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1032 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001033 _name, (_data, ts) = self._lru.get_oldest()
1034 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001035 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001036 name, size = self._remove_lru_item()
1037 evicted.append(size)
1038 logging.info(
1039 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1040 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001041
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001042 # Trim according to minimum free space.
1043 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001044 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001045 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001046 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001047 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001048 name, size = self._remove_lru_item()
1049 evicted.append(size)
1050 logging.info(
1051 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1052 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001053
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001054 # Trim according to maximum total size.
1055 if self._policies.max_cache_size:
1056 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001057 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001058 if total <= self._policies.max_cache_size:
1059 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001060 name, size = self._remove_lru_item()
1061 evicted.append(size)
1062 logging.info(
1063 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1064 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001065
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001066 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001067 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001068
1069 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001070 """Removes unknown directories.
1071
1072 Does not recalculate the cache size since it's surprisingly slow on some
1073 OSes.
1074 """
1075 success = True
1076 with self._lock:
1077 try:
1078 actual = set(fs.listdir(self.cache_dir))
1079 actual.discard(self.NAMED_DIR)
1080 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001081 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001082 # First, handle the actual cache content.
1083 # Remove missing entries.
1084 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001085 name, size = self._lru.pop(expected[missing])
1086 logging.warning(
1087 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001088 # Remove unexpected items.
1089 for unexpected in (actual - set(expected)):
1090 try:
1091 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001092 logging.warning(
1093 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001094 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001095 file_path.rmtree(p)
1096 else:
1097 fs.remove(p)
1098 except (IOError, OSError) as e:
1099 logging.error('Failed to remove %s: %s', unexpected, e)
1100 success = False
1101
1102 # Second, fix named cache links.
1103 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001104 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001105 actual = set(fs.listdir(named))
1106 expected = set(self._lru)
1107 # Confirm entries. Do not add missing ones for now.
1108 for name in expected.intersection(actual):
1109 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001110 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001111 if fs.islink(p):
1112 link = fs.readlink(p)
1113 if expected_link == link:
1114 continue
1115 logging.warning(
1116 'Unexpected symlink for cache %s: %s, expected %s',
1117 name, link, expected_link)
1118 else:
1119 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001120 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001121 file_path.rmtree(p)
1122 else:
1123 fs.remove(p)
1124 # Remove unexpected items.
1125 for unexpected in (actual - expected):
1126 try:
1127 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1128 if fs.isdir(p):
1129 file_path.rmtree(p)
1130 else:
1131 fs.remove(p)
1132 except (IOError, OSError) as e:
1133 logging.error('Failed to remove %s: %s', unexpected, e)
1134 success = False
1135 finally:
1136 self._save()
1137 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001138
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001139 # Internal functions.
1140
1141 def _try_upgrade(self):
1142 """Upgrades from the old format to the new one if necessary.
1143
1144 This code can be removed so all bots are known to have the right new format.
1145 """
1146 if not self._lru:
1147 return
1148 _name, (data, _ts) = self._lru.get_oldest()
1149 if isinstance(data, (list, tuple)):
1150 return
1151 # Update to v2.
1152 def upgrade(_name, rel_cache):
1153 abs_cache = os.path.join(self.cache_dir, rel_cache)
1154 return rel_cache, _get_recursive_size(abs_cache)
1155 self._lru.transform(upgrade)
1156 self._save()
1157
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001158 def _remove_lru_item(self):
1159 """Removes the oldest LRU entry. LRU must not be empty."""
1160 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1161 logging.info('Removing named cache %r', name)
1162 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001163 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001164
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001165 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001166 """Creates and returns relative path of a new cache directory.
1167
1168 In practice, it is a 2-letter string.
1169 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001170 # We randomly generate directory names that have two lower/upper case
1171 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1172 abc_len = len(self._DIR_ALPHABET)
1173 tried = set()
1174 while len(tried) < 1000:
1175 i = random.randint(0, abc_len * abc_len - 1)
1176 rel_path = (
1177 self._DIR_ALPHABET[i / abc_len] +
1178 self._DIR_ALPHABET[i % abc_len])
1179 if rel_path in tried:
1180 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001181 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001182 if not fs.exists(abs_path):
1183 return rel_path
1184 tried.add(rel_path)
1185 raise NamedCacheError(
1186 'could not allocate a new cache dir, too many cache dirs')
1187
1188 def _remove(self, name):
1189 """Removes a cache directory and entry.
1190
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001191 Returns:
1192 Number of caches deleted.
1193 """
1194 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001195 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001196 named_dir = self._get_named_path(name)
1197 if fs.islink(named_dir):
1198 fs.unlink(named_dir)
1199
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001200 # Then remove the actual data.
1201 if name not in self._lru:
1202 return
1203 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001204 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001205 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001206 file_path.rmtree(abs_path)
1207 self._lru.pop(name)
1208
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001209 def _save(self):
1210 self._lock.assert_locked()
1211 file_path.ensure_tree(self.cache_dir)
1212 self._lru.save(self.state_file)
1213
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001214 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001215 return os.path.join(self.cache_dir, self.NAMED_DIR, name)