blob: 74e7ed3b0e007e5bc7b1eddcc24501ee18852471 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000109def _use_scandir():
110 # Use scandir in windows for faster execution.
111 # Do not use in other OS due to crbug.com/989409
112 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000113
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000114def _get_recursive_size(path):
115 """Returns the total data size for the specified path.
116
117 This function can be surprisingly slow on OSX, so its output should be cached.
118 """
119 try:
120 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000121 if _use_scandir():
122
123 if sys.platform == 'win32':
124 def direntIsJunction(entry):
125 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
126 # windows-only symbols.
127 return bool(entry.stat().st_file_attributes &
128 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
129 else:
130 def direntIsJunction(_entry):
131 return False
132
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000133 stack = [path]
134 while stack:
135 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000136 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 continue
138 if entry.is_file():
139 total += entry.stat().st_size
140 elif entry.is_dir():
141 stack.append(entry.path)
142 else:
143 logging.warning('non directory/file entry: %s', entry)
144 return total
145
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000146 for root, _, files in fs.walk(path):
147 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000148 st = fs.lstat(os.path.join(root, f))
149 if stat.S_ISLNK(st.st_mode):
150 continue
151 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000152 return total
153 except (IOError, OSError, UnicodeEncodeError) as exc:
154 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
155 return None
156
157
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400158class NamedCacheError(Exception):
159 """Named cache specific error."""
160
161
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400162class NoMoreSpace(Exception):
163 """Not enough space to map the whole directory."""
164 pass
165
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400166
167class CachePolicies(object):
168 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
169 """Common caching policies for the multiple caches (isolated, named, cipd).
170
171 Arguments:
172 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
173 cache is effectively a leak.
174 - min_free_space: Trim if disk free space becomes lower than this value. If
175 0, it will unconditionally fill the disk.
176 - max_items: Maximum number of items to keep in the cache. If 0, do not
177 enforce a limit.
178 - max_age_secs: Maximum age an item is kept in the cache until it is
179 automatically evicted. Having a lot of dead luggage slows
180 everything down.
181 """
182 self.max_cache_size = max_cache_size
183 self.min_free_space = min_free_space
184 self.max_items = max_items
185 self.max_age_secs = max_age_secs
186
187 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000188 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
189 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
190 self.max_cache_size, float(self.max_cache_size) / 1024**3,
191 self.max_items, self.min_free_space,
192 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400193
194
195class CacheMiss(Exception):
196 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197 def __init__(self, digest):
198 self.digest = digest
199 super(CacheMiss, self).__init__(
200 'Item with digest %r is not found in cache' % digest)
201
202
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203class Cache(object):
204 def __init__(self, cache_dir):
205 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000206 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 assert file_path.isabs(cache_dir), cache_dir
208 self.cache_dir = cache_dir
209 self._lock = threading_utils.LockWithAssert()
210 # Profiling values.
211 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 self._used = []
213
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000214 def __nonzero__(self):
215 """A cache is always True.
216
217 Otherwise it falls back to __len__, which is surprising.
218 """
219 return True
220
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000221 def __len__(self):
222 """Returns the number of entries in the cache."""
223 raise NotImplementedError()
224
225 def __iter__(self):
226 """Iterates over all the entries names."""
227 raise NotImplementedError()
228
229 def __contains__(self, name):
230 """Returns if an entry is in the cache."""
231 raise NotImplementedError()
232
233 @property
234 def total_size(self):
235 """Returns the total size of the cache in bytes."""
236 raise NotImplementedError()
237
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 @property
239 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000240 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 with self._lock:
242 return self._added[:]
243
244 @property
245 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000246 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400247 with self._lock:
248 return self._used[:]
249
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000250 def get_oldest(self):
251 """Returns timestamp of oldest cache entry or None.
252
253 Returns:
254 Timestamp of the oldest item.
255
256 Used for manual trimming.
257 """
258 raise NotImplementedError()
259
260 def remove_oldest(self):
261 """Removes the oldest item from the cache.
262
263 Returns:
264 Size of the oldest item.
265
266 Used for manual trimming.
267 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400268 raise NotImplementedError()
269
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 def save(self):
271 """Saves the current cache to disk."""
272 raise NotImplementedError()
273
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276
277 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000278 Slice with the size of evicted items.
279 """
280 raise NotImplementedError()
281
282 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 """Deletes any corrupted item from the cache, then calls trim(), then
284 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000285
286 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 """
288 raise NotImplementedError()
289
290
291class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 """Content addressed cache that stores objects temporarily.
293
294 It can be accessed concurrently from multiple threads, so it should protect
295 its internal state with some lock.
296 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298 def __enter__(self):
299 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 return self
302
303 def __exit__(self, _exc_type, _exec_value, _traceback):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return False
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 def touch(self, digest, size):
309 """Ensures item is not corrupted and updates its LRU position.
310
311 Arguments:
312 digest: hash digest of item to check.
313 size: expected size of this item.
314
315 Returns:
316 True if item is in cache and not corrupted.
317 """
318 raise NotImplementedError()
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def getfileobj(self, digest):
321 """Returns a readable file like object.
322
323 If file exists on the file system it will have a .name attribute with an
324 absolute path to the file.
325 """
326 raise NotImplementedError()
327
328 def write(self, digest, content):
329 """Reads data from |content| generator and stores it in cache.
330
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 It is possible to write to an object that already exists. It may be
332 ignored (sent to /dev/null) but the timestamp is still updated.
333
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400334 Returns digest to simplify chaining.
335 """
336 raise NotImplementedError()
337
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400338
339class MemoryContentAddressedCache(ContentAddressedCache):
340 """ContentAddressedCache implementation that stores everything in memory."""
341
Lei Leife202df2019-06-11 17:33:34 +0000342 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343 """Args:
344 file_mode_mask: bit mask to AND file mode with. Default value will make
345 all mapped files to be read only.
346 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400347 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 # Items in a LRU lookup dict(digest: size).
350 self._lru = lru.LRUDict()
351
352 # Cache interface implementation.
353
354 def __len__(self):
355 with self._lock:
356 return len(self._lru)
357
358 def __iter__(self):
359 # This is not thread-safe.
360 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def __contains__(self, digest):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
366 @property
367 def total_size(self):
368 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000369 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000371 def get_oldest(self):
372 with self._lock:
373 try:
374 # (key, (value, ts))
375 return self._lru.get_oldest()[1][1]
376 except KeyError:
377 return None
378
379 def remove_oldest(self):
380 with self._lock:
381 # TODO(maruel): Update self._added.
382 # (key, (value, ts))
383 return len(self._lru.pop_oldest()[1][0])
384
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000385 def save(self):
386 pass
387
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000388 def trim(self):
389 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000390 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 pass
395
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000396 # ContentAddressedCache interface implementation.
397
398 def __contains__(self, digest):
399 with self._lock:
400 return digest in self._lru
401
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 def touch(self, digest, size):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 try:
405 self._lru.touch(digest)
406 except KeyError:
407 return False
408 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 def getfileobj(self, digest):
411 with self._lock:
412 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000413 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 except KeyError:
415 raise CacheMiss(digest)
416 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 return io.BytesIO(d)
419
420 def write(self, digest, content):
421 # Assemble whole stream before taking the lock.
422 data = ''.join(content)
423 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 self._added.append(len(data))
426 return digest
427
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428
429class DiskContentAddressedCache(ContentAddressedCache):
430 """Stateful LRU cache in a flat hash table in a directory.
431
432 Saves its state as json file.
433 """
434 STATE_FILE = u'state.json'
435
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000436 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 """
438 Arguments:
439 cache_dir: directory where to place the cache.
440 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000442 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 """
444 # All protected methods (starting with '_') except _path should be called
445 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400446 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
449 # Items in a LRU lookup dict(digest: size).
450 self._lru = lru.LRUDict()
451 # Current cached free disk space. It is updated by self._trim().
452 file_path.ensure_tree(self.cache_dir)
453 self._free_disk = file_path.get_free_space(self.cache_dir)
454 # The first item in the LRU cache that must not be evicted during this run
455 # since it was referenced. All items more recent that _protected in the LRU
456 # cache are also inherently protected. It could be a set() of all items
457 # referenced but this increases memory usage without a use case.
458 self._protected = None
459 # Cleanup operations done by self._load(), if any.
460 self._operations = []
461 with tools.Profiler('Setup'):
462 with self._lock:
463 self._load(trim, time_fn)
464
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000465 # Cache interface implementation.
466
467 def __len__(self):
468 with self._lock:
469 return len(self._lru)
470
471 def __iter__(self):
472 # This is not thread-safe.
473 return self._lru.__iter__()
474
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400475 def __contains__(self, digest):
476 with self._lock:
477 return digest in self._lru
478
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def total_size(self):
481 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000482 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000484 def get_oldest(self):
485 with self._lock:
486 try:
487 # (key, (value, ts))
488 return self._lru.get_oldest()[1][1]
489 except KeyError:
490 return None
491
492 def remove_oldest(self):
493 with self._lock:
494 # TODO(maruel): Update self._added.
495 return self._remove_lru_file(True)
496
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000497 def save(self):
498 with self._lock:
499 return self._save()
500
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000501 def trim(self):
502 """Forces retention policies."""
503 with self._lock:
504 return self._trim()
505
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 def cleanup(self):
507 """Cleans up the cache directory.
508
509 Ensures there is no unknown files in cache_dir.
510 Ensures the read-only bits are set correctly.
511
512 At that point, the cache was already loaded, trimmed to respect cache
513 policies.
514 """
515 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000516 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000518 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # It'd be faster if there were a readdir() function.
520 for filename in fs.listdir(self.cache_dir):
521 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000522 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523 continue
524 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000525 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 previous.remove(filename)
527 continue
528
529 # An untracked file. Delete it.
530 logging.warning('Removing unknown file %s from cache', filename)
531 p = self._path(filename)
532 if fs.isdir(p):
533 try:
534 file_path.rmtree(p)
535 except OSError:
536 pass
537 else:
538 file_path.try_remove(p)
539 continue
540
541 if previous:
542 # Filter out entries that were not found.
543 logging.warning('Removed %d lost files', len(previous))
544 for filename in previous:
545 self._lru.pop(filename)
546 self._save()
547
548 # What remains to be done is to hash every single item to
549 # detect corruption, then save to ensure state.json is up to date.
550 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
551 # TODO(maruel): Let's revisit once directory metadata is stored in
552 # state.json so only the files that had been mapped since the last cleanup()
553 # call are manually verified.
554 #
555 #with self._lock:
556 # for digest in self._lru:
557 # if not isolated_format.is_valid_hash(
558 # self._path(digest), self.hash_algo):
559 # self.evict(digest)
560 # logging.info('Deleted corrupted item: %s', digest)
561
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000562 # ContentAddressedCache interface implementation.
563
564 def __contains__(self, digest):
565 with self._lock:
566 return digest in self._lru
567
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 def touch(self, digest, size):
569 """Verifies an actual file is valid and bumps its LRU position.
570
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000571 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572
573 Note that is doesn't compute the hash so it could still be corrupted if the
574 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 """
576 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000577 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
579 # Update its LRU position.
580 with self._lock:
581 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000582 if looks_valid:
583 # Exists but not in the LRU anymore.
584 self._delete_file(digest, size)
585 return False
586 if not looks_valid:
587 self._lru.pop(digest)
588 # Exists but not in the LRU anymore.
589 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590 return False
591 self._lru.touch(digest)
592 self._protected = self._protected or digest
593 return True
594
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 def getfileobj(self, digest):
596 try:
597 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400598 except IOError:
599 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000600 with self._lock:
601 try:
602 self._used.append(self._lru[digest])
603 except KeyError:
604 # If the digest is not actually in _lru, assume it is a cache miss.
605 # Existing file will be overwritten by whoever uses the cache and added
606 # to _lru.
607 f.close()
608 raise CacheMiss(digest)
609 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610
611 def write(self, digest, content):
612 assert content is not None
613 with self._lock:
614 self._protected = self._protected or digest
615 path = self._path(digest)
616 # A stale broken file may remain. It is possible for the file to have write
617 # access bit removed which would cause the file_write() call to fail to open
618 # in write mode. Take no chance here.
619 file_path.try_remove(path)
620 try:
621 size = file_write(path, content)
622 except:
623 # There are two possible places were an exception can occur:
624 # 1) Inside |content| generator in case of network or unzipping errors.
625 # 2) Inside file_write itself in case of disk IO errors.
626 # In any case delete an incomplete file and propagate the exception to
627 # caller, it will be logged there.
628 file_path.try_remove(path)
629 raise
630 # Make the file read-only in the cache. This has a few side-effects since
631 # the file node is modified, so every directory entries to this file becomes
632 # read-only. It's fine here because it is a new file.
633 file_path.set_read_only(path, True)
634 with self._lock:
635 self._add(digest, size)
636 return digest
637
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000638 # Internal functions.
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def _load(self, trim, time_fn):
641 """Loads state of the cache from json file.
642
643 If cache_dir does not exist on disk, it is created.
644 """
645 self._lock.assert_locked()
646
647 if not fs.isfile(self.state_file):
648 if not fs.isdir(self.cache_dir):
649 fs.makedirs(self.cache_dir)
650 else:
651 # Load state of the cache.
652 try:
653 self._lru = lru.LRUDict.load(self.state_file)
654 except ValueError as err:
655 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000656 # Don't want to keep broken cache dir.
657 file_path.rmtree(self.cache_dir)
658 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400659 if time_fn:
660 self._lru.time_fn = time_fn
661 if trim:
662 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400663
664 def _save(self):
665 """Saves the LRU ordering."""
666 self._lock.assert_locked()
667 if sys.platform != 'win32':
668 d = os.path.dirname(self.state_file)
669 if fs.isdir(d):
670 # Necessary otherwise the file can't be created.
671 file_path.set_read_only(d, False)
672 if fs.isfile(self.state_file):
673 file_path.set_read_only(self.state_file, False)
674 self._lru.save(self.state_file)
675
676 def _trim(self):
677 """Trims anything we don't know, make sure enough free space exists."""
678 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000679 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680
681 # Trim old items.
682 if self.policies.max_age_secs:
683 cutoff = self._lru.time_fn() - self.policies.max_age_secs
684 while self._lru:
685 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000686 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400687 if oldest[1][1] >= cutoff:
688 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000689 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690
691 # Ensure maximum cache size.
692 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000693 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000695 e = self._remove_lru_file(True)
696 evicted.append(e)
697 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
699 # Ensure maximum number of items in the cache.
700 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000701 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000702 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400703
704 # Ensure enough free space.
705 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 while (
707 self.policies.min_free_space and
708 self._lru and
709 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000710 # self._free_disk is updated by this call.
711 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400712
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000713 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000714 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400715 usage_percent = 0.
716 if total_usage:
717 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
718
719 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000720 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
721 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
722 '%.1fkb)',
723 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724 self._free_disk / 1024.,
725 total_usage / 1024.,
726 usage_percent,
727 self.policies.max_cache_size / 1024.)
728 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400730
731 def _path(self, digest):
732 """Returns the path to one item."""
733 return os.path.join(self.cache_dir, digest)
734
735 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000736 """Removes the lastest recently used file and returns its size.
737
738 Updates self._free_disk.
739 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400740 self._lock.assert_locked()
741 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000742 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400743 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000744 total_size = sum(self._lru.values())
745 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000746 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
747 '%s bytes (%.3f GiB) free_space') % (
748 self.policies, total_size, float(total_size) / 1024**3,
749 len(self._lru), self._free_disk,
750 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400751 raise NoMoreSpace(msg)
752 except KeyError:
753 # That means an internal error.
754 raise NoMoreSpace('Nothing to remove, can\'t happend')
755 digest, (size, _) = self._lru.pop_oldest()
756 logging.debug('Removing LRU file %s', digest)
757 self._delete_file(digest, size)
758 return size
759
760 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
761 """Adds an item into LRU cache marking it as a newest one."""
762 self._lock.assert_locked()
763 if size == UNKNOWN_FILE_SIZE:
764 size = fs.stat(self._path(digest)).st_size
765 self._added.append(size)
766 self._lru.add(digest, size)
767 self._free_disk -= size
768 # Do a quicker version of self._trim(). It only enforces free disk space,
769 # not cache size limits. It doesn't actually look at real free disk space,
770 # only uses its cache values. self._trim() will be called later to enforce
771 # real trimming but doing this quick version here makes it possible to map
772 # an isolated that is larger than the current amount of free disk space when
773 # the cache size is already large.
774 while (
775 self.policies.min_free_space and
776 self._lru and
777 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000778 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400779 if self._remove_lru_file(False) == -1:
780 break
781
782 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000783 """Deletes cache file from the file system.
784
785 Updates self._free_disk.
786 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400787 self._lock.assert_locked()
788 try:
789 if size == UNKNOWN_FILE_SIZE:
790 try:
791 size = fs.stat(self._path(digest)).st_size
792 except OSError:
793 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000794 if file_path.try_remove(self._path(digest)):
795 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400796 except OSError as e:
797 if e.errno != errno.ENOENT:
798 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400799
800
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400801class NamedCache(Cache):
802 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805 name is a short identifier that describes the contents of the cache, e.g.
806 "git_v8" could be all git repositories required by v8 builds, or
807 "build_chromium" could be build artefacts of the Chromium.
808 path is a directory path relative to the task run dir. Cache installation
809 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400811 _DIR_ALPHABET = string.ascii_letters + string.digits
812 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000813 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400814
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400815 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400816 """Initializes NamedCaches.
817
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400818 Arguments:
819 - cache_dir is a directory for persistent cache storage.
820 - policies is a CachePolicies instance.
821 - time_fn is a function that returns timestamp (float) and used to take
822 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400823 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400824 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000826 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400827 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
828 self._lru = lru.LRUDict()
829 if not fs.isdir(self.cache_dir):
830 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000831 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000832 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400833 self._lru = lru.LRUDict.load(self.state_file)
834 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000835 logging.exception(
836 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000838 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000839 with self._lock:
840 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400841 if time_fn:
842 self._lru.time_fn = time_fn
843
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400844 @property
845 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000846 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400847 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000848 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400849
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000850 def install(self, dst, name):
851 """Creates the directory |dst| and moves a previous named cache |name| if it
852 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400853
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000854 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400855
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000856 Returns the reused named cache size in bytes, or 0 if none was present.
857
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400858 Raises NamedCacheError if cannot install the cache.
859 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000860 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400861 with self._lock:
862 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000863 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400864 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000865 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400866
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000867 # Remove the named symlink if it exists.
868 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000869 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000870 # Remove the symlink itself, not its destination.
871 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000872
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000873 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000874 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400875 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000876 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000877 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000878 file_path.ensure_tree(os.path.dirname(dst))
879 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400880 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000881 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400882
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000883 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400884 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400885
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000886 # The named cache does not exist, create an empty directory. When
887 # uninstalling, we will move it back to the cache and create an an
888 # entry.
889 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000890 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000891 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400892 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000893 # Raise using the original traceback.
894 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000895 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000896 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000897 finally:
898 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400899
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000900 def uninstall(self, src, name):
901 """Moves the cache directory back into the named cache hive for an eventual
902 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400903
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000904 The opposite of install().
905
906 src must be absolute and unicode. Its content is moved back into the local
907 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400908
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000909 Returns the named cache size in bytes.
910
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400911 Raises NamedCacheError if cannot uninstall the cache.
912 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000913 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400914 with self._lock:
915 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000916 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400917 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000918 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000919 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400920 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400921
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000922 if name in self._lru:
923 # This shouldn't happen but just remove the preexisting one and move
924 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000925 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000926 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000927
928 # Calculate the size of the named cache to keep. It's important because
929 # if size is zero (it's empty), we do not want to add it back to the
930 # named caches cache.
931 size = _get_recursive_size(src)
932 logging.info('- Size is %d', size)
933 if not size:
934 # Do not save empty named cache.
935 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400936
937 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000938 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400939 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000940 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400941 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000942 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400943
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000944 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000945 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000946
947 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
948 # for user convenience.
949 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000950 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000951 file_path.remove(named_path)
952 else:
953 file_path.ensure_tree(os.path.dirname(named_path))
954
955 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000956 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000957 logging.info(
958 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000959 except OSError:
960 # Ignore on Windows. It happens when running as a normal user or when
961 # UAC is enabled and the user is a filtered administrator account.
962 if sys.platform != 'win32':
963 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000964 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400965 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000966 # Raise using the original traceback.
967 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000968 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000969 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000970 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000971 # Call save() at every uninstall. The assumptions are:
972 # - The total the number of named caches is low, so the state.json file
973 # is small, so the time it takes to write it to disk is short.
974 # - The number of mapped named caches per task is low, so the number of
975 # times save() is called on tear-down isn't high enough to be
976 # significant.
977 # - uninstall() sometimes throws due to file locking on Windows or
978 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000979 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400980
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000981 # Cache interface implementation.
982
983 def __len__(self):
984 with self._lock:
985 return len(self._lru)
986
987 def __iter__(self):
988 # This is not thread-safe.
989 return self._lru.__iter__()
990
991 def __contains__(self, digest):
992 with self._lock:
993 return digest in self._lru
994
995 @property
996 def total_size(self):
997 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000998 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000999
1000 def get_oldest(self):
1001 with self._lock:
1002 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001003 # (key, (value, ts))
1004 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001005 except KeyError:
1006 return None
1007
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001008 def remove_oldest(self):
1009 with self._lock:
1010 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001011 _name, size = self._remove_lru_item()
1012 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001013
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001014 def save(self):
1015 with self._lock:
1016 return self._save()
1017
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001018 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001019 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001020 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001021 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001022 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001023
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001024 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001025 if self._policies.max_items:
1026 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001027 name, size = self._remove_lru_item()
1028 evicted.append(size)
1029 logging.info(
1030 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1031 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001032
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001033 # Trim according to maximum age.
1034 if self._policies.max_age_secs:
1035 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1036 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001037 _name, (_data, ts) = self._lru.get_oldest()
1038 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001039 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001040 name, size = self._remove_lru_item()
1041 evicted.append(size)
1042 logging.info(
1043 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1044 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001045
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001046 # Trim according to minimum free space.
1047 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001048 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001049 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001050 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001051 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001052 name, size = self._remove_lru_item()
1053 evicted.append(size)
1054 logging.info(
1055 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1056 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001057
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001058 # Trim according to maximum total size.
1059 if self._policies.max_cache_size:
1060 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001061 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001062 if total <= self._policies.max_cache_size:
1063 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001064 name, size = self._remove_lru_item()
1065 evicted.append(size)
1066 logging.info(
1067 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1068 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001069
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001070 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001071 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001072
1073 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001074 """Removes unknown directories.
1075
1076 Does not recalculate the cache size since it's surprisingly slow on some
1077 OSes.
1078 """
1079 success = True
1080 with self._lock:
1081 try:
1082 actual = set(fs.listdir(self.cache_dir))
1083 actual.discard(self.NAMED_DIR)
1084 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001085 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001086 # First, handle the actual cache content.
1087 # Remove missing entries.
1088 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001089 name, size = self._lru.pop(expected[missing])
1090 logging.warning(
1091 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001092 # Remove unexpected items.
1093 for unexpected in (actual - set(expected)):
1094 try:
1095 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001096 logging.warning(
1097 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001098 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001099 file_path.rmtree(p)
1100 else:
1101 fs.remove(p)
1102 except (IOError, OSError) as e:
1103 logging.error('Failed to remove %s: %s', unexpected, e)
1104 success = False
1105
1106 # Second, fix named cache links.
1107 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001108 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001109 actual = set(fs.listdir(named))
1110 expected = set(self._lru)
1111 # Confirm entries. Do not add missing ones for now.
1112 for name in expected.intersection(actual):
1113 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001114 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001115 if fs.islink(p):
1116 link = fs.readlink(p)
1117 if expected_link == link:
1118 continue
1119 logging.warning(
1120 'Unexpected symlink for cache %s: %s, expected %s',
1121 name, link, expected_link)
1122 else:
1123 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001124 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001125 file_path.rmtree(p)
1126 else:
1127 fs.remove(p)
1128 # Remove unexpected items.
1129 for unexpected in (actual - expected):
1130 try:
1131 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1132 if fs.isdir(p):
1133 file_path.rmtree(p)
1134 else:
1135 fs.remove(p)
1136 except (IOError, OSError) as e:
1137 logging.error('Failed to remove %s: %s', unexpected, e)
1138 success = False
1139 finally:
1140 self._save()
1141 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001142
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001143 # Internal functions.
1144
1145 def _try_upgrade(self):
1146 """Upgrades from the old format to the new one if necessary.
1147
1148 This code can be removed so all bots are known to have the right new format.
1149 """
1150 if not self._lru:
1151 return
1152 _name, (data, _ts) = self._lru.get_oldest()
1153 if isinstance(data, (list, tuple)):
1154 return
1155 # Update to v2.
1156 def upgrade(_name, rel_cache):
1157 abs_cache = os.path.join(self.cache_dir, rel_cache)
1158 return rel_cache, _get_recursive_size(abs_cache)
1159 self._lru.transform(upgrade)
1160 self._save()
1161
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001162 def _remove_lru_item(self):
1163 """Removes the oldest LRU entry. LRU must not be empty."""
1164 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1165 logging.info('Removing named cache %r', name)
1166 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001167 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001168
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001169 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001170 """Creates and returns relative path of a new cache directory.
1171
1172 In practice, it is a 2-letter string.
1173 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001174 # We randomly generate directory names that have two lower/upper case
1175 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1176 abc_len = len(self._DIR_ALPHABET)
1177 tried = set()
1178 while len(tried) < 1000:
1179 i = random.randint(0, abc_len * abc_len - 1)
1180 rel_path = (
1181 self._DIR_ALPHABET[i / abc_len] +
1182 self._DIR_ALPHABET[i % abc_len])
1183 if rel_path in tried:
1184 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001185 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001186 if not fs.exists(abs_path):
1187 return rel_path
1188 tried.add(rel_path)
1189 raise NamedCacheError(
1190 'could not allocate a new cache dir, too many cache dirs')
1191
1192 def _remove(self, name):
1193 """Removes a cache directory and entry.
1194
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001195 Returns:
1196 Number of caches deleted.
1197 """
1198 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001199 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001200 named_dir = self._get_named_path(name)
1201 if fs.islink(named_dir):
1202 fs.unlink(named_dir)
1203
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001204 # Then remove the actual data.
1205 if name not in self._lru:
1206 return
1207 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001208 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001209 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001210 file_path.rmtree(abs_path)
1211 self._lru.pop(name)
1212
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001213 def _save(self):
1214 self._lock.assert_locked()
1215 file_path.ensure_tree(self.cache_dir)
1216 self._lru.save(self.state_file)
1217
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001218 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001219 return os.path.join(self.cache_dir, self.NAMED_DIR, name)