blob: fd85b47d8fa484f0c694a41ba5cb8ab5b0e38178 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000109def _use_scandir():
110 # Use scandir in windows for faster execution.
111 # Do not use in other OS due to crbug.com/989409
112 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000113
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000114def _get_recursive_size(path):
115 """Returns the total data size for the specified path.
116
117 This function can be surprisingly slow on OSX, so its output should be cached.
118 """
119 try:
120 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000121 if _use_scandir():
122
123 if sys.platform == 'win32':
124 def direntIsJunction(entry):
125 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
126 # windows-only symbols.
127 return bool(entry.stat().st_file_attributes &
128 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
129 else:
130 def direntIsJunction(_entry):
131 return False
132
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000133 stack = [path]
134 while stack:
135 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000136 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 continue
138 if entry.is_file():
139 total += entry.stat().st_size
140 elif entry.is_dir():
141 stack.append(entry.path)
142 else:
143 logging.warning('non directory/file entry: %s', entry)
144 return total
145
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000146 for root, _, files in fs.walk(path):
147 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000148 st = fs.lstat(os.path.join(root, f))
149 if stat.S_ISLNK(st.st_mode):
150 continue
151 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000152 return total
153 except (IOError, OSError, UnicodeEncodeError) as exc:
154 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
155 return None
156
157
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400158class NamedCacheError(Exception):
159 """Named cache specific error."""
160
161
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400162class NoMoreSpace(Exception):
163 """Not enough space to map the whole directory."""
164 pass
165
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400166
167class CachePolicies(object):
168 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
169 """Common caching policies for the multiple caches (isolated, named, cipd).
170
171 Arguments:
172 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
173 cache is effectively a leak.
174 - min_free_space: Trim if disk free space becomes lower than this value. If
175 0, it will unconditionally fill the disk.
176 - max_items: Maximum number of items to keep in the cache. If 0, do not
177 enforce a limit.
178 - max_age_secs: Maximum age an item is kept in the cache until it is
179 automatically evicted. Having a lot of dead luggage slows
180 everything down.
181 """
182 self.max_cache_size = max_cache_size
183 self.min_free_space = min_free_space
184 self.max_items = max_items
185 self.max_age_secs = max_age_secs
186
187 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000188 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
189 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
190 self.max_cache_size, float(self.max_cache_size) / 1024**3,
191 self.max_items, self.min_free_space,
192 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400193
194
195class CacheMiss(Exception):
196 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197 def __init__(self, digest):
198 self.digest = digest
199 super(CacheMiss, self).__init__(
200 'Item with digest %r is not found in cache' % digest)
201
202
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203class Cache(object):
204 def __init__(self, cache_dir):
205 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000206 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 assert file_path.isabs(cache_dir), cache_dir
208 self.cache_dir = cache_dir
209 self._lock = threading_utils.LockWithAssert()
210 # Profiling values.
211 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 self._used = []
213
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000214 def __nonzero__(self):
215 """A cache is always True.
216
217 Otherwise it falls back to __len__, which is surprising.
218 """
219 return True
220
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000221 def __len__(self):
222 """Returns the number of entries in the cache."""
223 raise NotImplementedError()
224
225 def __iter__(self):
226 """Iterates over all the entries names."""
227 raise NotImplementedError()
228
229 def __contains__(self, name):
230 """Returns if an entry is in the cache."""
231 raise NotImplementedError()
232
233 @property
234 def total_size(self):
235 """Returns the total size of the cache in bytes."""
236 raise NotImplementedError()
237
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 @property
239 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000240 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 with self._lock:
242 return self._added[:]
243
244 @property
245 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000246 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400247 with self._lock:
248 return self._used[:]
249
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000250 def get_oldest(self):
251 """Returns timestamp of oldest cache entry or None.
252
253 Returns:
254 Timestamp of the oldest item.
255
256 Used for manual trimming.
257 """
258 raise NotImplementedError()
259
260 def remove_oldest(self):
261 """Removes the oldest item from the cache.
262
263 Returns:
264 Size of the oldest item.
265
266 Used for manual trimming.
267 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400268 raise NotImplementedError()
269
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 def save(self):
271 """Saves the current cache to disk."""
272 raise NotImplementedError()
273
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276
277 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000278 Slice with the size of evicted items.
279 """
280 raise NotImplementedError()
281
282 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 """Deletes any corrupted item from the cache, then calls trim(), then
284 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000285
286 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 """
288 raise NotImplementedError()
289
290
291class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 """Content addressed cache that stores objects temporarily.
293
294 It can be accessed concurrently from multiple threads, so it should protect
295 its internal state with some lock.
296 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298 def __enter__(self):
299 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 return self
302
303 def __exit__(self, _exc_type, _exec_value, _traceback):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return False
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 def touch(self, digest, size):
309 """Ensures item is not corrupted and updates its LRU position.
310
311 Arguments:
312 digest: hash digest of item to check.
313 size: expected size of this item.
314
315 Returns:
316 True if item is in cache and not corrupted.
317 """
318 raise NotImplementedError()
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def getfileobj(self, digest):
321 """Returns a readable file like object.
322
323 If file exists on the file system it will have a .name attribute with an
324 absolute path to the file.
325 """
326 raise NotImplementedError()
327
328 def write(self, digest, content):
329 """Reads data from |content| generator and stores it in cache.
330
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 It is possible to write to an object that already exists. It may be
332 ignored (sent to /dev/null) but the timestamp is still updated.
333
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400334 Returns digest to simplify chaining.
335 """
336 raise NotImplementedError()
337
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400338
339class MemoryContentAddressedCache(ContentAddressedCache):
340 """ContentAddressedCache implementation that stores everything in memory."""
341
Lei Leife202df2019-06-11 17:33:34 +0000342 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343 """Args:
344 file_mode_mask: bit mask to AND file mode with. Default value will make
345 all mapped files to be read only.
346 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400347 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 # Items in a LRU lookup dict(digest: size).
350 self._lru = lru.LRUDict()
351
352 # Cache interface implementation.
353
354 def __len__(self):
355 with self._lock:
356 return len(self._lru)
357
358 def __iter__(self):
359 # This is not thread-safe.
360 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def __contains__(self, digest):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
366 @property
367 def total_size(self):
368 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000369 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000371 def get_oldest(self):
372 with self._lock:
373 try:
374 # (key, (value, ts))
375 return self._lru.get_oldest()[1][1]
376 except KeyError:
377 return None
378
379 def remove_oldest(self):
380 with self._lock:
381 # TODO(maruel): Update self._added.
382 # (key, (value, ts))
383 return len(self._lru.pop_oldest()[1][0])
384
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000385 def save(self):
386 pass
387
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000388 def trim(self):
389 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000390 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 pass
395
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000396 # ContentAddressedCache interface implementation.
397
398 def __contains__(self, digest):
399 with self._lock:
400 return digest in self._lru
401
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 def touch(self, digest, size):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 try:
405 self._lru.touch(digest)
406 except KeyError:
407 return False
408 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 def getfileobj(self, digest):
411 with self._lock:
412 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000413 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 except KeyError:
415 raise CacheMiss(digest)
416 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 return io.BytesIO(d)
419
420 def write(self, digest, content):
421 # Assemble whole stream before taking the lock.
422 data = ''.join(content)
423 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 self._added.append(len(data))
426 return digest
427
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428
429class DiskContentAddressedCache(ContentAddressedCache):
430 """Stateful LRU cache in a flat hash table in a directory.
431
432 Saves its state as json file.
433 """
434 STATE_FILE = u'state.json'
435
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000436 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 """
438 Arguments:
439 cache_dir: directory where to place the cache.
440 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000442 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 """
444 # All protected methods (starting with '_') except _path should be called
445 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400446 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
449 # Items in a LRU lookup dict(digest: size).
450 self._lru = lru.LRUDict()
451 # Current cached free disk space. It is updated by self._trim().
452 file_path.ensure_tree(self.cache_dir)
453 self._free_disk = file_path.get_free_space(self.cache_dir)
454 # The first item in the LRU cache that must not be evicted during this run
455 # since it was referenced. All items more recent that _protected in the LRU
456 # cache are also inherently protected. It could be a set() of all items
457 # referenced but this increases memory usage without a use case.
458 self._protected = None
459 # Cleanup operations done by self._load(), if any.
460 self._operations = []
461 with tools.Profiler('Setup'):
462 with self._lock:
463 self._load(trim, time_fn)
464
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000465 # Cache interface implementation.
466
467 def __len__(self):
468 with self._lock:
469 return len(self._lru)
470
471 def __iter__(self):
472 # This is not thread-safe.
473 return self._lru.__iter__()
474
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400475 def __contains__(self, digest):
476 with self._lock:
477 return digest in self._lru
478
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def total_size(self):
481 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000482 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000484 def get_oldest(self):
485 with self._lock:
486 try:
487 # (key, (value, ts))
488 return self._lru.get_oldest()[1][1]
489 except KeyError:
490 return None
491
492 def remove_oldest(self):
493 with self._lock:
494 # TODO(maruel): Update self._added.
495 return self._remove_lru_file(True)
496
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000497 def save(self):
498 with self._lock:
499 return self._save()
500
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000501 def trim(self):
502 """Forces retention policies."""
503 with self._lock:
504 return self._trim()
505
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 def cleanup(self):
507 """Cleans up the cache directory.
508
509 Ensures there is no unknown files in cache_dir.
510 Ensures the read-only bits are set correctly.
511
512 At that point, the cache was already loaded, trimmed to respect cache
513 policies.
514 """
515 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000516 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000518 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # It'd be faster if there were a readdir() function.
520 for filename in fs.listdir(self.cache_dir):
521 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000522 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523 continue
524 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000525 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 previous.remove(filename)
527 continue
528
529 # An untracked file. Delete it.
530 logging.warning('Removing unknown file %s from cache', filename)
531 p = self._path(filename)
532 if fs.isdir(p):
533 try:
534 file_path.rmtree(p)
535 except OSError:
536 pass
537 else:
538 file_path.try_remove(p)
539 continue
540
541 if previous:
542 # Filter out entries that were not found.
543 logging.warning('Removed %d lost files', len(previous))
544 for filename in previous:
545 self._lru.pop(filename)
546 self._save()
547
548 # What remains to be done is to hash every single item to
549 # detect corruption, then save to ensure state.json is up to date.
550 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
551 # TODO(maruel): Let's revisit once directory metadata is stored in
552 # state.json so only the files that had been mapped since the last cleanup()
553 # call are manually verified.
554 #
555 #with self._lock:
556 # for digest in self._lru:
557 # if not isolated_format.is_valid_hash(
558 # self._path(digest), self.hash_algo):
559 # self.evict(digest)
560 # logging.info('Deleted corrupted item: %s', digest)
561
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000562 # ContentAddressedCache interface implementation.
563
564 def __contains__(self, digest):
565 with self._lock:
566 return digest in self._lru
567
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 def touch(self, digest, size):
569 """Verifies an actual file is valid and bumps its LRU position.
570
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000571 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572
573 Note that is doesn't compute the hash so it could still be corrupted if the
574 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 """
576 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000577 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
579 # Update its LRU position.
580 with self._lock:
581 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000582 if looks_valid:
583 # Exists but not in the LRU anymore.
584 self._delete_file(digest, size)
585 return False
586 if not looks_valid:
587 self._lru.pop(digest)
588 # Exists but not in the LRU anymore.
589 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590 return False
591 self._lru.touch(digest)
592 self._protected = self._protected or digest
593 return True
594
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 def getfileobj(self, digest):
596 try:
597 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400598 except IOError:
599 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000600 with self._lock:
601 try:
602 self._used.append(self._lru[digest])
603 except KeyError:
604 # If the digest is not actually in _lru, assume it is a cache miss.
605 # Existing file will be overwritten by whoever uses the cache and added
606 # to _lru.
607 f.close()
608 raise CacheMiss(digest)
609 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610
611 def write(self, digest, content):
612 assert content is not None
613 with self._lock:
614 self._protected = self._protected or digest
615 path = self._path(digest)
616 # A stale broken file may remain. It is possible for the file to have write
617 # access bit removed which would cause the file_write() call to fail to open
618 # in write mode. Take no chance here.
619 file_path.try_remove(path)
620 try:
621 size = file_write(path, content)
622 except:
623 # There are two possible places were an exception can occur:
624 # 1) Inside |content| generator in case of network or unzipping errors.
625 # 2) Inside file_write itself in case of disk IO errors.
626 # In any case delete an incomplete file and propagate the exception to
627 # caller, it will be logged there.
628 file_path.try_remove(path)
629 raise
630 # Make the file read-only in the cache. This has a few side-effects since
631 # the file node is modified, so every directory entries to this file becomes
632 # read-only. It's fine here because it is a new file.
633 file_path.set_read_only(path, True)
634 with self._lock:
635 self._add(digest, size)
636 return digest
637
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000638 # Internal functions.
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def _load(self, trim, time_fn):
641 """Loads state of the cache from json file.
642
643 If cache_dir does not exist on disk, it is created.
644 """
645 self._lock.assert_locked()
646
647 if not fs.isfile(self.state_file):
648 if not fs.isdir(self.cache_dir):
649 fs.makedirs(self.cache_dir)
650 else:
651 # Load state of the cache.
652 try:
653 self._lru = lru.LRUDict.load(self.state_file)
654 except ValueError as err:
655 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000656 # Don't want to keep broken cache dir.
657 file_path.rmtree(self.cache_dir)
658 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400659 if time_fn:
660 self._lru.time_fn = time_fn
661 if trim:
662 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400663
664 def _save(self):
665 """Saves the LRU ordering."""
666 self._lock.assert_locked()
667 if sys.platform != 'win32':
668 d = os.path.dirname(self.state_file)
669 if fs.isdir(d):
670 # Necessary otherwise the file can't be created.
671 file_path.set_read_only(d, False)
672 if fs.isfile(self.state_file):
673 file_path.set_read_only(self.state_file, False)
674 self._lru.save(self.state_file)
675
676 def _trim(self):
677 """Trims anything we don't know, make sure enough free space exists."""
678 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000679 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680
681 # Trim old items.
682 if self.policies.max_age_secs:
683 cutoff = self._lru.time_fn() - self.policies.max_age_secs
684 while self._lru:
685 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000686 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400687 if oldest[1][1] >= cutoff:
688 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000689 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690
691 # Ensure maximum cache size.
692 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000693 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000695 e = self._remove_lru_file(True)
696 evicted.append(e)
697 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
699 # Ensure maximum number of items in the cache.
700 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000701 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000702 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400703
704 # Ensure enough free space.
705 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 while (
707 self.policies.min_free_space and
708 self._lru and
709 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000710 # self._free_disk is updated by this call.
711 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400712
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000713 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000714 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400715 usage_percent = 0.
716 if total_usage:
717 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
718
719 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000720 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
721 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
722 '%.1fkb)',
723 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724 self._free_disk / 1024.,
725 total_usage / 1024.,
726 usage_percent,
727 self.policies.max_cache_size / 1024.)
728 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400730
731 def _path(self, digest):
732 """Returns the path to one item."""
733 return os.path.join(self.cache_dir, digest)
734
735 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000736 """Removes the lastest recently used file and returns its size.
737
738 Updates self._free_disk.
739 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400740 self._lock.assert_locked()
741 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000742 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400743 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000744 total_size = sum(self._lru.values())
745 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000746 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
747 '%s bytes (%.3f GiB) free_space') % (
748 self.policies, total_size, float(total_size) / 1024**3,
749 len(self._lru), self._free_disk,
750 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400751 raise NoMoreSpace(msg)
752 except KeyError:
753 # That means an internal error.
754 raise NoMoreSpace('Nothing to remove, can\'t happend')
755 digest, (size, _) = self._lru.pop_oldest()
756 logging.debug('Removing LRU file %s', digest)
757 self._delete_file(digest, size)
758 return size
759
760 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
761 """Adds an item into LRU cache marking it as a newest one."""
762 self._lock.assert_locked()
763 if size == UNKNOWN_FILE_SIZE:
764 size = fs.stat(self._path(digest)).st_size
765 self._added.append(size)
766 self._lru.add(digest, size)
767 self._free_disk -= size
768 # Do a quicker version of self._trim(). It only enforces free disk space,
769 # not cache size limits. It doesn't actually look at real free disk space,
770 # only uses its cache values. self._trim() will be called later to enforce
771 # real trimming but doing this quick version here makes it possible to map
772 # an isolated that is larger than the current amount of free disk space when
773 # the cache size is already large.
774 while (
775 self.policies.min_free_space and
776 self._lru and
777 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000778 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400779 if self._remove_lru_file(False) == -1:
780 break
781
782 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000783 """Deletes cache file from the file system.
784
785 Updates self._free_disk.
786 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400787 self._lock.assert_locked()
788 try:
789 if size == UNKNOWN_FILE_SIZE:
790 try:
791 size = fs.stat(self._path(digest)).st_size
792 except OSError:
793 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000794 if file_path.try_remove(self._path(digest)):
795 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400796 except OSError as e:
797 if e.errno != errno.ENOENT:
798 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400799
800
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400801class NamedCache(Cache):
802 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400804 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400805 name is a short identifier that describes the contents of the cache, e.g.
806 "git_v8" could be all git repositories required by v8 builds, or
807 "build_chromium" could be build artefacts of the Chromium.
808 path is a directory path relative to the task run dir. Cache installation
809 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400810 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400811 _DIR_ALPHABET = string.ascii_letters + string.digits
812 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000813 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400814
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400815 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400816 """Initializes NamedCaches.
817
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400818 Arguments:
819 - cache_dir is a directory for persistent cache storage.
820 - policies is a CachePolicies instance.
821 - time_fn is a function that returns timestamp (float) and used to take
822 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400823 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400824 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000826 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400827 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
828 self._lru = lru.LRUDict()
829 if not fs.isdir(self.cache_dir):
830 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000831 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000832 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400833 self._lru = lru.LRUDict.load(self.state_file)
834 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000835 logging.exception(
836 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000838 with self._lock:
839 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400840 if time_fn:
841 self._lru.time_fn = time_fn
842
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400843 @property
844 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000845 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400846 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000847 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400848
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000849 def install(self, dst, name):
850 """Creates the directory |dst| and moves a previous named cache |name| if it
851 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400852
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000853 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400854
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000855 Returns the reused named cache size in bytes, or 0 if none was present.
856
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400857 Raises NamedCacheError if cannot install the cache.
858 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000859 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400860 with self._lock:
861 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000862 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400863 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000864 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400865
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000866 # Remove the named symlink if it exists.
867 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000868 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000869 # Remove the symlink itself, not its destination.
870 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000871
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000872 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000873 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400874 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000875 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000876 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000877 file_path.ensure_tree(os.path.dirname(dst))
878 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000880 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400881
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000882 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400883 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400884
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000885 # The named cache does not exist, create an empty directory. When
886 # uninstalling, we will move it back to the cache and create an an
887 # entry.
888 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000889 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000890 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400891 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000892 # Raise using the original traceback.
893 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000894 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000895 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000896 finally:
897 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400898
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000899 def uninstall(self, src, name):
900 """Moves the cache directory back into the named cache hive for an eventual
901 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400902
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000903 The opposite of install().
904
905 src must be absolute and unicode. Its content is moved back into the local
906 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400907
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000908 Returns the named cache size in bytes.
909
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400910 Raises NamedCacheError if cannot uninstall the cache.
911 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000912 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400913 with self._lock:
914 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000915 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400916 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000917 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000918 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400919 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400920
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000921 if name in self._lru:
922 # This shouldn't happen but just remove the preexisting one and move
923 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000924 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000925 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000926
927 # Calculate the size of the named cache to keep. It's important because
928 # if size is zero (it's empty), we do not want to add it back to the
929 # named caches cache.
930 size = _get_recursive_size(src)
931 logging.info('- Size is %d', size)
932 if not size:
933 # Do not save empty named cache.
934 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400935
936 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000937 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400938 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000939 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400940 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000941 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400942
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000943 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000944 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000945
946 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
947 # for user convenience.
948 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000949 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000950 file_path.remove(named_path)
951 else:
952 file_path.ensure_tree(os.path.dirname(named_path))
953
954 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000955 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000956 logging.info(
957 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000958 except OSError:
959 # Ignore on Windows. It happens when running as a normal user or when
960 # UAC is enabled and the user is a filtered administrator account.
961 if sys.platform != 'win32':
962 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000963 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400964 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000965 # Raise using the original traceback.
966 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000967 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000968 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000969 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000970 # Call save() at every uninstall. The assumptions are:
971 # - The total the number of named caches is low, so the state.json file
972 # is small, so the time it takes to write it to disk is short.
973 # - The number of mapped named caches per task is low, so the number of
974 # times save() is called on tear-down isn't high enough to be
975 # significant.
976 # - uninstall() sometimes throws due to file locking on Windows or
977 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000978 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400979
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000980 # Cache interface implementation.
981
982 def __len__(self):
983 with self._lock:
984 return len(self._lru)
985
986 def __iter__(self):
987 # This is not thread-safe.
988 return self._lru.__iter__()
989
990 def __contains__(self, digest):
991 with self._lock:
992 return digest in self._lru
993
994 @property
995 def total_size(self):
996 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000997 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000998
999 def get_oldest(self):
1000 with self._lock:
1001 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001002 # (key, (value, ts))
1003 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001004 except KeyError:
1005 return None
1006
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001007 def remove_oldest(self):
1008 with self._lock:
1009 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001010 _name, size = self._remove_lru_item()
1011 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001012
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001013 def save(self):
1014 with self._lock:
1015 return self._save()
1016
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001017 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001018 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001019 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001020 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001021 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001022
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001023 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001024 if self._policies.max_items:
1025 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001026 name, size = self._remove_lru_item()
1027 evicted.append(size)
1028 logging.info(
1029 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1030 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001031
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001032 # Trim according to maximum age.
1033 if self._policies.max_age_secs:
1034 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1035 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001036 _name, (_data, ts) = self._lru.get_oldest()
1037 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001038 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001039 name, size = self._remove_lru_item()
1040 evicted.append(size)
1041 logging.info(
1042 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1043 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001044
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001045 # Trim according to minimum free space.
1046 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001047 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001048 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001049 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001050 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001051 name, size = self._remove_lru_item()
1052 evicted.append(size)
1053 logging.info(
1054 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1055 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001056
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001057 # Trim according to maximum total size.
1058 if self._policies.max_cache_size:
1059 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001060 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001061 if total <= self._policies.max_cache_size:
1062 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001063 name, size = self._remove_lru_item()
1064 evicted.append(size)
1065 logging.info(
1066 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1067 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001068
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001069 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001070 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001071
1072 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001073 """Removes unknown directories.
1074
1075 Does not recalculate the cache size since it's surprisingly slow on some
1076 OSes.
1077 """
1078 success = True
1079 with self._lock:
1080 try:
1081 actual = set(fs.listdir(self.cache_dir))
1082 actual.discard(self.NAMED_DIR)
1083 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001084 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001085 # First, handle the actual cache content.
1086 # Remove missing entries.
1087 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001088 name, size = self._lru.pop(expected[missing])
1089 logging.warning(
1090 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001091 # Remove unexpected items.
1092 for unexpected in (actual - set(expected)):
1093 try:
1094 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001095 logging.warning(
1096 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001097 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001098 file_path.rmtree(p)
1099 else:
1100 fs.remove(p)
1101 except (IOError, OSError) as e:
1102 logging.error('Failed to remove %s: %s', unexpected, e)
1103 success = False
1104
1105 # Second, fix named cache links.
1106 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001107 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001108 actual = set(fs.listdir(named))
1109 expected = set(self._lru)
1110 # Confirm entries. Do not add missing ones for now.
1111 for name in expected.intersection(actual):
1112 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001113 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001114 if fs.islink(p):
1115 link = fs.readlink(p)
1116 if expected_link == link:
1117 continue
1118 logging.warning(
1119 'Unexpected symlink for cache %s: %s, expected %s',
1120 name, link, expected_link)
1121 else:
1122 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001123 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001124 file_path.rmtree(p)
1125 else:
1126 fs.remove(p)
1127 # Remove unexpected items.
1128 for unexpected in (actual - expected):
1129 try:
1130 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1131 if fs.isdir(p):
1132 file_path.rmtree(p)
1133 else:
1134 fs.remove(p)
1135 except (IOError, OSError) as e:
1136 logging.error('Failed to remove %s: %s', unexpected, e)
1137 success = False
1138 finally:
1139 self._save()
1140 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001141
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001142 # Internal functions.
1143
1144 def _try_upgrade(self):
1145 """Upgrades from the old format to the new one if necessary.
1146
1147 This code can be removed so all bots are known to have the right new format.
1148 """
1149 if not self._lru:
1150 return
1151 _name, (data, _ts) = self._lru.get_oldest()
1152 if isinstance(data, (list, tuple)):
1153 return
1154 # Update to v2.
1155 def upgrade(_name, rel_cache):
1156 abs_cache = os.path.join(self.cache_dir, rel_cache)
1157 return rel_cache, _get_recursive_size(abs_cache)
1158 self._lru.transform(upgrade)
1159 self._save()
1160
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001161 def _remove_lru_item(self):
1162 """Removes the oldest LRU entry. LRU must not be empty."""
1163 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1164 logging.info('Removing named cache %r', name)
1165 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001166 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001167
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001168 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001169 """Creates and returns relative path of a new cache directory.
1170
1171 In practice, it is a 2-letter string.
1172 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001173 # We randomly generate directory names that have two lower/upper case
1174 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1175 abc_len = len(self._DIR_ALPHABET)
1176 tried = set()
1177 while len(tried) < 1000:
1178 i = random.randint(0, abc_len * abc_len - 1)
1179 rel_path = (
1180 self._DIR_ALPHABET[i / abc_len] +
1181 self._DIR_ALPHABET[i % abc_len])
1182 if rel_path in tried:
1183 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001184 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001185 if not fs.exists(abs_path):
1186 return rel_path
1187 tried.add(rel_path)
1188 raise NamedCacheError(
1189 'could not allocate a new cache dir, too many cache dirs')
1190
1191 def _remove(self, name):
1192 """Removes a cache directory and entry.
1193
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001194 Returns:
1195 Number of caches deleted.
1196 """
1197 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001198 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001199 named_dir = self._get_named_path(name)
1200 if fs.islink(named_dir):
1201 fs.unlink(named_dir)
1202
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001203 # Then remove the actual data.
1204 if name not in self._lru:
1205 return
1206 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001207 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001208 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001209 file_path.rmtree(abs_path)
1210 self._lru.pop(name)
1211
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001212 def _save(self):
1213 self._lock.assert_locked()
1214 file_path.ensure_tree(self.cache_dir)
1215 self._lru.save(self.state_file)
1216
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001217 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001218 return os.path.join(self.cache_dir, self.NAMED_DIR, name)