blob: 549b0c5eb9c011276aa696c6f2754506c621e818 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
Junji Watanabe5e73aab2020-04-09 04:20:27 +000029import isolated_format
30
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040031# The file size to be used when we don't know the correct file size,
32# generally used for .isolated files.
33UNKNOWN_FILE_SIZE = None
34
35
36def file_write(path, content_generator):
37 """Writes file content as generated by content_generator.
38
39 Creates the intermediary directory as needed.
40
41 Returns the number of bytes written.
42
43 Meant to be mocked out in unit tests.
44 """
45 file_path.ensure_tree(os.path.dirname(path))
46 total = 0
47 with fs.open(path, 'wb') as f:
48 for d in content_generator:
49 total += len(d)
50 f.write(d)
51 return total
52
53
54def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000055 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040056
57 Currently it just checks the file exists and its size matches the expectation.
58 """
59 if size == UNKNOWN_FILE_SIZE:
60 return fs.isfile(path)
61 try:
62 actual_size = fs.stat(path).st_size
63 except OSError as e:
64 logging.warning(
65 'Can\'t read item %s, assuming it\'s invalid: %s',
66 os.path.basename(path), e)
67 return False
68 if size != actual_size:
69 logging.warning(
70 'Found invalid item %s; %d != %d',
71 os.path.basename(path), actual_size, size)
72 return False
73 return True
74
75
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000076def trim_caches(caches, path, min_free_space, max_age_secs):
77 """Trims multiple caches.
78
79 The goal here is to coherently trim all caches in a coherent LRU fashion,
80 deleting older items independent of which container they belong to.
81
82 Two policies are enforced first:
83 - max_age_secs
84 - min_free_space
85
86 Once that's done, then we enforce each cache's own policies.
87
88 Returns:
89 Slice containing the size of all items evicted.
90 """
91 min_ts = time.time() - max_age_secs if max_age_secs else 0
92 free_disk = file_path.get_free_space(path) if min_free_space else 0
93 total = []
94 if min_ts or free_disk:
95 while True:
96 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
97 if not oldest:
98 break
Lei Leife202df2019-06-11 17:33:34 +000099 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000100 c, ts = oldest[0]
101 if ts >= min_ts and free_disk >= min_free_space:
102 break
103 total.append(c.remove_oldest())
104 if min_free_space:
105 free_disk = file_path.get_free_space(path)
106 # Evaluate each cache's own policies.
107 for c in caches:
108 total.extend(c.trim())
109 return total
110
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000111def _use_scandir():
112 # Use scandir in windows for faster execution.
113 # Do not use in other OS due to crbug.com/989409
114 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000115
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000116def _get_recursive_size(path):
117 """Returns the total data size for the specified path.
118
119 This function can be surprisingly slow on OSX, so its output should be cached.
120 """
121 try:
122 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000123 if _use_scandir():
124
125 if sys.platform == 'win32':
126 def direntIsJunction(entry):
127 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
128 # windows-only symbols.
129 return bool(entry.stat().st_file_attributes &
130 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
131 else:
132 def direntIsJunction(_entry):
133 return False
134
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000135 stack = [path]
136 while stack:
137 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000138 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000139 continue
140 if entry.is_file():
141 total += entry.stat().st_size
142 elif entry.is_dir():
143 stack.append(entry.path)
144 else:
145 logging.warning('non directory/file entry: %s', entry)
146 return total
147
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000148 for root, _, files in fs.walk(path):
149 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000150 st = fs.lstat(os.path.join(root, f))
151 if stat.S_ISLNK(st.st_mode):
152 continue
153 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000154 return total
155 except (IOError, OSError, UnicodeEncodeError) as exc:
156 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
157 return None
158
159
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400160class NamedCacheError(Exception):
161 """Named cache specific error."""
162
163
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400164class NoMoreSpace(Exception):
165 """Not enough space to map the whole directory."""
166 pass
167
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400168
169class CachePolicies(object):
170 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
171 """Common caching policies for the multiple caches (isolated, named, cipd).
172
173 Arguments:
174 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
175 cache is effectively a leak.
176 - min_free_space: Trim if disk free space becomes lower than this value. If
177 0, it will unconditionally fill the disk.
178 - max_items: Maximum number of items to keep in the cache. If 0, do not
179 enforce a limit.
180 - max_age_secs: Maximum age an item is kept in the cache until it is
181 automatically evicted. Having a lot of dead luggage slows
182 everything down.
183 """
184 self.max_cache_size = max_cache_size
185 self.min_free_space = min_free_space
186 self.max_items = max_items
187 self.max_age_secs = max_age_secs
188
189 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000190 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
191 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
192 self.max_cache_size, float(self.max_cache_size) / 1024**3,
193 self.max_items, self.min_free_space,
194 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400195
196
197class CacheMiss(Exception):
198 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400199 def __init__(self, digest):
200 self.digest = digest
201 super(CacheMiss, self).__init__(
202 'Item with digest %r is not found in cache' % digest)
203
204
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400205class Cache(object):
206 def __init__(self, cache_dir):
207 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000208 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400209 assert file_path.isabs(cache_dir), cache_dir
210 self.cache_dir = cache_dir
211 self._lock = threading_utils.LockWithAssert()
212 # Profiling values.
213 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400214 self._used = []
215
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000216 def __nonzero__(self):
217 """A cache is always True.
218
219 Otherwise it falls back to __len__, which is surprising.
220 """
221 return True
222
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000223 def __len__(self):
224 """Returns the number of entries in the cache."""
225 raise NotImplementedError()
226
227 def __iter__(self):
228 """Iterates over all the entries names."""
229 raise NotImplementedError()
230
231 def __contains__(self, name):
232 """Returns if an entry is in the cache."""
233 raise NotImplementedError()
234
235 @property
236 def total_size(self):
237 """Returns the total size of the cache in bytes."""
238 raise NotImplementedError()
239
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400240 @property
241 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000242 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400243 with self._lock:
244 return self._added[:]
245
246 @property
247 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000248 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400249 with self._lock:
250 return self._used[:]
251
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000252 def get_oldest(self):
253 """Returns timestamp of oldest cache entry or None.
254
255 Returns:
256 Timestamp of the oldest item.
257
258 Used for manual trimming.
259 """
260 raise NotImplementedError()
261
262 def remove_oldest(self):
263 """Removes the oldest item from the cache.
264
265 Returns:
266 Size of the oldest item.
267
268 Used for manual trimming.
269 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400270 raise NotImplementedError()
271
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000272 def save(self):
273 """Saves the current cache to disk."""
274 raise NotImplementedError()
275
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000277 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400278
279 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000280 Slice with the size of evicted items.
281 """
282 raise NotImplementedError()
283
284 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000285 """Deletes any corrupted item from the cache, then calls trim(), then
286 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000287
288 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400289 """
290 raise NotImplementedError()
291
292
293class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400294 """Content addressed cache that stores objects temporarily.
295
296 It can be accessed concurrently from multiple threads, so it should protect
297 its internal state with some lock.
298 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400299
300 def __enter__(self):
301 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000302 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400303 return self
304
305 def __exit__(self, _exc_type, _exec_value, _traceback):
306 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000307 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 return False
309
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400310 def touch(self, digest, size):
311 """Ensures item is not corrupted and updates its LRU position.
312
313 Arguments:
314 digest: hash digest of item to check.
315 size: expected size of this item.
316
317 Returns:
318 True if item is in cache and not corrupted.
319 """
320 raise NotImplementedError()
321
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400322 def getfileobj(self, digest):
323 """Returns a readable file like object.
324
325 If file exists on the file system it will have a .name attribute with an
326 absolute path to the file.
327 """
328 raise NotImplementedError()
329
330 def write(self, digest, content):
331 """Reads data from |content| generator and stores it in cache.
332
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000333 It is possible to write to an object that already exists. It may be
334 ignored (sent to /dev/null) but the timestamp is still updated.
335
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400336 Returns digest to simplify chaining.
337 """
338 raise NotImplementedError()
339
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400340
341class MemoryContentAddressedCache(ContentAddressedCache):
342 """ContentAddressedCache implementation that stores everything in memory."""
343
Lei Leife202df2019-06-11 17:33:34 +0000344 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400345 """Args:
346 file_mode_mask: bit mask to AND file mode with. Default value will make
347 all mapped files to be read only.
348 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400349 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400350 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000351 # Items in a LRU lookup dict(digest: size).
352 self._lru = lru.LRUDict()
353
354 # Cache interface implementation.
355
356 def __len__(self):
357 with self._lock:
358 return len(self._lru)
359
360 def __iter__(self):
361 # This is not thread-safe.
362 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400363
364 def __contains__(self, digest):
365 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000366 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400367
368 @property
369 def total_size(self):
370 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000371 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400372
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000373 def get_oldest(self):
374 with self._lock:
375 try:
376 # (key, (value, ts))
377 return self._lru.get_oldest()[1][1]
378 except KeyError:
379 return None
380
381 def remove_oldest(self):
382 with self._lock:
383 # TODO(maruel): Update self._added.
384 # (key, (value, ts))
385 return len(self._lru.pop_oldest()[1][0])
386
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000387 def save(self):
388 pass
389
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000390 def trim(self):
391 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000392 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400393
394 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000395 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400396 pass
397
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000398 # ContentAddressedCache interface implementation.
399
400 def __contains__(self, digest):
401 with self._lock:
402 return digest in self._lru
403
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404 def touch(self, digest, size):
405 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000406 try:
407 self._lru.touch(digest)
408 except KeyError:
409 return False
410 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400411
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400412 def getfileobj(self, digest):
413 with self._lock:
414 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000415 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400416 except KeyError:
417 raise CacheMiss(digest)
418 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000419 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400420 return io.BytesIO(d)
421
422 def write(self, digest, content):
423 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000424 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000426 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400427 self._added.append(len(data))
428 return digest
429
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400430
431class DiskContentAddressedCache(ContentAddressedCache):
432 """Stateful LRU cache in a flat hash table in a directory.
433
434 Saves its state as json file.
435 """
436 STATE_FILE = u'state.json'
437
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000438 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400439 """
440 Arguments:
441 cache_dir: directory where to place the cache.
442 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000444 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400445 """
446 # All protected methods (starting with '_') except _path should be called
447 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400448 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400449 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
451 # Items in a LRU lookup dict(digest: size).
452 self._lru = lru.LRUDict()
453 # Current cached free disk space. It is updated by self._trim().
454 file_path.ensure_tree(self.cache_dir)
455 self._free_disk = file_path.get_free_space(self.cache_dir)
456 # The first item in the LRU cache that must not be evicted during this run
457 # since it was referenced. All items more recent that _protected in the LRU
458 # cache are also inherently protected. It could be a set() of all items
459 # referenced but this increases memory usage without a use case.
460 self._protected = None
461 # Cleanup operations done by self._load(), if any.
462 self._operations = []
463 with tools.Profiler('Setup'):
464 with self._lock:
465 self._load(trim, time_fn)
466
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000467 # Cache interface implementation.
468
469 def __len__(self):
470 with self._lock:
471 return len(self._lru)
472
473 def __iter__(self):
474 # This is not thread-safe.
475 return self._lru.__iter__()
476
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400477 def __contains__(self, digest):
478 with self._lock:
479 return digest in self._lru
480
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400481 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400482 def total_size(self):
483 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000484 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400485
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000486 def get_oldest(self):
487 with self._lock:
488 try:
489 # (key, (value, ts))
490 return self._lru.get_oldest()[1][1]
491 except KeyError:
492 return None
493
494 def remove_oldest(self):
495 with self._lock:
496 # TODO(maruel): Update self._added.
497 return self._remove_lru_file(True)
498
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000499 def save(self):
500 with self._lock:
501 return self._save()
502
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000503 def trim(self):
504 """Forces retention policies."""
505 with self._lock:
506 return self._trim()
507
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400508 def cleanup(self):
509 """Cleans up the cache directory.
510
511 Ensures there is no unknown files in cache_dir.
512 Ensures the read-only bits are set correctly.
513
514 At that point, the cache was already loaded, trimmed to respect cache
515 policies.
516 """
517 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000518 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000520 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400521 # It'd be faster if there were a readdir() function.
522 for filename in fs.listdir(self.cache_dir):
523 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000524 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400525 continue
526 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000527 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400528 previous.remove(filename)
529 continue
530
531 # An untracked file. Delete it.
532 logging.warning('Removing unknown file %s from cache', filename)
533 p = self._path(filename)
534 if fs.isdir(p):
535 try:
536 file_path.rmtree(p)
537 except OSError:
538 pass
539 else:
540 file_path.try_remove(p)
541 continue
542
543 if previous:
544 # Filter out entries that were not found.
545 logging.warning('Removed %d lost files', len(previous))
546 for filename in previous:
547 self._lru.pop(filename)
548 self._save()
549
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000550 # Verify hash of every single item to detect corruption. the corrupted
551 # files will be evicted.
552 with self._lock:
553 for digest, (_, timestamp) in self._lru._items.items():
554 # verify only if the mtime is grather than the timestamp in state.json
555 # to avoid take too long time.
556 if self._get_mtime(digest) <= timestamp:
557 continue
558 logging.warning('Item has been modified. item: %s', digest)
559 if self._is_valid_hash(digest):
560 # Update timestamp in state.json
561 self._lru.touch(digest)
562 continue
563 # remove corrupted file from LRU and file system
564 self._lru.pop(digest)
565 self._delete_file(digest, UNKNOWN_FILE_SIZE)
566 logging.error('Deleted corrupted item: %s', digest)
567 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000569 # ContentAddressedCache interface implementation.
570
571 def __contains__(self, digest):
572 with self._lock:
573 return digest in self._lru
574
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 def touch(self, digest, size):
576 """Verifies an actual file is valid and bumps its LRU position.
577
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000578 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400579
580 Note that is doesn't compute the hash so it could still be corrupted if the
581 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400582 """
583 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000584 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400585
586 # Update its LRU position.
587 with self._lock:
588 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000589 if looks_valid:
590 # Exists but not in the LRU anymore.
591 self._delete_file(digest, size)
592 return False
593 if not looks_valid:
594 self._lru.pop(digest)
595 # Exists but not in the LRU anymore.
596 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400597 return False
598 self._lru.touch(digest)
599 self._protected = self._protected or digest
600 return True
601
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400602 def getfileobj(self, digest):
603 try:
604 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400605 except IOError:
606 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000607 with self._lock:
608 try:
609 self._used.append(self._lru[digest])
610 except KeyError:
611 # If the digest is not actually in _lru, assume it is a cache miss.
612 # Existing file will be overwritten by whoever uses the cache and added
613 # to _lru.
614 f.close()
615 raise CacheMiss(digest)
616 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400617
618 def write(self, digest, content):
619 assert content is not None
620 with self._lock:
621 self._protected = self._protected or digest
622 path = self._path(digest)
623 # A stale broken file may remain. It is possible for the file to have write
624 # access bit removed which would cause the file_write() call to fail to open
625 # in write mode. Take no chance here.
626 file_path.try_remove(path)
627 try:
628 size = file_write(path, content)
629 except:
630 # There are two possible places were an exception can occur:
631 # 1) Inside |content| generator in case of network or unzipping errors.
632 # 2) Inside file_write itself in case of disk IO errors.
633 # In any case delete an incomplete file and propagate the exception to
634 # caller, it will be logged there.
635 file_path.try_remove(path)
636 raise
637 # Make the file read-only in the cache. This has a few side-effects since
638 # the file node is modified, so every directory entries to this file becomes
639 # read-only. It's fine here because it is a new file.
640 file_path.set_read_only(path, True)
641 with self._lock:
642 self._add(digest, size)
643 return digest
644
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000645 # Internal functions.
646
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400647 def _load(self, trim, time_fn):
648 """Loads state of the cache from json file.
649
650 If cache_dir does not exist on disk, it is created.
651 """
652 self._lock.assert_locked()
653
654 if not fs.isfile(self.state_file):
655 if not fs.isdir(self.cache_dir):
656 fs.makedirs(self.cache_dir)
657 else:
658 # Load state of the cache.
659 try:
660 self._lru = lru.LRUDict.load(self.state_file)
661 except ValueError as err:
662 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000663 # Don't want to keep broken cache dir.
664 file_path.rmtree(self.cache_dir)
665 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000666 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400667 if time_fn:
668 self._lru.time_fn = time_fn
669 if trim:
670 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400671
672 def _save(self):
673 """Saves the LRU ordering."""
674 self._lock.assert_locked()
675 if sys.platform != 'win32':
676 d = os.path.dirname(self.state_file)
677 if fs.isdir(d):
678 # Necessary otherwise the file can't be created.
679 file_path.set_read_only(d, False)
680 if fs.isfile(self.state_file):
681 file_path.set_read_only(self.state_file, False)
682 self._lru.save(self.state_file)
683
684 def _trim(self):
685 """Trims anything we don't know, make sure enough free space exists."""
686 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000687 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400688
689 # Trim old items.
690 if self.policies.max_age_secs:
691 cutoff = self._lru.time_fn() - self.policies.max_age_secs
692 while self._lru:
693 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000694 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400695 if oldest[1][1] >= cutoff:
696 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000697 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
699 # Ensure maximum cache size.
700 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000701 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400702 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000703 e = self._remove_lru_file(True)
704 evicted.append(e)
705 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706
707 # Ensure maximum number of items in the cache.
708 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000709 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000710 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400711
712 # Ensure enough free space.
713 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400714 while (
715 self.policies.min_free_space and
716 self._lru and
717 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000718 # self._free_disk is updated by this call.
719 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400720
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000722 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400723 usage_percent = 0.
724 if total_usage:
725 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
726
727 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000728 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
729 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
730 '%.1fkb)',
731 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400732 self._free_disk / 1024.,
733 total_usage / 1024.,
734 usage_percent,
735 self.policies.max_cache_size / 1024.)
736 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000737 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400738
739 def _path(self, digest):
740 """Returns the path to one item."""
741 return os.path.join(self.cache_dir, digest)
742
743 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000744 """Removes the lastest recently used file and returns its size.
745
746 Updates self._free_disk.
747 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400748 self._lock.assert_locked()
749 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000750 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400751 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000752 total_size = sum(self._lru.values())
753 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000754 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
755 '%s bytes (%.3f GiB) free_space') % (
756 self.policies, total_size, float(total_size) / 1024**3,
757 len(self._lru), self._free_disk,
758 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 raise NoMoreSpace(msg)
760 except KeyError:
761 # That means an internal error.
762 raise NoMoreSpace('Nothing to remove, can\'t happend')
763 digest, (size, _) = self._lru.pop_oldest()
764 logging.debug('Removing LRU file %s', digest)
765 self._delete_file(digest, size)
766 return size
767
768 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
769 """Adds an item into LRU cache marking it as a newest one."""
770 self._lock.assert_locked()
771 if size == UNKNOWN_FILE_SIZE:
772 size = fs.stat(self._path(digest)).st_size
773 self._added.append(size)
774 self._lru.add(digest, size)
775 self._free_disk -= size
776 # Do a quicker version of self._trim(). It only enforces free disk space,
777 # not cache size limits. It doesn't actually look at real free disk space,
778 # only uses its cache values. self._trim() will be called later to enforce
779 # real trimming but doing this quick version here makes it possible to map
780 # an isolated that is larger than the current amount of free disk space when
781 # the cache size is already large.
782 while (
783 self.policies.min_free_space and
784 self._lru and
785 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000786 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400787 if self._remove_lru_file(False) == -1:
788 break
789
790 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000791 """Deletes cache file from the file system.
792
793 Updates self._free_disk.
794 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400795 self._lock.assert_locked()
796 try:
797 if size == UNKNOWN_FILE_SIZE:
798 try:
799 size = fs.stat(self._path(digest)).st_size
800 except OSError:
801 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000802 if file_path.try_remove(self._path(digest)):
803 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400804 except OSError as e:
805 if e.errno != errno.ENOENT:
806 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400807
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000808 def _get_mtime(self, digest):
809 """Get mtime of cache file."""
810 return os.path.getmtime(self._path(digest))
811
812 def _is_valid_hash(self, digest):
813 """Verify digest with supported hash algos."""
814 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
815 if digest == isolated_format.hash_file(self._path(digest), algo):
816 return True
817 return False
818
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400819
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400820class NamedCache(Cache):
821 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400822
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400823 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824 name is a short identifier that describes the contents of the cache, e.g.
825 "git_v8" could be all git repositories required by v8 builds, or
826 "build_chromium" could be build artefacts of the Chromium.
827 path is a directory path relative to the task run dir. Cache installation
828 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400829 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400830 _DIR_ALPHABET = string.ascii_letters + string.digits
831 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000832 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400833
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400835 """Initializes NamedCaches.
836
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400837 Arguments:
838 - cache_dir is a directory for persistent cache storage.
839 - policies is a CachePolicies instance.
840 - time_fn is a function that returns timestamp (float) and used to take
841 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400842 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400843 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400844 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000845 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400846 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
847 self._lru = lru.LRUDict()
848 if not fs.isdir(self.cache_dir):
849 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000850 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000851 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 self._lru = lru.LRUDict.load(self.state_file)
853 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000854 logging.exception(
855 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400856 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000857 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000858 with self._lock:
859 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400860 if time_fn:
861 self._lru.time_fn = time_fn
862
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400863 @property
864 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000865 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400866 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000867 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400868
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000869 def install(self, dst, name):
870 """Creates the directory |dst| and moves a previous named cache |name| if it
871 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400872
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000873 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400874
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000875 Returns the reused named cache size in bytes, or 0 if none was present.
876
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877 Raises NamedCacheError if cannot install the cache.
878 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000879 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400880 with self._lock:
881 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000882 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400883 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000884 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400885
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000886 # Remove the named symlink if it exists.
887 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000888 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000889 # Remove the symlink itself, not its destination.
890 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000891
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000892 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000893 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400894 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000895 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000896 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000897 file_path.ensure_tree(os.path.dirname(dst))
898 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400899 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000900 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400901
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000902 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400903 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400904
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000905 # The named cache does not exist, create an empty directory. When
906 # uninstalling, we will move it back to the cache and create an an
907 # entry.
908 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000909 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000910 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400911 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000912 # Raise using the original traceback.
913 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000914 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000915 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000916 finally:
917 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400918
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000919 def uninstall(self, src, name):
920 """Moves the cache directory back into the named cache hive for an eventual
921 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400922
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000923 The opposite of install().
924
925 src must be absolute and unicode. Its content is moved back into the local
926 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400927
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000928 Returns the named cache size in bytes.
929
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400930 Raises NamedCacheError if cannot uninstall the cache.
931 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000932 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400933 with self._lock:
934 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000935 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400936 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000937 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000938 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400939 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400940
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000941 if name in self._lru:
942 # This shouldn't happen but just remove the preexisting one and move
943 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000944 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000945 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000946
947 # Calculate the size of the named cache to keep. It's important because
948 # if size is zero (it's empty), we do not want to add it back to the
949 # named caches cache.
950 size = _get_recursive_size(src)
951 logging.info('- Size is %d', size)
952 if not size:
953 # Do not save empty named cache.
954 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400955
956 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000957 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400958 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000959 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400960 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000961 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400962
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000963 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000964 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000965
966 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
967 # for user convenience.
968 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000969 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000970 file_path.remove(named_path)
971 else:
972 file_path.ensure_tree(os.path.dirname(named_path))
973
974 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000975 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000976 logging.info(
977 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000978 except OSError:
979 # Ignore on Windows. It happens when running as a normal user or when
980 # UAC is enabled and the user is a filtered administrator account.
981 if sys.platform != 'win32':
982 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000983 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400984 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000985 # Raise using the original traceback.
986 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000987 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000988 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000989 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000990 # Call save() at every uninstall. The assumptions are:
991 # - The total the number of named caches is low, so the state.json file
992 # is small, so the time it takes to write it to disk is short.
993 # - The number of mapped named caches per task is low, so the number of
994 # times save() is called on tear-down isn't high enough to be
995 # significant.
996 # - uninstall() sometimes throws due to file locking on Windows or
997 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000998 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400999
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001000 # Cache interface implementation.
1001
1002 def __len__(self):
1003 with self._lock:
1004 return len(self._lru)
1005
1006 def __iter__(self):
1007 # This is not thread-safe.
1008 return self._lru.__iter__()
1009
John Budorickc6186972020-02-26 00:58:14 +00001010 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001011 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001012 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001013
1014 @property
1015 def total_size(self):
1016 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001017 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001018
1019 def get_oldest(self):
1020 with self._lock:
1021 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001022 # (key, (value, ts))
1023 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001024 except KeyError:
1025 return None
1026
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001027 def remove_oldest(self):
1028 with self._lock:
1029 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001030 _name, size = self._remove_lru_item()
1031 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001032
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001033 def save(self):
1034 with self._lock:
1035 return self._save()
1036
John Budorickc6186972020-02-26 00:58:14 +00001037 def touch(self, *names):
1038 with self._lock:
1039 for name in names:
1040 if name in self._lru:
1041 self._lru.touch(name)
1042 self._save()
1043
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001044 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001045 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001046 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001047 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001048 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001049
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001050 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001051 if self._policies.max_items:
1052 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001053 name, size = self._remove_lru_item()
1054 evicted.append(size)
1055 logging.info(
1056 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1057 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001058
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001059 # Trim according to maximum age.
1060 if self._policies.max_age_secs:
1061 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1062 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001063 _name, (_data, ts) = self._lru.get_oldest()
1064 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001065 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001066 name, size = self._remove_lru_item()
1067 evicted.append(size)
1068 logging.info(
1069 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1070 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001071
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001072 # Trim according to minimum free space.
1073 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001074 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001075 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001076 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001077 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001078 name, size = self._remove_lru_item()
1079 evicted.append(size)
1080 logging.info(
1081 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1082 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001083
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001084 # Trim according to maximum total size.
1085 if self._policies.max_cache_size:
1086 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001087 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001088 if total <= self._policies.max_cache_size:
1089 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001090 name, size = self._remove_lru_item()
1091 evicted.append(size)
1092 logging.info(
1093 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1094 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001095
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001096 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001097 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001098
1099 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001100 """Removes unknown directories.
1101
1102 Does not recalculate the cache size since it's surprisingly slow on some
1103 OSes.
1104 """
1105 success = True
1106 with self._lock:
1107 try:
1108 actual = set(fs.listdir(self.cache_dir))
1109 actual.discard(self.NAMED_DIR)
1110 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001111 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001112 # First, handle the actual cache content.
1113 # Remove missing entries.
1114 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001115 name, size = self._lru.pop(expected[missing])
1116 logging.warning(
1117 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001118 # Remove unexpected items.
1119 for unexpected in (actual - set(expected)):
1120 try:
1121 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001122 logging.warning(
1123 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001124 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001125 file_path.rmtree(p)
1126 else:
1127 fs.remove(p)
1128 except (IOError, OSError) as e:
1129 logging.error('Failed to remove %s: %s', unexpected, e)
1130 success = False
1131
1132 # Second, fix named cache links.
1133 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001134 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001135 actual = set(fs.listdir(named))
1136 expected = set(self._lru)
1137 # Confirm entries. Do not add missing ones for now.
1138 for name in expected.intersection(actual):
1139 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001140 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001141 if fs.islink(p):
1142 link = fs.readlink(p)
1143 if expected_link == link:
1144 continue
1145 logging.warning(
1146 'Unexpected symlink for cache %s: %s, expected %s',
1147 name, link, expected_link)
1148 else:
1149 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001150 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001151 file_path.rmtree(p)
1152 else:
1153 fs.remove(p)
1154 # Remove unexpected items.
1155 for unexpected in (actual - expected):
1156 try:
1157 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1158 if fs.isdir(p):
1159 file_path.rmtree(p)
1160 else:
1161 fs.remove(p)
1162 except (IOError, OSError) as e:
1163 logging.error('Failed to remove %s: %s', unexpected, e)
1164 success = False
1165 finally:
1166 self._save()
1167 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001168
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001169 # Internal functions.
1170
1171 def _try_upgrade(self):
1172 """Upgrades from the old format to the new one if necessary.
1173
1174 This code can be removed so all bots are known to have the right new format.
1175 """
1176 if not self._lru:
1177 return
1178 _name, (data, _ts) = self._lru.get_oldest()
1179 if isinstance(data, (list, tuple)):
1180 return
1181 # Update to v2.
1182 def upgrade(_name, rel_cache):
1183 abs_cache = os.path.join(self.cache_dir, rel_cache)
1184 return rel_cache, _get_recursive_size(abs_cache)
1185 self._lru.transform(upgrade)
1186 self._save()
1187
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001188 def _remove_lru_item(self):
1189 """Removes the oldest LRU entry. LRU must not be empty."""
1190 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1191 logging.info('Removing named cache %r', name)
1192 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001193 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001194
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001195 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001196 """Creates and returns relative path of a new cache directory.
1197
1198 In practice, it is a 2-letter string.
1199 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001200 # We randomly generate directory names that have two lower/upper case
1201 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1202 abc_len = len(self._DIR_ALPHABET)
1203 tried = set()
1204 while len(tried) < 1000:
1205 i = random.randint(0, abc_len * abc_len - 1)
1206 rel_path = (
1207 self._DIR_ALPHABET[i / abc_len] +
1208 self._DIR_ALPHABET[i % abc_len])
1209 if rel_path in tried:
1210 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001211 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001212 if not fs.exists(abs_path):
1213 return rel_path
1214 tried.add(rel_path)
1215 raise NamedCacheError(
1216 'could not allocate a new cache dir, too many cache dirs')
1217
1218 def _remove(self, name):
1219 """Removes a cache directory and entry.
1220
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001221 Returns:
1222 Number of caches deleted.
1223 """
1224 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001225 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001226 named_dir = self._get_named_path(name)
1227 if fs.islink(named_dir):
1228 fs.unlink(named_dir)
1229
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001230 # Then remove the actual data.
1231 if name not in self._lru:
1232 return
1233 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001234 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001235 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001236 file_path.rmtree(abs_path)
1237 self._lru.pop(name)
1238
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001239 def _save(self):
1240 self._lock.assert_locked()
1241 file_path.ensure_tree(self.cache_dir)
1242 self._lru.save(self.state_file)
1243
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001244 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001245 return os.path.join(self.cache_dir, self.NAMED_DIR, name)