blob: 6121057389f61593cc096d22cae42d7dcbe9f82e [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000109def _use_scandir():
110 # Use scandir in windows for faster execution.
111 # Do not use in other OS due to crbug.com/989409
112 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000113
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000114def _get_recursive_size(path):
115 """Returns the total data size for the specified path.
116
117 This function can be surprisingly slow on OSX, so its output should be cached.
118 """
119 try:
120 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000121 if _use_scandir():
122
123 if sys.platform == 'win32':
124 def direntIsJunction(entry):
125 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
126 # windows-only symbols.
127 return bool(entry.stat().st_file_attributes &
128 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
129 else:
130 def direntIsJunction(_entry):
131 return False
132
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000133 stack = [path]
134 while stack:
135 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000136 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 continue
138 if entry.is_file():
139 total += entry.stat().st_size
140 elif entry.is_dir():
141 stack.append(entry.path)
142 else:
143 logging.warning('non directory/file entry: %s', entry)
144 return total
145
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000146 for root, _, files in fs.walk(path):
147 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000148 st = fs.lstat(os.path.join(root, f))
149 if stat.S_ISLNK(st.st_mode):
150 continue
151 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000152 return total
153 except (IOError, OSError, UnicodeEncodeError) as exc:
154 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
155 return None
156
157
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400158class NamedCacheError(Exception):
159 """Named cache specific error."""
160
161
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400162class NoMoreSpace(Exception):
163 """Not enough space to map the whole directory."""
164 pass
165
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400166
167class CachePolicies(object):
168 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
169 """Common caching policies for the multiple caches (isolated, named, cipd).
170
171 Arguments:
172 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
173 cache is effectively a leak.
174 - min_free_space: Trim if disk free space becomes lower than this value. If
175 0, it will unconditionally fill the disk.
176 - max_items: Maximum number of items to keep in the cache. If 0, do not
177 enforce a limit.
178 - max_age_secs: Maximum age an item is kept in the cache until it is
179 automatically evicted. Having a lot of dead luggage slows
180 everything down.
181 """
182 self.max_cache_size = max_cache_size
183 self.min_free_space = min_free_space
184 self.max_items = max_items
185 self.max_age_secs = max_age_secs
186
187 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000188 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
189 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
190 self.max_cache_size, float(self.max_cache_size) / 1024**3,
191 self.max_items, self.min_free_space,
192 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400193
194
195class CacheMiss(Exception):
196 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197 def __init__(self, digest):
198 self.digest = digest
199 super(CacheMiss, self).__init__(
200 'Item with digest %r is not found in cache' % digest)
201
202
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203class Cache(object):
204 def __init__(self, cache_dir):
205 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000206 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 assert file_path.isabs(cache_dir), cache_dir
208 self.cache_dir = cache_dir
209 self._lock = threading_utils.LockWithAssert()
210 # Profiling values.
211 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 self._used = []
213
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000214 def __nonzero__(self):
215 """A cache is always True.
216
217 Otherwise it falls back to __len__, which is surprising.
218 """
219 return True
220
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000221 def __len__(self):
222 """Returns the number of entries in the cache."""
223 raise NotImplementedError()
224
225 def __iter__(self):
226 """Iterates over all the entries names."""
227 raise NotImplementedError()
228
229 def __contains__(self, name):
230 """Returns if an entry is in the cache."""
231 raise NotImplementedError()
232
233 @property
234 def total_size(self):
235 """Returns the total size of the cache in bytes."""
236 raise NotImplementedError()
237
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 @property
239 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000240 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 with self._lock:
242 return self._added[:]
243
244 @property
245 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000246 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400247 with self._lock:
248 return self._used[:]
249
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000250 def get_oldest(self):
251 """Returns timestamp of oldest cache entry or None.
252
253 Returns:
254 Timestamp of the oldest item.
255
256 Used for manual trimming.
257 """
258 raise NotImplementedError()
259
260 def remove_oldest(self):
261 """Removes the oldest item from the cache.
262
263 Returns:
264 Size of the oldest item.
265
266 Used for manual trimming.
267 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400268 raise NotImplementedError()
269
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 def save(self):
271 """Saves the current cache to disk."""
272 raise NotImplementedError()
273
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276
277 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000278 Slice with the size of evicted items.
279 """
280 raise NotImplementedError()
281
282 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 """Deletes any corrupted item from the cache, then calls trim(), then
284 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000285
286 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 """
288 raise NotImplementedError()
289
290
291class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 """Content addressed cache that stores objects temporarily.
293
294 It can be accessed concurrently from multiple threads, so it should protect
295 its internal state with some lock.
296 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298 def __enter__(self):
299 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 return self
302
303 def __exit__(self, _exc_type, _exec_value, _traceback):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return False
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 def touch(self, digest, size):
309 """Ensures item is not corrupted and updates its LRU position.
310
311 Arguments:
312 digest: hash digest of item to check.
313 size: expected size of this item.
314
315 Returns:
316 True if item is in cache and not corrupted.
317 """
318 raise NotImplementedError()
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def getfileobj(self, digest):
321 """Returns a readable file like object.
322
323 If file exists on the file system it will have a .name attribute with an
324 absolute path to the file.
325 """
326 raise NotImplementedError()
327
328 def write(self, digest, content):
329 """Reads data from |content| generator and stores it in cache.
330
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 It is possible to write to an object that already exists. It may be
332 ignored (sent to /dev/null) but the timestamp is still updated.
333
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400334 Returns digest to simplify chaining.
335 """
336 raise NotImplementedError()
337
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400338
339class MemoryContentAddressedCache(ContentAddressedCache):
340 """ContentAddressedCache implementation that stores everything in memory."""
341
Lei Leife202df2019-06-11 17:33:34 +0000342 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343 """Args:
344 file_mode_mask: bit mask to AND file mode with. Default value will make
345 all mapped files to be read only.
346 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400347 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 # Items in a LRU lookup dict(digest: size).
350 self._lru = lru.LRUDict()
351
352 # Cache interface implementation.
353
354 def __len__(self):
355 with self._lock:
356 return len(self._lru)
357
358 def __iter__(self):
359 # This is not thread-safe.
360 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def __contains__(self, digest):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
366 @property
367 def total_size(self):
368 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000369 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000371 def get_oldest(self):
372 with self._lock:
373 try:
374 # (key, (value, ts))
375 return self._lru.get_oldest()[1][1]
376 except KeyError:
377 return None
378
379 def remove_oldest(self):
380 with self._lock:
381 # TODO(maruel): Update self._added.
382 # (key, (value, ts))
383 return len(self._lru.pop_oldest()[1][0])
384
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000385 def save(self):
386 pass
387
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000388 def trim(self):
389 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000390 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 pass
395
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000396 # ContentAddressedCache interface implementation.
397
398 def __contains__(self, digest):
399 with self._lock:
400 return digest in self._lru
401
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 def touch(self, digest, size):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 try:
405 self._lru.touch(digest)
406 except KeyError:
407 return False
408 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 def getfileobj(self, digest):
411 with self._lock:
412 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000413 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 except KeyError:
415 raise CacheMiss(digest)
416 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 return io.BytesIO(d)
419
420 def write(self, digest, content):
421 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000422 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400423 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 self._added.append(len(data))
426 return digest
427
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428
429class DiskContentAddressedCache(ContentAddressedCache):
430 """Stateful LRU cache in a flat hash table in a directory.
431
432 Saves its state as json file.
433 """
434 STATE_FILE = u'state.json'
435
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000436 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 """
438 Arguments:
439 cache_dir: directory where to place the cache.
440 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000442 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 """
444 # All protected methods (starting with '_') except _path should be called
445 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400446 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
449 # Items in a LRU lookup dict(digest: size).
450 self._lru = lru.LRUDict()
451 # Current cached free disk space. It is updated by self._trim().
452 file_path.ensure_tree(self.cache_dir)
453 self._free_disk = file_path.get_free_space(self.cache_dir)
454 # The first item in the LRU cache that must not be evicted during this run
455 # since it was referenced. All items more recent that _protected in the LRU
456 # cache are also inherently protected. It could be a set() of all items
457 # referenced but this increases memory usage without a use case.
458 self._protected = None
459 # Cleanup operations done by self._load(), if any.
460 self._operations = []
461 with tools.Profiler('Setup'):
462 with self._lock:
463 self._load(trim, time_fn)
464
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000465 # Cache interface implementation.
466
467 def __len__(self):
468 with self._lock:
469 return len(self._lru)
470
471 def __iter__(self):
472 # This is not thread-safe.
473 return self._lru.__iter__()
474
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400475 def __contains__(self, digest):
476 with self._lock:
477 return digest in self._lru
478
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def total_size(self):
481 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000482 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000484 def get_oldest(self):
485 with self._lock:
486 try:
487 # (key, (value, ts))
488 return self._lru.get_oldest()[1][1]
489 except KeyError:
490 return None
491
492 def remove_oldest(self):
493 with self._lock:
494 # TODO(maruel): Update self._added.
495 return self._remove_lru_file(True)
496
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000497 def save(self):
498 with self._lock:
499 return self._save()
500
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000501 def trim(self):
502 """Forces retention policies."""
503 with self._lock:
504 return self._trim()
505
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 def cleanup(self):
507 """Cleans up the cache directory.
508
509 Ensures there is no unknown files in cache_dir.
510 Ensures the read-only bits are set correctly.
511
512 At that point, the cache was already loaded, trimmed to respect cache
513 policies.
514 """
515 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000516 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000518 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # It'd be faster if there were a readdir() function.
520 for filename in fs.listdir(self.cache_dir):
521 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000522 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523 continue
524 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000525 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 previous.remove(filename)
527 continue
528
529 # An untracked file. Delete it.
530 logging.warning('Removing unknown file %s from cache', filename)
531 p = self._path(filename)
532 if fs.isdir(p):
533 try:
534 file_path.rmtree(p)
535 except OSError:
536 pass
537 else:
538 file_path.try_remove(p)
539 continue
540
541 if previous:
542 # Filter out entries that were not found.
543 logging.warning('Removed %d lost files', len(previous))
544 for filename in previous:
545 self._lru.pop(filename)
546 self._save()
547
548 # What remains to be done is to hash every single item to
549 # detect corruption, then save to ensure state.json is up to date.
550 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
551 # TODO(maruel): Let's revisit once directory metadata is stored in
552 # state.json so only the files that had been mapped since the last cleanup()
553 # call are manually verified.
554 #
555 #with self._lock:
556 # for digest in self._lru:
557 # if not isolated_format.is_valid_hash(
558 # self._path(digest), self.hash_algo):
559 # self.evict(digest)
560 # logging.info('Deleted corrupted item: %s', digest)
561
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000562 # ContentAddressedCache interface implementation.
563
564 def __contains__(self, digest):
565 with self._lock:
566 return digest in self._lru
567
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 def touch(self, digest, size):
569 """Verifies an actual file is valid and bumps its LRU position.
570
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000571 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572
573 Note that is doesn't compute the hash so it could still be corrupted if the
574 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 """
576 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000577 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
579 # Update its LRU position.
580 with self._lock:
581 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000582 if looks_valid:
583 # Exists but not in the LRU anymore.
584 self._delete_file(digest, size)
585 return False
586 if not looks_valid:
587 self._lru.pop(digest)
588 # Exists but not in the LRU anymore.
589 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590 return False
591 self._lru.touch(digest)
592 self._protected = self._protected or digest
593 return True
594
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 def getfileobj(self, digest):
596 try:
597 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400598 except IOError:
599 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000600 with self._lock:
601 try:
602 self._used.append(self._lru[digest])
603 except KeyError:
604 # If the digest is not actually in _lru, assume it is a cache miss.
605 # Existing file will be overwritten by whoever uses the cache and added
606 # to _lru.
607 f.close()
608 raise CacheMiss(digest)
609 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610
611 def write(self, digest, content):
612 assert content is not None
613 with self._lock:
614 self._protected = self._protected or digest
615 path = self._path(digest)
616 # A stale broken file may remain. It is possible for the file to have write
617 # access bit removed which would cause the file_write() call to fail to open
618 # in write mode. Take no chance here.
619 file_path.try_remove(path)
620 try:
621 size = file_write(path, content)
622 except:
623 # There are two possible places were an exception can occur:
624 # 1) Inside |content| generator in case of network or unzipping errors.
625 # 2) Inside file_write itself in case of disk IO errors.
626 # In any case delete an incomplete file and propagate the exception to
627 # caller, it will be logged there.
628 file_path.try_remove(path)
629 raise
630 # Make the file read-only in the cache. This has a few side-effects since
631 # the file node is modified, so every directory entries to this file becomes
632 # read-only. It's fine here because it is a new file.
633 file_path.set_read_only(path, True)
634 with self._lock:
635 self._add(digest, size)
636 return digest
637
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000638 # Internal functions.
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def _load(self, trim, time_fn):
641 """Loads state of the cache from json file.
642
643 If cache_dir does not exist on disk, it is created.
644 """
645 self._lock.assert_locked()
646
647 if not fs.isfile(self.state_file):
648 if not fs.isdir(self.cache_dir):
649 fs.makedirs(self.cache_dir)
650 else:
651 # Load state of the cache.
652 try:
653 self._lru = lru.LRUDict.load(self.state_file)
654 except ValueError as err:
655 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000656 # Don't want to keep broken cache dir.
657 file_path.rmtree(self.cache_dir)
658 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000659 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400660 if time_fn:
661 self._lru.time_fn = time_fn
662 if trim:
663 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400664
665 def _save(self):
666 """Saves the LRU ordering."""
667 self._lock.assert_locked()
668 if sys.platform != 'win32':
669 d = os.path.dirname(self.state_file)
670 if fs.isdir(d):
671 # Necessary otherwise the file can't be created.
672 file_path.set_read_only(d, False)
673 if fs.isfile(self.state_file):
674 file_path.set_read_only(self.state_file, False)
675 self._lru.save(self.state_file)
676
677 def _trim(self):
678 """Trims anything we don't know, make sure enough free space exists."""
679 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000680 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400681
682 # Trim old items.
683 if self.policies.max_age_secs:
684 cutoff = self._lru.time_fn() - self.policies.max_age_secs
685 while self._lru:
686 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000687 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400688 if oldest[1][1] >= cutoff:
689 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000690 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400691
692 # Ensure maximum cache size.
693 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000694 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400695 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000696 e = self._remove_lru_file(True)
697 evicted.append(e)
698 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400699
700 # Ensure maximum number of items in the cache.
701 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000702 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000703 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400704
705 # Ensure enough free space.
706 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400707 while (
708 self.policies.min_free_space and
709 self._lru and
710 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000711 # self._free_disk is updated by this call.
712 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000715 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400716 usage_percent = 0.
717 if total_usage:
718 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
719
720 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
722 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
723 '%.1fkb)',
724 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725 self._free_disk / 1024.,
726 total_usage / 1024.,
727 usage_percent,
728 self.policies.max_cache_size / 1024.)
729 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000730 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400731
732 def _path(self, digest):
733 """Returns the path to one item."""
734 return os.path.join(self.cache_dir, digest)
735
736 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000737 """Removes the lastest recently used file and returns its size.
738
739 Updates self._free_disk.
740 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400741 self._lock.assert_locked()
742 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000743 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000745 total_size = sum(self._lru.values())
746 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000747 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
748 '%s bytes (%.3f GiB) free_space') % (
749 self.policies, total_size, float(total_size) / 1024**3,
750 len(self._lru), self._free_disk,
751 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400752 raise NoMoreSpace(msg)
753 except KeyError:
754 # That means an internal error.
755 raise NoMoreSpace('Nothing to remove, can\'t happend')
756 digest, (size, _) = self._lru.pop_oldest()
757 logging.debug('Removing LRU file %s', digest)
758 self._delete_file(digest, size)
759 return size
760
761 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
762 """Adds an item into LRU cache marking it as a newest one."""
763 self._lock.assert_locked()
764 if size == UNKNOWN_FILE_SIZE:
765 size = fs.stat(self._path(digest)).st_size
766 self._added.append(size)
767 self._lru.add(digest, size)
768 self._free_disk -= size
769 # Do a quicker version of self._trim(). It only enforces free disk space,
770 # not cache size limits. It doesn't actually look at real free disk space,
771 # only uses its cache values. self._trim() will be called later to enforce
772 # real trimming but doing this quick version here makes it possible to map
773 # an isolated that is larger than the current amount of free disk space when
774 # the cache size is already large.
775 while (
776 self.policies.min_free_space and
777 self._lru and
778 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000779 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400780 if self._remove_lru_file(False) == -1:
781 break
782
783 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000784 """Deletes cache file from the file system.
785
786 Updates self._free_disk.
787 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400788 self._lock.assert_locked()
789 try:
790 if size == UNKNOWN_FILE_SIZE:
791 try:
792 size = fs.stat(self._path(digest)).st_size
793 except OSError:
794 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000795 if file_path.try_remove(self._path(digest)):
796 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400797 except OSError as e:
798 if e.errno != errno.ENOENT:
799 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400800
801
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802class NamedCache(Cache):
803 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400804
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400805 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400806 name is a short identifier that describes the contents of the cache, e.g.
807 "git_v8" could be all git repositories required by v8 builds, or
808 "build_chromium" could be build artefacts of the Chromium.
809 path is a directory path relative to the task run dir. Cache installation
810 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400811 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400812 _DIR_ALPHABET = string.ascii_letters + string.digits
813 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000814 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400815
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400816 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400817 """Initializes NamedCaches.
818
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400819 Arguments:
820 - cache_dir is a directory for persistent cache storage.
821 - policies is a CachePolicies instance.
822 - time_fn is a function that returns timestamp (float) and used to take
823 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400824 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400825 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400826 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000827 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400828 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
829 self._lru = lru.LRUDict()
830 if not fs.isdir(self.cache_dir):
831 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000832 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000833 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400834 self._lru = lru.LRUDict.load(self.state_file)
835 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000836 logging.exception(
837 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400838 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000839 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000840 with self._lock:
841 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400842 if time_fn:
843 self._lru.time_fn = time_fn
844
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400845 @property
846 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000847 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400848 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000849 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000851 def install(self, dst, name):
852 """Creates the directory |dst| and moves a previous named cache |name| if it
853 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400854
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000855 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400856
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000857 Returns the reused named cache size in bytes, or 0 if none was present.
858
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400859 Raises NamedCacheError if cannot install the cache.
860 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000861 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400862 with self._lock:
863 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000864 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400865 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000866 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400867
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000868 # Remove the named symlink if it exists.
869 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000870 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000871 # Remove the symlink itself, not its destination.
872 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000873
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000874 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000875 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000877 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000878 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000879 file_path.ensure_tree(os.path.dirname(dst))
880 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400881 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000882 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400883
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000884 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400885 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400886
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000887 # The named cache does not exist, create an empty directory. When
888 # uninstalling, we will move it back to the cache and create an an
889 # entry.
890 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000891 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000892 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400893 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000894 # Raise using the original traceback.
895 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000896 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000897 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000898 finally:
899 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400900
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000901 def uninstall(self, src, name):
902 """Moves the cache directory back into the named cache hive for an eventual
903 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400904
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000905 The opposite of install().
906
907 src must be absolute and unicode. Its content is moved back into the local
908 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400909
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000910 Returns the named cache size in bytes.
911
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400912 Raises NamedCacheError if cannot uninstall the cache.
913 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000914 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400915 with self._lock:
916 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000917 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400918 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000919 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000920 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400921 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400922
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000923 if name in self._lru:
924 # This shouldn't happen but just remove the preexisting one and move
925 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000926 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000927 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000928
929 # Calculate the size of the named cache to keep. It's important because
930 # if size is zero (it's empty), we do not want to add it back to the
931 # named caches cache.
932 size = _get_recursive_size(src)
933 logging.info('- Size is %d', size)
934 if not size:
935 # Do not save empty named cache.
936 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400937
938 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000939 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400940 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000941 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400942 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000943 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400944
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000945 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000946 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000947
948 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
949 # for user convenience.
950 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000951 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000952 file_path.remove(named_path)
953 else:
954 file_path.ensure_tree(os.path.dirname(named_path))
955
956 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000957 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000958 logging.info(
959 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000960 except OSError:
961 # Ignore on Windows. It happens when running as a normal user or when
962 # UAC is enabled and the user is a filtered administrator account.
963 if sys.platform != 'win32':
964 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000965 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400966 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000967 # Raise using the original traceback.
968 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000969 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000970 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000971 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000972 # Call save() at every uninstall. The assumptions are:
973 # - The total the number of named caches is low, so the state.json file
974 # is small, so the time it takes to write it to disk is short.
975 # - The number of mapped named caches per task is low, so the number of
976 # times save() is called on tear-down isn't high enough to be
977 # significant.
978 # - uninstall() sometimes throws due to file locking on Windows or
979 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000980 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400981
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000982 # Cache interface implementation.
983
984 def __len__(self):
985 with self._lock:
986 return len(self._lru)
987
988 def __iter__(self):
989 # This is not thread-safe.
990 return self._lru.__iter__()
991
John Budorickc6186972020-02-26 00:58:14 +0000992 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000993 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +0000994 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000995
996 @property
997 def total_size(self):
998 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000999 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001000
1001 def get_oldest(self):
1002 with self._lock:
1003 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001004 # (key, (value, ts))
1005 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001006 except KeyError:
1007 return None
1008
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001009 def remove_oldest(self):
1010 with self._lock:
1011 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001012 _name, size = self._remove_lru_item()
1013 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001014
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001015 def save(self):
1016 with self._lock:
1017 return self._save()
1018
John Budorickc6186972020-02-26 00:58:14 +00001019 def touch(self, *names):
1020 with self._lock:
1021 for name in names:
1022 if name in self._lru:
1023 self._lru.touch(name)
1024 self._save()
1025
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001026 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001027 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001028 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001029 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001030 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001031
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001032 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001033 if self._policies.max_items:
1034 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001035 name, size = self._remove_lru_item()
1036 evicted.append(size)
1037 logging.info(
1038 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1039 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001040
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001041 # Trim according to maximum age.
1042 if self._policies.max_age_secs:
1043 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1044 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001045 _name, (_data, ts) = self._lru.get_oldest()
1046 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001047 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001048 name, size = self._remove_lru_item()
1049 evicted.append(size)
1050 logging.info(
1051 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1052 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001053
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001054 # Trim according to minimum free space.
1055 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001056 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001057 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001058 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001059 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001060 name, size = self._remove_lru_item()
1061 evicted.append(size)
1062 logging.info(
1063 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1064 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001065
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001066 # Trim according to maximum total size.
1067 if self._policies.max_cache_size:
1068 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001069 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001070 if total <= self._policies.max_cache_size:
1071 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001072 name, size = self._remove_lru_item()
1073 evicted.append(size)
1074 logging.info(
1075 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1076 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001077
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001078 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001079 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001080
1081 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001082 """Removes unknown directories.
1083
1084 Does not recalculate the cache size since it's surprisingly slow on some
1085 OSes.
1086 """
1087 success = True
1088 with self._lock:
1089 try:
1090 actual = set(fs.listdir(self.cache_dir))
1091 actual.discard(self.NAMED_DIR)
1092 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001093 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001094 # First, handle the actual cache content.
1095 # Remove missing entries.
1096 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001097 name, size = self._lru.pop(expected[missing])
1098 logging.warning(
1099 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001100 # Remove unexpected items.
1101 for unexpected in (actual - set(expected)):
1102 try:
1103 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001104 logging.warning(
1105 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001106 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001107 file_path.rmtree(p)
1108 else:
1109 fs.remove(p)
1110 except (IOError, OSError) as e:
1111 logging.error('Failed to remove %s: %s', unexpected, e)
1112 success = False
1113
1114 # Second, fix named cache links.
1115 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001116 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001117 actual = set(fs.listdir(named))
1118 expected = set(self._lru)
1119 # Confirm entries. Do not add missing ones for now.
1120 for name in expected.intersection(actual):
1121 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001122 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001123 if fs.islink(p):
1124 link = fs.readlink(p)
1125 if expected_link == link:
1126 continue
1127 logging.warning(
1128 'Unexpected symlink for cache %s: %s, expected %s',
1129 name, link, expected_link)
1130 else:
1131 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001132 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001133 file_path.rmtree(p)
1134 else:
1135 fs.remove(p)
1136 # Remove unexpected items.
1137 for unexpected in (actual - expected):
1138 try:
1139 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1140 if fs.isdir(p):
1141 file_path.rmtree(p)
1142 else:
1143 fs.remove(p)
1144 except (IOError, OSError) as e:
1145 logging.error('Failed to remove %s: %s', unexpected, e)
1146 success = False
1147 finally:
1148 self._save()
1149 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001150
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001151 # Internal functions.
1152
1153 def _try_upgrade(self):
1154 """Upgrades from the old format to the new one if necessary.
1155
1156 This code can be removed so all bots are known to have the right new format.
1157 """
1158 if not self._lru:
1159 return
1160 _name, (data, _ts) = self._lru.get_oldest()
1161 if isinstance(data, (list, tuple)):
1162 return
1163 # Update to v2.
1164 def upgrade(_name, rel_cache):
1165 abs_cache = os.path.join(self.cache_dir, rel_cache)
1166 return rel_cache, _get_recursive_size(abs_cache)
1167 self._lru.transform(upgrade)
1168 self._save()
1169
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001170 def _remove_lru_item(self):
1171 """Removes the oldest LRU entry. LRU must not be empty."""
1172 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1173 logging.info('Removing named cache %r', name)
1174 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001175 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001176
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001177 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001178 """Creates and returns relative path of a new cache directory.
1179
1180 In practice, it is a 2-letter string.
1181 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001182 # We randomly generate directory names that have two lower/upper case
1183 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1184 abc_len = len(self._DIR_ALPHABET)
1185 tried = set()
1186 while len(tried) < 1000:
1187 i = random.randint(0, abc_len * abc_len - 1)
1188 rel_path = (
1189 self._DIR_ALPHABET[i / abc_len] +
1190 self._DIR_ALPHABET[i % abc_len])
1191 if rel_path in tried:
1192 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001193 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001194 if not fs.exists(abs_path):
1195 return rel_path
1196 tried.add(rel_path)
1197 raise NamedCacheError(
1198 'could not allocate a new cache dir, too many cache dirs')
1199
1200 def _remove(self, name):
1201 """Removes a cache directory and entry.
1202
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001203 Returns:
1204 Number of caches deleted.
1205 """
1206 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001207 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001208 named_dir = self._get_named_path(name)
1209 if fs.islink(named_dir):
1210 fs.unlink(named_dir)
1211
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001212 # Then remove the actual data.
1213 if name not in self._lru:
1214 return
1215 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001216 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001217 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001218 file_path.rmtree(abs_path)
1219 self._lru.pop(name)
1220
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001221 def _save(self):
1222 self._lock.assert_locked()
1223 file_path.ensure_tree(self.cache_dir)
1224 self._lru.save(self.state_file)
1225
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001226 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001227 return os.path.join(self.cache_dir, self.NAMED_DIR, name)