blob: d34f13764d0e218f39dd02b05a6b9e6e05fc508d [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040015import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000016import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040017
18from utils import file_path
19from utils import fs
20from utils import lru
21from utils import threading_utils
22from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000023tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040024
Lei Leife202df2019-06-11 17:33:34 +000025# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000026from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000027import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040028
29# The file size to be used when we don't know the correct file size,
30# generally used for .isolated files.
31UNKNOWN_FILE_SIZE = None
32
33
34def file_write(path, content_generator):
35 """Writes file content as generated by content_generator.
36
37 Creates the intermediary directory as needed.
38
39 Returns the number of bytes written.
40
41 Meant to be mocked out in unit tests.
42 """
43 file_path.ensure_tree(os.path.dirname(path))
44 total = 0
45 with fs.open(path, 'wb') as f:
46 for d in content_generator:
47 total += len(d)
48 f.write(d)
49 return total
50
51
52def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000053 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040054
55 Currently it just checks the file exists and its size matches the expectation.
56 """
57 if size == UNKNOWN_FILE_SIZE:
58 return fs.isfile(path)
59 try:
60 actual_size = fs.stat(path).st_size
61 except OSError as e:
62 logging.warning(
63 'Can\'t read item %s, assuming it\'s invalid: %s',
64 os.path.basename(path), e)
65 return False
66 if size != actual_size:
67 logging.warning(
68 'Found invalid item %s; %d != %d',
69 os.path.basename(path), actual_size, size)
70 return False
71 return True
72
73
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000074def trim_caches(caches, path, min_free_space, max_age_secs):
75 """Trims multiple caches.
76
77 The goal here is to coherently trim all caches in a coherent LRU fashion,
78 deleting older items independent of which container they belong to.
79
80 Two policies are enforced first:
81 - max_age_secs
82 - min_free_space
83
84 Once that's done, then we enforce each cache's own policies.
85
86 Returns:
87 Slice containing the size of all items evicted.
88 """
89 min_ts = time.time() - max_age_secs if max_age_secs else 0
90 free_disk = file_path.get_free_space(path) if min_free_space else 0
91 total = []
92 if min_ts or free_disk:
93 while True:
94 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
95 if not oldest:
96 break
Lei Leife202df2019-06-11 17:33:34 +000097 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000098 c, ts = oldest[0]
99 if ts >= min_ts and free_disk >= min_free_space:
100 break
101 total.append(c.remove_oldest())
102 if min_free_space:
103 free_disk = file_path.get_free_space(path)
104 # Evaluate each cache's own policies.
105 for c in caches:
106 total.extend(c.trim())
107 return total
108
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000109def _use_scandir():
110 # Use scandir in windows for faster execution.
111 # Do not use in other OS due to crbug.com/989409
112 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000113
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000114def _get_recursive_size(path):
115 """Returns the total data size for the specified path.
116
117 This function can be surprisingly slow on OSX, so its output should be cached.
118 """
119 try:
120 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000121 if _use_scandir():
122
123 if sys.platform == 'win32':
124 def direntIsJunction(entry):
125 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
126 # windows-only symbols.
127 return bool(entry.stat().st_file_attributes &
128 scandir.FILE_ATTRIBUTE_REPARSE_POINT)
129 else:
130 def direntIsJunction(_entry):
131 return False
132
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000133 stack = [path]
134 while stack:
135 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000136 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000137 continue
138 if entry.is_file():
139 total += entry.stat().st_size
140 elif entry.is_dir():
141 stack.append(entry.path)
142 else:
143 logging.warning('non directory/file entry: %s', entry)
144 return total
145
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000146 for root, _, files in fs.walk(path):
147 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000148 st = fs.lstat(os.path.join(root, f))
149 if stat.S_ISLNK(st.st_mode):
150 continue
151 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000152 return total
153 except (IOError, OSError, UnicodeEncodeError) as exc:
154 logging.warning('Exception while getting the size of %s:\n%s', path, exc)
155 return None
156
157
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400158class NamedCacheError(Exception):
159 """Named cache specific error."""
160
161
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400162class NoMoreSpace(Exception):
163 """Not enough space to map the whole directory."""
164 pass
165
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400166
167class CachePolicies(object):
168 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
169 """Common caching policies for the multiple caches (isolated, named, cipd).
170
171 Arguments:
172 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
173 cache is effectively a leak.
174 - min_free_space: Trim if disk free space becomes lower than this value. If
175 0, it will unconditionally fill the disk.
176 - max_items: Maximum number of items to keep in the cache. If 0, do not
177 enforce a limit.
178 - max_age_secs: Maximum age an item is kept in the cache until it is
179 automatically evicted. Having a lot of dead luggage slows
180 everything down.
181 """
182 self.max_cache_size = max_cache_size
183 self.min_free_space = min_free_space
184 self.max_items = max_items
185 self.max_age_secs = max_age_secs
186
187 def __str__(self):
188 return (
189 'CachePolicies(max_cache_size=%s; max_items=%s; min_free_space=%s; '
190 'max_age_secs=%s)') % (
191 self.max_cache_size, self.max_items, self.min_free_space,
192 self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400193
194
195class CacheMiss(Exception):
196 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400197 def __init__(self, digest):
198 self.digest = digest
199 super(CacheMiss, self).__init__(
200 'Item with digest %r is not found in cache' % digest)
201
202
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400203class Cache(object):
204 def __init__(self, cache_dir):
205 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000206 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400207 assert file_path.isabs(cache_dir), cache_dir
208 self.cache_dir = cache_dir
209 self._lock = threading_utils.LockWithAssert()
210 # Profiling values.
211 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400212 self._used = []
213
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000214 def __nonzero__(self):
215 """A cache is always True.
216
217 Otherwise it falls back to __len__, which is surprising.
218 """
219 return True
220
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000221 def __len__(self):
222 """Returns the number of entries in the cache."""
223 raise NotImplementedError()
224
225 def __iter__(self):
226 """Iterates over all the entries names."""
227 raise NotImplementedError()
228
229 def __contains__(self, name):
230 """Returns if an entry is in the cache."""
231 raise NotImplementedError()
232
233 @property
234 def total_size(self):
235 """Returns the total size of the cache in bytes."""
236 raise NotImplementedError()
237
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400238 @property
239 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000240 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400241 with self._lock:
242 return self._added[:]
243
244 @property
245 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000246 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400247 with self._lock:
248 return self._used[:]
249
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000250 def get_oldest(self):
251 """Returns timestamp of oldest cache entry or None.
252
253 Returns:
254 Timestamp of the oldest item.
255
256 Used for manual trimming.
257 """
258 raise NotImplementedError()
259
260 def remove_oldest(self):
261 """Removes the oldest item from the cache.
262
263 Returns:
264 Size of the oldest item.
265
266 Used for manual trimming.
267 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400268 raise NotImplementedError()
269
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000270 def save(self):
271 """Saves the current cache to disk."""
272 raise NotImplementedError()
273
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400274 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000275 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400276
277 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000278 Slice with the size of evicted items.
279 """
280 raise NotImplementedError()
281
282 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 """Deletes any corrupted item from the cache, then calls trim(), then
284 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000285
286 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 """
288 raise NotImplementedError()
289
290
291class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400292 """Content addressed cache that stores objects temporarily.
293
294 It can be accessed concurrently from multiple threads, so it should protect
295 its internal state with some lock.
296 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400297
298 def __enter__(self):
299 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000300 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400301 return self
302
303 def __exit__(self, _exc_type, _exec_value, _traceback):
304 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000305 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400306 return False
307
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400308 def touch(self, digest, size):
309 """Ensures item is not corrupted and updates its LRU position.
310
311 Arguments:
312 digest: hash digest of item to check.
313 size: expected size of this item.
314
315 Returns:
316 True if item is in cache and not corrupted.
317 """
318 raise NotImplementedError()
319
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400320 def getfileobj(self, digest):
321 """Returns a readable file like object.
322
323 If file exists on the file system it will have a .name attribute with an
324 absolute path to the file.
325 """
326 raise NotImplementedError()
327
328 def write(self, digest, content):
329 """Reads data from |content| generator and stores it in cache.
330
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000331 It is possible to write to an object that already exists. It may be
332 ignored (sent to /dev/null) but the timestamp is still updated.
333
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400334 Returns digest to simplify chaining.
335 """
336 raise NotImplementedError()
337
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400338
339class MemoryContentAddressedCache(ContentAddressedCache):
340 """ContentAddressedCache implementation that stores everything in memory."""
341
Lei Leife202df2019-06-11 17:33:34 +0000342 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400343 """Args:
344 file_mode_mask: bit mask to AND file mode with. Default value will make
345 all mapped files to be read only.
346 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400347 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400348 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000349 # Items in a LRU lookup dict(digest: size).
350 self._lru = lru.LRUDict()
351
352 # Cache interface implementation.
353
354 def __len__(self):
355 with self._lock:
356 return len(self._lru)
357
358 def __iter__(self):
359 # This is not thread-safe.
360 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361
362 def __contains__(self, digest):
363 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000364 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400365
366 @property
367 def total_size(self):
368 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000369 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400370
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000371 def get_oldest(self):
372 with self._lock:
373 try:
374 # (key, (value, ts))
375 return self._lru.get_oldest()[1][1]
376 except KeyError:
377 return None
378
379 def remove_oldest(self):
380 with self._lock:
381 # TODO(maruel): Update self._added.
382 # (key, (value, ts))
383 return len(self._lru.pop_oldest()[1][0])
384
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000385 def save(self):
386 pass
387
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000388 def trim(self):
389 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000390 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400391
392 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000393 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400394 pass
395
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000396 # ContentAddressedCache interface implementation.
397
398 def __contains__(self, digest):
399 with self._lock:
400 return digest in self._lru
401
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400402 def touch(self, digest, size):
403 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000404 try:
405 self._lru.touch(digest)
406 except KeyError:
407 return False
408 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400409
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400410 def getfileobj(self, digest):
411 with self._lock:
412 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000413 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400414 except KeyError:
415 raise CacheMiss(digest)
416 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400418 return io.BytesIO(d)
419
420 def write(self, digest, content):
421 # Assemble whole stream before taking the lock.
422 data = ''.join(content)
423 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000424 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400425 self._added.append(len(data))
426 return digest
427
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400428
429class DiskContentAddressedCache(ContentAddressedCache):
430 """Stateful LRU cache in a flat hash table in a directory.
431
432 Saves its state as json file.
433 """
434 STATE_FILE = u'state.json'
435
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000436 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400437 """
438 Arguments:
439 cache_dir: directory where to place the cache.
440 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000442 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400443 """
444 # All protected methods (starting with '_') except _path should be called
445 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400446 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400447 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400448 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
449 # Items in a LRU lookup dict(digest: size).
450 self._lru = lru.LRUDict()
451 # Current cached free disk space. It is updated by self._trim().
452 file_path.ensure_tree(self.cache_dir)
453 self._free_disk = file_path.get_free_space(self.cache_dir)
454 # The first item in the LRU cache that must not be evicted during this run
455 # since it was referenced. All items more recent that _protected in the LRU
456 # cache are also inherently protected. It could be a set() of all items
457 # referenced but this increases memory usage without a use case.
458 self._protected = None
459 # Cleanup operations done by self._load(), if any.
460 self._operations = []
461 with tools.Profiler('Setup'):
462 with self._lock:
463 self._load(trim, time_fn)
464
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000465 # Cache interface implementation.
466
467 def __len__(self):
468 with self._lock:
469 return len(self._lru)
470
471 def __iter__(self):
472 # This is not thread-safe.
473 return self._lru.__iter__()
474
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400475 def __contains__(self, digest):
476 with self._lock:
477 return digest in self._lru
478
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400479 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400480 def total_size(self):
481 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000482 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400483
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000484 def get_oldest(self):
485 with self._lock:
486 try:
487 # (key, (value, ts))
488 return self._lru.get_oldest()[1][1]
489 except KeyError:
490 return None
491
492 def remove_oldest(self):
493 with self._lock:
494 # TODO(maruel): Update self._added.
495 return self._remove_lru_file(True)
496
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000497 def save(self):
498 with self._lock:
499 return self._save()
500
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000501 def trim(self):
502 """Forces retention policies."""
503 with self._lock:
504 return self._trim()
505
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400506 def cleanup(self):
507 """Cleans up the cache directory.
508
509 Ensures there is no unknown files in cache_dir.
510 Ensures the read-only bits are set correctly.
511
512 At that point, the cache was already loaded, trimmed to respect cache
513 policies.
514 """
515 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000516 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400517 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000518 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 # It'd be faster if there were a readdir() function.
520 for filename in fs.listdir(self.cache_dir):
521 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000522 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400523 continue
524 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000525 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400526 previous.remove(filename)
527 continue
528
529 # An untracked file. Delete it.
530 logging.warning('Removing unknown file %s from cache', filename)
531 p = self._path(filename)
532 if fs.isdir(p):
533 try:
534 file_path.rmtree(p)
535 except OSError:
536 pass
537 else:
538 file_path.try_remove(p)
539 continue
540
541 if previous:
542 # Filter out entries that were not found.
543 logging.warning('Removed %d lost files', len(previous))
544 for filename in previous:
545 self._lru.pop(filename)
546 self._save()
547
548 # What remains to be done is to hash every single item to
549 # detect corruption, then save to ensure state.json is up to date.
550 # Sadly, on a 50GiB cache with 100MiB/s I/O, this is still over 8 minutes.
551 # TODO(maruel): Let's revisit once directory metadata is stored in
552 # state.json so only the files that had been mapped since the last cleanup()
553 # call are manually verified.
554 #
555 #with self._lock:
556 # for digest in self._lru:
557 # if not isolated_format.is_valid_hash(
558 # self._path(digest), self.hash_algo):
559 # self.evict(digest)
560 # logging.info('Deleted corrupted item: %s', digest)
561
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000562 # ContentAddressedCache interface implementation.
563
564 def __contains__(self, digest):
565 with self._lock:
566 return digest in self._lru
567
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400568 def touch(self, digest, size):
569 """Verifies an actual file is valid and bumps its LRU position.
570
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000571 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400572
573 Note that is doesn't compute the hash so it could still be corrupted if the
574 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400575 """
576 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000577 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400578
579 # Update its LRU position.
580 with self._lock:
581 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000582 if looks_valid:
583 # Exists but not in the LRU anymore.
584 self._delete_file(digest, size)
585 return False
586 if not looks_valid:
587 self._lru.pop(digest)
588 # Exists but not in the LRU anymore.
589 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590 return False
591 self._lru.touch(digest)
592 self._protected = self._protected or digest
593 return True
594
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400595 def getfileobj(self, digest):
596 try:
597 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400598 except IOError:
599 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000600 with self._lock:
601 try:
602 self._used.append(self._lru[digest])
603 except KeyError:
604 # If the digest is not actually in _lru, assume it is a cache miss.
605 # Existing file will be overwritten by whoever uses the cache and added
606 # to _lru.
607 f.close()
608 raise CacheMiss(digest)
609 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400610
611 def write(self, digest, content):
612 assert content is not None
613 with self._lock:
614 self._protected = self._protected or digest
615 path = self._path(digest)
616 # A stale broken file may remain. It is possible for the file to have write
617 # access bit removed which would cause the file_write() call to fail to open
618 # in write mode. Take no chance here.
619 file_path.try_remove(path)
620 try:
621 size = file_write(path, content)
622 except:
623 # There are two possible places were an exception can occur:
624 # 1) Inside |content| generator in case of network or unzipping errors.
625 # 2) Inside file_write itself in case of disk IO errors.
626 # In any case delete an incomplete file and propagate the exception to
627 # caller, it will be logged there.
628 file_path.try_remove(path)
629 raise
630 # Make the file read-only in the cache. This has a few side-effects since
631 # the file node is modified, so every directory entries to this file becomes
632 # read-only. It's fine here because it is a new file.
633 file_path.set_read_only(path, True)
634 with self._lock:
635 self._add(digest, size)
636 return digest
637
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000638 # Internal functions.
639
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400640 def _load(self, trim, time_fn):
641 """Loads state of the cache from json file.
642
643 If cache_dir does not exist on disk, it is created.
644 """
645 self._lock.assert_locked()
646
647 if not fs.isfile(self.state_file):
648 if not fs.isdir(self.cache_dir):
649 fs.makedirs(self.cache_dir)
650 else:
651 # Load state of the cache.
652 try:
653 self._lru = lru.LRUDict.load(self.state_file)
654 except ValueError as err:
655 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000656 # Don't want to keep broken cache dir.
657 file_path.rmtree(self.cache_dir)
658 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400659 if time_fn:
660 self._lru.time_fn = time_fn
661 if trim:
662 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400663
664 def _save(self):
665 """Saves the LRU ordering."""
666 self._lock.assert_locked()
667 if sys.platform != 'win32':
668 d = os.path.dirname(self.state_file)
669 if fs.isdir(d):
670 # Necessary otherwise the file can't be created.
671 file_path.set_read_only(d, False)
672 if fs.isfile(self.state_file):
673 file_path.set_read_only(self.state_file, False)
674 self._lru.save(self.state_file)
675
676 def _trim(self):
677 """Trims anything we don't know, make sure enough free space exists."""
678 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000679 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400680
681 # Trim old items.
682 if self.policies.max_age_secs:
683 cutoff = self._lru.time_fn() - self.policies.max_age_secs
684 while self._lru:
685 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000686 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400687 if oldest[1][1] >= cutoff:
688 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000689 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400690
691 # Ensure maximum cache size.
692 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000693 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400694 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000695 e = self._remove_lru_file(True)
696 evicted.append(e)
697 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400698
699 # Ensure maximum number of items in the cache.
700 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000701 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000702 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400703
704 # Ensure enough free space.
705 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 while (
707 self.policies.min_free_space and
708 self._lru and
709 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000710 # self._free_disk is updated by this call.
711 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400712
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000713 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000714 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400715 usage_percent = 0.
716 if total_usage:
717 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
718
719 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000720 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
721 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
722 '%.1fkb)',
723 len(evicted), sum(evicted) / 1024.,
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400724 self._free_disk / 1024.,
725 total_usage / 1024.,
726 usage_percent,
727 self.policies.max_cache_size / 1024.)
728 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400730
731 def _path(self, digest):
732 """Returns the path to one item."""
733 return os.path.join(self.cache_dir, digest)
734
735 def _remove_lru_file(self, allow_protected):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000736 """Removes the lastest recently used file and returns its size.
737
738 Updates self._free_disk.
739 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400740 self._lock.assert_locked()
741 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000742 digest, (size, _ts) = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400743 if not allow_protected and digest == self._protected:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000744 total_size = sum(self._lru.values())+size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400745 msg = (
746 'Not enough space to fetch the whole isolated tree.\n'
747 ' %s\n cache=%dbytes, %d items; %sb free_space') % (
748 self.policies, total_size, len(self._lru)+1, self._free_disk)
749 raise NoMoreSpace(msg)
750 except KeyError:
751 # That means an internal error.
752 raise NoMoreSpace('Nothing to remove, can\'t happend')
753 digest, (size, _) = self._lru.pop_oldest()
754 logging.debug('Removing LRU file %s', digest)
755 self._delete_file(digest, size)
756 return size
757
758 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
759 """Adds an item into LRU cache marking it as a newest one."""
760 self._lock.assert_locked()
761 if size == UNKNOWN_FILE_SIZE:
762 size = fs.stat(self._path(digest)).st_size
763 self._added.append(size)
764 self._lru.add(digest, size)
765 self._free_disk -= size
766 # Do a quicker version of self._trim(). It only enforces free disk space,
767 # not cache size limits. It doesn't actually look at real free disk space,
768 # only uses its cache values. self._trim() will be called later to enforce
769 # real trimming but doing this quick version here makes it possible to map
770 # an isolated that is larger than the current amount of free disk space when
771 # the cache size is already large.
772 while (
773 self.policies.min_free_space and
774 self._lru and
775 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000776 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400777 if self._remove_lru_file(False) == -1:
778 break
779
780 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000781 """Deletes cache file from the file system.
782
783 Updates self._free_disk.
784 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400785 self._lock.assert_locked()
786 try:
787 if size == UNKNOWN_FILE_SIZE:
788 try:
789 size = fs.stat(self._path(digest)).st_size
790 except OSError:
791 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000792 if file_path.try_remove(self._path(digest)):
793 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400794 except OSError as e:
795 if e.errno != errno.ENOENT:
796 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400797
798
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400799class NamedCache(Cache):
800 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400801
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400802 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400803 name is a short identifier that describes the contents of the cache, e.g.
804 "git_v8" could be all git repositories required by v8 builds, or
805 "build_chromium" could be build artefacts of the Chromium.
806 path is a directory path relative to the task run dir. Cache installation
807 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400808 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400809 _DIR_ALPHABET = string.ascii_letters + string.digits
810 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000811 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400812
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400813 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400814 """Initializes NamedCaches.
815
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400816 Arguments:
817 - cache_dir is a directory for persistent cache storage.
818 - policies is a CachePolicies instance.
819 - time_fn is a function that returns timestamp (float) and used to take
820 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400821 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400822 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400823 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000824 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400825 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
826 self._lru = lru.LRUDict()
827 if not fs.isdir(self.cache_dir):
828 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000829 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000830 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400831 self._lru = lru.LRUDict.load(self.state_file)
832 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000833 logging.exception(
834 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400835 file_path.rmtree(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000836 with self._lock:
837 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400838 if time_fn:
839 self._lru.time_fn = time_fn
840
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841 @property
842 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000843 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400844 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000845 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400846
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000847 def install(self, dst, name):
848 """Creates the directory |dst| and moves a previous named cache |name| if it
849 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000851 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400852
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000853 Returns the reused named cache size in bytes, or 0 if none was present.
854
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400855 Raises NamedCacheError if cannot install the cache.
856 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000857 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 with self._lock:
859 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000860 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400861 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000862 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400863
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000864 # Remove the named symlink if it exists.
865 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000866 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000867 # Remove the symlink itself, not its destination.
868 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000869
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000870 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000871 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400872 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000873 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000874 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000875 file_path.ensure_tree(os.path.dirname(dst))
876 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400877 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000878 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000880 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400881 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000883 # The named cache does not exist, create an empty directory. When
884 # uninstalling, we will move it back to the cache and create an an
885 # entry.
886 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000887 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000888 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400889 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000890 # Raise using the original traceback.
891 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000892 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Lei Leife202df2019-06-11 17:33:34 +0000893 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000894 finally:
895 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400896
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000897 def uninstall(self, src, name):
898 """Moves the cache directory back into the named cache hive for an eventual
899 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400900
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000901 The opposite of install().
902
903 src must be absolute and unicode. Its content is moved back into the local
904 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400905
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000906 Returns the named cache size in bytes.
907
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400908 Raises NamedCacheError if cannot uninstall the cache.
909 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000910 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400911 with self._lock:
912 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000913 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400914 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000915 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000916 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400917 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400918
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000919 if name in self._lru:
920 # This shouldn't happen but just remove the preexisting one and move
921 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000922 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000923 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000924
925 # Calculate the size of the named cache to keep. It's important because
926 # if size is zero (it's empty), we do not want to add it back to the
927 # named caches cache.
928 size = _get_recursive_size(src)
929 logging.info('- Size is %d', size)
930 if not size:
931 # Do not save empty named cache.
932 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400933
934 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000935 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400936 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000937 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400938 file_path.ensure_tree(os.path.dirname(abs_cache))
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000939 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400940
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000941 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000942 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000943
944 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
945 # for user convenience.
946 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000947 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000948 file_path.remove(named_path)
949 else:
950 file_path.ensure_tree(os.path.dirname(named_path))
951
952 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000953 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000954 logging.info(
955 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000956 except OSError:
957 # Ignore on Windows. It happens when running as a normal user or when
958 # UAC is enabled and the user is a filtered administrator account.
959 if sys.platform != 'win32':
960 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000961 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400962 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000963 # Raise using the original traceback.
964 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000965 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Lei Leife202df2019-06-11 17:33:34 +0000966 six.reraise(exc, None, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000967 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000968 # Call save() at every uninstall. The assumptions are:
969 # - The total the number of named caches is low, so the state.json file
970 # is small, so the time it takes to write it to disk is short.
971 # - The number of mapped named caches per task is low, so the number of
972 # times save() is called on tear-down isn't high enough to be
973 # significant.
974 # - uninstall() sometimes throws due to file locking on Windows or
975 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000976 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400977
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000978 # Cache interface implementation.
979
980 def __len__(self):
981 with self._lock:
982 return len(self._lru)
983
984 def __iter__(self):
985 # This is not thread-safe.
986 return self._lru.__iter__()
987
988 def __contains__(self, digest):
989 with self._lock:
990 return digest in self._lru
991
992 @property
993 def total_size(self):
994 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000995 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000996
997 def get_oldest(self):
998 with self._lock:
999 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001000 # (key, (value, ts))
1001 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001002 except KeyError:
1003 return None
1004
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001005 def remove_oldest(self):
1006 with self._lock:
1007 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001008 _name, size = self._remove_lru_item()
1009 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001010
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001011 def save(self):
1012 with self._lock:
1013 return self._save()
1014
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001015 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001016 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001017 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001018 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001019 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001020
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001021 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001022 if self._policies.max_items:
1023 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001024 name, size = self._remove_lru_item()
1025 evicted.append(size)
1026 logging.info(
1027 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1028 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001029
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001030 # Trim according to maximum age.
1031 if self._policies.max_age_secs:
1032 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1033 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001034 _name, (_data, ts) = self._lru.get_oldest()
1035 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001036 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001037 name, size = self._remove_lru_item()
1038 evicted.append(size)
1039 logging.info(
1040 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1041 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001042
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001043 # Trim according to minimum free space.
1044 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001045 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001046 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001047 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001048 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001049 name, size = self._remove_lru_item()
1050 evicted.append(size)
1051 logging.info(
1052 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1053 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001054
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001055 # Trim according to maximum total size.
1056 if self._policies.max_cache_size:
1057 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001058 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001059 if total <= self._policies.max_cache_size:
1060 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001061 name, size = self._remove_lru_item()
1062 evicted.append(size)
1063 logging.info(
1064 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1065 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001066
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001067 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001068 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001069
1070 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001071 """Removes unknown directories.
1072
1073 Does not recalculate the cache size since it's surprisingly slow on some
1074 OSes.
1075 """
1076 success = True
1077 with self._lock:
1078 try:
1079 actual = set(fs.listdir(self.cache_dir))
1080 actual.discard(self.NAMED_DIR)
1081 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001082 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001083 # First, handle the actual cache content.
1084 # Remove missing entries.
1085 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001086 name, size = self._lru.pop(expected[missing])
1087 logging.warning(
1088 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001089 # Remove unexpected items.
1090 for unexpected in (actual - set(expected)):
1091 try:
1092 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001093 logging.warning(
1094 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001095 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001096 file_path.rmtree(p)
1097 else:
1098 fs.remove(p)
1099 except (IOError, OSError) as e:
1100 logging.error('Failed to remove %s: %s', unexpected, e)
1101 success = False
1102
1103 # Second, fix named cache links.
1104 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001105 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001106 actual = set(fs.listdir(named))
1107 expected = set(self._lru)
1108 # Confirm entries. Do not add missing ones for now.
1109 for name in expected.intersection(actual):
1110 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001111 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001112 if fs.islink(p):
1113 link = fs.readlink(p)
1114 if expected_link == link:
1115 continue
1116 logging.warning(
1117 'Unexpected symlink for cache %s: %s, expected %s',
1118 name, link, expected_link)
1119 else:
1120 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001121 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001122 file_path.rmtree(p)
1123 else:
1124 fs.remove(p)
1125 # Remove unexpected items.
1126 for unexpected in (actual - expected):
1127 try:
1128 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1129 if fs.isdir(p):
1130 file_path.rmtree(p)
1131 else:
1132 fs.remove(p)
1133 except (IOError, OSError) as e:
1134 logging.error('Failed to remove %s: %s', unexpected, e)
1135 success = False
1136 finally:
1137 self._save()
1138 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001139
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001140 # Internal functions.
1141
1142 def _try_upgrade(self):
1143 """Upgrades from the old format to the new one if necessary.
1144
1145 This code can be removed so all bots are known to have the right new format.
1146 """
1147 if not self._lru:
1148 return
1149 _name, (data, _ts) = self._lru.get_oldest()
1150 if isinstance(data, (list, tuple)):
1151 return
1152 # Update to v2.
1153 def upgrade(_name, rel_cache):
1154 abs_cache = os.path.join(self.cache_dir, rel_cache)
1155 return rel_cache, _get_recursive_size(abs_cache)
1156 self._lru.transform(upgrade)
1157 self._save()
1158
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001159 def _remove_lru_item(self):
1160 """Removes the oldest LRU entry. LRU must not be empty."""
1161 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1162 logging.info('Removing named cache %r', name)
1163 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001164 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001165
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001166 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001167 """Creates and returns relative path of a new cache directory.
1168
1169 In practice, it is a 2-letter string.
1170 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001171 # We randomly generate directory names that have two lower/upper case
1172 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1173 abc_len = len(self._DIR_ALPHABET)
1174 tried = set()
1175 while len(tried) < 1000:
1176 i = random.randint(0, abc_len * abc_len - 1)
1177 rel_path = (
1178 self._DIR_ALPHABET[i / abc_len] +
1179 self._DIR_ALPHABET[i % abc_len])
1180 if rel_path in tried:
1181 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001182 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001183 if not fs.exists(abs_path):
1184 return rel_path
1185 tried.add(rel_path)
1186 raise NamedCacheError(
1187 'could not allocate a new cache dir, too many cache dirs')
1188
1189 def _remove(self, name):
1190 """Removes a cache directory and entry.
1191
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001192 Returns:
1193 Number of caches deleted.
1194 """
1195 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001196 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001197 named_dir = self._get_named_path(name)
1198 if fs.islink(named_dir):
1199 fs.unlink(named_dir)
1200
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001201 # Then remove the actual data.
1202 if name not in self._lru:
1203 return
1204 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001205 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001206 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001207 file_path.rmtree(abs_path)
1208 self._lru.pop(name)
1209
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001210 def _save(self):
1211 self._lock.assert_locked()
1212 file_path.ensure_tree(self.cache_dir)
1213 self._lru.save(self.state_file)
1214
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001215 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001216 return os.path.join(self.cache_dir, self.NAMED_DIR, name)