blob: 7357ba76d5a9e47bc4808a04c99ab88dba1ee12a [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Junji Watanabe7b720782020-07-01 01:51:07 +000015import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000017import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040018
19from utils import file_path
20from utils import fs
21from utils import lru
22from utils import threading_utils
23from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000024tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
Lei Leife202df2019-06-11 17:33:34 +000026# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000027from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000028import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
Junji Watanabe5e73aab2020-04-09 04:20:27 +000030import isolated_format
31
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040032# The file size to be used when we don't know the correct file size,
33# generally used for .isolated files.
34UNKNOWN_FILE_SIZE = None
35
36
37def file_write(path, content_generator):
38 """Writes file content as generated by content_generator.
39
40 Creates the intermediary directory as needed.
41
42 Returns the number of bytes written.
43
44 Meant to be mocked out in unit tests.
45 """
46 file_path.ensure_tree(os.path.dirname(path))
47 total = 0
48 with fs.open(path, 'wb') as f:
49 for d in content_generator:
50 total += len(d)
51 f.write(d)
52 return total
53
54
55def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000056 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040057
58 Currently it just checks the file exists and its size matches the expectation.
59 """
60 if size == UNKNOWN_FILE_SIZE:
61 return fs.isfile(path)
62 try:
63 actual_size = fs.stat(path).st_size
64 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000065 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
66 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040067 return False
68 if size != actual_size:
69 logging.warning(
70 'Found invalid item %s; %d != %d',
71 os.path.basename(path), actual_size, size)
72 return False
73 return True
74
75
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000076def trim_caches(caches, path, min_free_space, max_age_secs):
77 """Trims multiple caches.
78
79 The goal here is to coherently trim all caches in a coherent LRU fashion,
80 deleting older items independent of which container they belong to.
81
82 Two policies are enforced first:
83 - max_age_secs
84 - min_free_space
85
86 Once that's done, then we enforce each cache's own policies.
87
88 Returns:
89 Slice containing the size of all items evicted.
90 """
91 min_ts = time.time() - max_age_secs if max_age_secs else 0
92 free_disk = file_path.get_free_space(path) if min_free_space else 0
93 total = []
94 if min_ts or free_disk:
95 while True:
96 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
97 if not oldest:
98 break
Lei Leife202df2019-06-11 17:33:34 +000099 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000100 c, ts = oldest[0]
101 if ts >= min_ts and free_disk >= min_free_space:
102 break
103 total.append(c.remove_oldest())
104 if min_free_space:
105 free_disk = file_path.get_free_space(path)
106 # Evaluate each cache's own policies.
107 for c in caches:
108 total.extend(c.trim())
109 return total
110
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000111def _use_scandir():
112 # Use scandir in windows for faster execution.
113 # Do not use in other OS due to crbug.com/989409
114 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000115
Junji Watanabe38b28b02020-04-23 10:23:30 +0000116
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000117def _get_recursive_size(path):
118 """Returns the total data size for the specified path.
119
120 This function can be surprisingly slow on OSX, so its output should be cached.
121 """
122 try:
123 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000124 if _use_scandir():
125
126 if sys.platform == 'win32':
Junji Watanabe38b28b02020-04-23 10:23:30 +0000127
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000128 def direntIsJunction(entry):
129 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
130 # windows-only symbols.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000131 return bool(entry.stat().st_file_attributes & scandir
132 .FILE_ATTRIBUTE_REPARSE_POINT)
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000133 else:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000134
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000135 def direntIsJunction(_entry):
136 return False
137
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000138 stack = [path]
139 while stack:
140 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000141 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000142 continue
143 if entry.is_file():
144 total += entry.stat().st_size
145 elif entry.is_dir():
146 stack.append(entry.path)
147 else:
148 logging.warning('non directory/file entry: %s', entry)
149 return total
150
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000151 for root, _, files in fs.walk(path):
152 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000153 st = fs.lstat(os.path.join(root, f))
154 if stat.S_ISLNK(st.st_mode):
155 continue
156 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000157 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000158 except (IOError, OSError, UnicodeEncodeError):
159 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000160 return None
161
162
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400163class NamedCacheError(Exception):
164 """Named cache specific error."""
165
166
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400167class NoMoreSpace(Exception):
168 """Not enough space to map the whole directory."""
169 pass
170
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400171
172class CachePolicies(object):
173 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
174 """Common caching policies for the multiple caches (isolated, named, cipd).
175
176 Arguments:
177 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
178 cache is effectively a leak.
179 - min_free_space: Trim if disk free space becomes lower than this value. If
180 0, it will unconditionally fill the disk.
181 - max_items: Maximum number of items to keep in the cache. If 0, do not
182 enforce a limit.
183 - max_age_secs: Maximum age an item is kept in the cache until it is
184 automatically evicted. Having a lot of dead luggage slows
185 everything down.
186 """
187 self.max_cache_size = max_cache_size
188 self.min_free_space = min_free_space
189 self.max_items = max_items
190 self.max_age_secs = max_age_secs
191
192 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000193 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
194 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
195 self.max_cache_size, float(self.max_cache_size) / 1024**3,
196 self.max_items, self.min_free_space,
197 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400198
199
200class CacheMiss(Exception):
201 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400202 def __init__(self, digest):
203 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000204 super(CacheMiss,
205 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400206
207
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000209
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400210 def __init__(self, cache_dir):
211 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000212 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400213 assert file_path.isabs(cache_dir), cache_dir
214 self.cache_dir = cache_dir
215 self._lock = threading_utils.LockWithAssert()
216 # Profiling values.
217 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400218 self._used = []
219
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000220 def __nonzero__(self):
221 """A cache is always True.
222
223 Otherwise it falls back to __len__, which is surprising.
224 """
225 return True
226
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000227 def __bool__(self):
228 """A cache is always True.
229
230 Otherwise it falls back to __len__, which is surprising.
231 """
232 return True
233
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000234 def __len__(self):
235 """Returns the number of entries in the cache."""
236 raise NotImplementedError()
237
238 def __iter__(self):
239 """Iterates over all the entries names."""
240 raise NotImplementedError()
241
242 def __contains__(self, name):
243 """Returns if an entry is in the cache."""
244 raise NotImplementedError()
245
246 @property
247 def total_size(self):
248 """Returns the total size of the cache in bytes."""
249 raise NotImplementedError()
250
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400251 @property
252 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000253 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400254 with self._lock:
255 return self._added[:]
256
257 @property
258 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000259 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400260 with self._lock:
261 return self._used[:]
262
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000263 def get_oldest(self):
264 """Returns timestamp of oldest cache entry or None.
265
266 Returns:
267 Timestamp of the oldest item.
268
269 Used for manual trimming.
270 """
271 raise NotImplementedError()
272
273 def remove_oldest(self):
274 """Removes the oldest item from the cache.
275
276 Returns:
277 Size of the oldest item.
278
279 Used for manual trimming.
280 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400281 raise NotImplementedError()
282
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 def save(self):
284 """Saves the current cache to disk."""
285 raise NotImplementedError()
286
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000288 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400289
290 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000291 Slice with the size of evicted items.
292 """
293 raise NotImplementedError()
294
295 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000296 """Deletes any corrupted item from the cache, then calls trim(), then
297 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000298
299 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400300 """
301 raise NotImplementedError()
302
303
304class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400305 """Content addressed cache that stores objects temporarily.
306
307 It can be accessed concurrently from multiple threads, so it should protect
308 its internal state with some lock.
309 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400310
311 def __enter__(self):
312 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000313 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400314 return self
315
316 def __exit__(self, _exc_type, _exec_value, _traceback):
317 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000318 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400319 return False
320
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321 def touch(self, digest, size):
322 """Ensures item is not corrupted and updates its LRU position.
323
324 Arguments:
325 digest: hash digest of item to check.
326 size: expected size of this item.
327
328 Returns:
329 True if item is in cache and not corrupted.
330 """
331 raise NotImplementedError()
332
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400333 def getfileobj(self, digest):
334 """Returns a readable file like object.
335
336 If file exists on the file system it will have a .name attribute with an
337 absolute path to the file.
338 """
339 raise NotImplementedError()
340
341 def write(self, digest, content):
342 """Reads data from |content| generator and stores it in cache.
343
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000344 It is possible to write to an object that already exists. It may be
345 ignored (sent to /dev/null) but the timestamp is still updated.
346
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400347 Returns digest to simplify chaining.
348 """
349 raise NotImplementedError()
350
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400351
352class MemoryContentAddressedCache(ContentAddressedCache):
353 """ContentAddressedCache implementation that stores everything in memory."""
354
Lei Leife202df2019-06-11 17:33:34 +0000355 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400356 """Args:
357 file_mode_mask: bit mask to AND file mode with. Default value will make
358 all mapped files to be read only.
359 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400360 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000362 # Items in a LRU lookup dict(digest: size).
363 self._lru = lru.LRUDict()
364
365 # Cache interface implementation.
366
367 def __len__(self):
368 with self._lock:
369 return len(self._lru)
370
371 def __iter__(self):
372 # This is not thread-safe.
373 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374
375 def __contains__(self, digest):
376 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000377 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378
379 @property
380 def total_size(self):
381 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000382 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400383
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000384 def get_oldest(self):
385 with self._lock:
386 try:
387 # (key, (value, ts))
388 return self._lru.get_oldest()[1][1]
389 except KeyError:
390 return None
391
392 def remove_oldest(self):
393 with self._lock:
394 # TODO(maruel): Update self._added.
395 # (key, (value, ts))
396 return len(self._lru.pop_oldest()[1][0])
397
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000398 def save(self):
399 pass
400
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000401 def trim(self):
402 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000403 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404
405 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000406 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 pass
408
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000409 # ContentAddressedCache interface implementation.
410
411 def __contains__(self, digest):
412 with self._lock:
413 return digest in self._lru
414
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 def touch(self, digest, size):
416 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 try:
418 self._lru.touch(digest)
419 except KeyError:
420 return False
421 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400422
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400423 def getfileobj(self, digest):
424 with self._lock:
425 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000426 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400427 except KeyError:
428 raise CacheMiss(digest)
429 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000430 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400431 return io.BytesIO(d)
432
433 def write(self, digest, content):
434 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000435 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400436 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000437 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 self._added.append(len(data))
439 return digest
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441
442class DiskContentAddressedCache(ContentAddressedCache):
443 """Stateful LRU cache in a flat hash table in a directory.
444
445 Saves its state as json file.
446 """
447 STATE_FILE = u'state.json'
448
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000449 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 """
451 Arguments:
452 cache_dir: directory where to place the cache.
453 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400454 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000455 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400456 """
457 # All protected methods (starting with '_') except _path should be called
458 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400459 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400460 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400461 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
462 # Items in a LRU lookup dict(digest: size).
463 self._lru = lru.LRUDict()
464 # Current cached free disk space. It is updated by self._trim().
465 file_path.ensure_tree(self.cache_dir)
466 self._free_disk = file_path.get_free_space(self.cache_dir)
467 # The first item in the LRU cache that must not be evicted during this run
468 # since it was referenced. All items more recent that _protected in the LRU
469 # cache are also inherently protected. It could be a set() of all items
470 # referenced but this increases memory usage without a use case.
471 self._protected = None
472 # Cleanup operations done by self._load(), if any.
473 self._operations = []
474 with tools.Profiler('Setup'):
475 with self._lock:
476 self._load(trim, time_fn)
477
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000478 # Cache interface implementation.
479
480 def __len__(self):
481 with self._lock:
482 return len(self._lru)
483
484 def __iter__(self):
485 # This is not thread-safe.
486 return self._lru.__iter__()
487
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 def __contains__(self, digest):
489 with self._lock:
490 return digest in self._lru
491
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400493 def total_size(self):
494 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000495 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400496
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000497 def get_oldest(self):
498 with self._lock:
499 try:
500 # (key, (value, ts))
501 return self._lru.get_oldest()[1][1]
502 except KeyError:
503 return None
504
505 def remove_oldest(self):
506 with self._lock:
507 # TODO(maruel): Update self._added.
508 return self._remove_lru_file(True)
509
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000510 def save(self):
511 with self._lock:
512 return self._save()
513
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000514 def trim(self):
515 """Forces retention policies."""
516 with self._lock:
517 return self._trim()
518
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 def cleanup(self):
520 """Cleans up the cache directory.
521
522 Ensures there is no unknown files in cache_dir.
523 Ensures the read-only bits are set correctly.
524
525 At that point, the cache was already loaded, trimmed to respect cache
526 policies.
527 """
528 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000529 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400530 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000531 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400532 # It'd be faster if there were a readdir() function.
533 for filename in fs.listdir(self.cache_dir):
534 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000535 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536 continue
537 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000538 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400539 previous.remove(filename)
540 continue
541
542 # An untracked file. Delete it.
543 logging.warning('Removing unknown file %s from cache', filename)
544 p = self._path(filename)
545 if fs.isdir(p):
546 try:
547 file_path.rmtree(p)
548 except OSError:
549 pass
550 else:
551 file_path.try_remove(p)
552 continue
553
554 if previous:
555 # Filter out entries that were not found.
556 logging.warning('Removed %d lost files', len(previous))
557 for filename in previous:
558 self._lru.pop(filename)
559 self._save()
560
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000561 # Verify hash of every single item to detect corruption. the corrupted
562 # files will be evicted.
563 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000564 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000565 # verify only if the mtime is grather than the timestamp in state.json
566 # to avoid take too long time.
567 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000568 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000569 logging.warning('Item has been modified. item: %s', digest)
570 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000571 # Update timestamp in state.json
572 self._lru.touch(digest)
573 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000574 # remove corrupted file from LRU and file system
575 self._lru.pop(digest)
576 self._delete_file(digest, UNKNOWN_FILE_SIZE)
577 logging.error('Deleted corrupted item: %s', digest)
578 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400579
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000580 # ContentAddressedCache interface implementation.
581
582 def __contains__(self, digest):
583 with self._lock:
584 return digest in self._lru
585
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400586 def touch(self, digest, size):
587 """Verifies an actual file is valid and bumps its LRU position.
588
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000589 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590
591 Note that is doesn't compute the hash so it could still be corrupted if the
592 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400593 """
594 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000595 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400596
597 # Update its LRU position.
598 with self._lock:
599 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000600 if looks_valid:
601 # Exists but not in the LRU anymore.
602 self._delete_file(digest, size)
603 return False
604 if not looks_valid:
605 self._lru.pop(digest)
606 # Exists but not in the LRU anymore.
607 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400608 return False
609 self._lru.touch(digest)
610 self._protected = self._protected or digest
611 return True
612
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400613 def getfileobj(self, digest):
614 try:
615 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400616 except IOError:
617 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000618 with self._lock:
619 try:
620 self._used.append(self._lru[digest])
621 except KeyError:
622 # If the digest is not actually in _lru, assume it is a cache miss.
623 # Existing file will be overwritten by whoever uses the cache and added
624 # to _lru.
625 f.close()
626 raise CacheMiss(digest)
627 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 def write(self, digest, content):
630 assert content is not None
631 with self._lock:
632 self._protected = self._protected or digest
633 path = self._path(digest)
634 # A stale broken file may remain. It is possible for the file to have write
635 # access bit removed which would cause the file_write() call to fail to open
636 # in write mode. Take no chance here.
637 file_path.try_remove(path)
638 try:
639 size = file_write(path, content)
640 except:
641 # There are two possible places were an exception can occur:
642 # 1) Inside |content| generator in case of network or unzipping errors.
643 # 2) Inside file_write itself in case of disk IO errors.
644 # In any case delete an incomplete file and propagate the exception to
645 # caller, it will be logged there.
646 file_path.try_remove(path)
647 raise
648 # Make the file read-only in the cache. This has a few side-effects since
649 # the file node is modified, so every directory entries to this file becomes
650 # read-only. It's fine here because it is a new file.
651 file_path.set_read_only(path, True)
652 with self._lock:
653 self._add(digest, size)
654 return digest
655
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000656 # Internal functions.
657
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400658 def _load(self, trim, time_fn):
659 """Loads state of the cache from json file.
660
661 If cache_dir does not exist on disk, it is created.
662 """
663 self._lock.assert_locked()
664
665 if not fs.isfile(self.state_file):
666 if not fs.isdir(self.cache_dir):
667 fs.makedirs(self.cache_dir)
668 else:
669 # Load state of the cache.
670 try:
671 self._lru = lru.LRUDict.load(self.state_file)
672 except ValueError as err:
673 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000674 # Don't want to keep broken cache dir.
675 file_path.rmtree(self.cache_dir)
676 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000677 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 if time_fn:
679 self._lru.time_fn = time_fn
680 if trim:
681 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400682
683 def _save(self):
684 """Saves the LRU ordering."""
685 self._lock.assert_locked()
686 if sys.platform != 'win32':
687 d = os.path.dirname(self.state_file)
688 if fs.isdir(d):
689 # Necessary otherwise the file can't be created.
690 file_path.set_read_only(d, False)
691 if fs.isfile(self.state_file):
692 file_path.set_read_only(self.state_file, False)
693 self._lru.save(self.state_file)
694
695 def _trim(self):
696 """Trims anything we don't know, make sure enough free space exists."""
697 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400699
700 # Trim old items.
701 if self.policies.max_age_secs:
702 cutoff = self._lru.time_fn() - self.policies.max_age_secs
703 while self._lru:
704 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000705 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 if oldest[1][1] >= cutoff:
707 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000708 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709
710 # Ensure maximum cache size.
711 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000712 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 e = self._remove_lru_file(True)
715 evicted.append(e)
716 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400717
718 # Ensure maximum number of items in the cache.
719 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000720 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400722
723 # Ensure enough free space.
724 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725 while (
726 self.policies.min_free_space and
727 self._lru and
728 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 # self._free_disk is updated by this call.
730 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400731
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000732 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000733 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400734 usage_percent = 0.
735 if total_usage:
736 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
737
738 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000739 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
740 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000741 '%.1fkb)', len(evicted),
742 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
743 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746
747 def _path(self, digest):
748 """Returns the path to one item."""
749 return os.path.join(self.cache_dir, digest)
750
751 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000752 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000753
754 Updates self._free_disk.
755 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400756 self._lock.assert_locked()
757 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000758 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000760 total_size = sum(self._lru.values())
761 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000762 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
763 '%s bytes (%.3f GiB) free_space') % (
764 self.policies, total_size, float(total_size) / 1024**3,
765 len(self._lru), self._free_disk,
766 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400767 raise NoMoreSpace(msg)
768 except KeyError:
769 # That means an internal error.
770 raise NoMoreSpace('Nothing to remove, can\'t happend')
771 digest, (size, _) = self._lru.pop_oldest()
772 logging.debug('Removing LRU file %s', digest)
773 self._delete_file(digest, size)
774 return size
775
776 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
777 """Adds an item into LRU cache marking it as a newest one."""
778 self._lock.assert_locked()
779 if size == UNKNOWN_FILE_SIZE:
780 size = fs.stat(self._path(digest)).st_size
781 self._added.append(size)
782 self._lru.add(digest, size)
783 self._free_disk -= size
784 # Do a quicker version of self._trim(). It only enforces free disk space,
785 # not cache size limits. It doesn't actually look at real free disk space,
786 # only uses its cache values. self._trim() will be called later to enforce
787 # real trimming but doing this quick version here makes it possible to map
788 # an isolated that is larger than the current amount of free disk space when
789 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000790 while (self.policies.min_free_space and self._lru and
791 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000792 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400793 if self._remove_lru_file(False) == -1:
794 break
795
796 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000797 """Deletes cache file from the file system.
798
799 Updates self._free_disk.
800 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400801 self._lock.assert_locked()
802 try:
803 if size == UNKNOWN_FILE_SIZE:
804 try:
805 size = fs.stat(self._path(digest)).st_size
806 except OSError:
807 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000808 if file_path.try_remove(self._path(digest)):
809 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400810 except OSError as e:
811 if e.errno != errno.ENOENT:
812 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000814 def _get_mtime(self, digest):
815 """Get mtime of cache file."""
816 return os.path.getmtime(self._path(digest))
817
818 def _is_valid_hash(self, digest):
819 """Verify digest with supported hash algos."""
820 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000821 if digest == isolated_format.hash_file(self._path(digest), algo):
822 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000823 return False
824
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826class NamedCache(Cache):
827 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400829 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400830 name is a short identifier that describes the contents of the cache, e.g.
831 "git_v8" could be all git repositories required by v8 builds, or
832 "build_chromium" could be build artefacts of the Chromium.
833 path is a directory path relative to the task run dir. Cache installation
834 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400835 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400836 _DIR_ALPHABET = string.ascii_letters + string.digits
837 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000838 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400839
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400840 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841 """Initializes NamedCaches.
842
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400843 Arguments:
844 - cache_dir is a directory for persistent cache storage.
845 - policies is a CachePolicies instance.
846 - time_fn is a function that returns timestamp (float) and used to take
847 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400848 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000851 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
853 self._lru = lru.LRUDict()
854 if not fs.isdir(self.cache_dir):
855 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000856 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000857 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000859 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000860 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000861 with open(self.state_file, 'r') as f:
862 logging.info('named cache state file: %s\n%s', self.state_file,
863 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000864 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000865
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400866 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000867 logging.exception(
868 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400869 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000870 fs.makedirs(self.cache_dir)
Takuto Ikutadadfbb02020-07-10 03:31:26 +0000871 self._lru = lru.LRUDict()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000872 with self._lock:
873 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400874 if time_fn:
875 self._lru.time_fn = time_fn
876
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400877 @property
878 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000879 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400880 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000881 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882
Takuto Ikutaeab23172020-07-02 03:50:02 +0000883 def _sudo_chown(self, path):
884 if sys.platform == 'win32':
885 return
886 uid = os.getuid()
887 if os.stat(path).st_uid == uid:
888 return
889 # Maybe owner of |path| is different from runner of this script. This is to
890 # make fs.rename work in that case.
891 # https://crbug.com/986676
892 subprocess.check_call(['sudo', '-n', 'chown', str(uid), path])
893
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000894 def install(self, dst, name):
895 """Creates the directory |dst| and moves a previous named cache |name| if it
896 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400897
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000898 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400899
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000900 Returns the reused named cache size in bytes, or 0 if none was present.
901
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400902 Raises NamedCacheError if cannot install the cache.
903 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000904 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400905 with self._lock:
906 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000907 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400908 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000909 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400910
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000911 # Remove the named symlink if it exists.
912 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000913 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000914 # Remove the symlink itself, not its destination.
915 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000916
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000917 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000918 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400919 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000920 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000921 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000922 file_path.ensure_tree(os.path.dirname(dst))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000923 self._sudo_chown(abs_cache)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000924 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400925 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000926 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400927
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000928 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400929 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400930
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000931 # The named cache does not exist, create an empty directory. When
932 # uninstalling, we will move it back to the cache and create an an
933 # entry.
934 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000935 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000936 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400937 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000938 # Raise using the original traceback.
939 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000940 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000941 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000942 finally:
943 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400944
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000945 def uninstall(self, src, name):
946 """Moves the cache directory back into the named cache hive for an eventual
947 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400948
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000949 The opposite of install().
950
951 src must be absolute and unicode. Its content is moved back into the local
952 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400953
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000954 Returns the named cache size in bytes.
955
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400956 Raises NamedCacheError if cannot uninstall the cache.
957 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000958 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Junji Watanabe9cdfff52021-01-08 07:20:35 +0000959 start = time.time()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400960 with self._lock:
961 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000962 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400963 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000964 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000965 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400966 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400967
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000968 if name in self._lru:
969 # This shouldn't happen but just remove the preexisting one and move
970 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000971 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000972 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000973
Takuto Ikuta93483272020-06-05 09:06:34 +0000974 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000975 size = _get_recursive_size(src)
Takuto Ikuta262f8292020-08-26 01:54:22 +0000976 logging.info('- Size is %s', size)
977 if size is None:
978 # Do not save a named cache that was deleted.
979 return
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400980
981 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000982 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400983 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000984 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400985 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikutaeab23172020-07-02 03:50:02 +0000986 self._sudo_chown(src)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000987 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400988
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000989 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000990 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000991
992 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
993 # for user convenience.
994 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000995 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000996 file_path.remove(named_path)
997 else:
998 file_path.ensure_tree(os.path.dirname(named_path))
999
1000 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001001 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001002 logging.info(
1003 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001004 except OSError:
1005 # Ignore on Windows. It happens when running as a normal user or when
1006 # UAC is enabled and the user is a filtered administrator account.
1007 if sys.platform != 'win32':
1008 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001009 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001010 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +00001011 # Raise using the original traceback.
1012 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001013 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001014 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001015 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001016 # Call save() at every uninstall. The assumptions are:
1017 # - The total the number of named caches is low, so the state.json file
1018 # is small, so the time it takes to write it to disk is short.
1019 # - The number of mapped named caches per task is low, so the number of
1020 # times save() is called on tear-down isn't high enough to be
1021 # significant.
1022 # - uninstall() sometimes throws due to file locking on Windows or
1023 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001024 self._save()
Junji Watanabe9cdfff52021-01-08 07:20:35 +00001025 logging.info('NamedCache.uninstall(%r, %r) took %d seconds', src, name,
1026 time.time() - start)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001027
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001028 # Cache interface implementation.
1029
1030 def __len__(self):
1031 with self._lock:
1032 return len(self._lru)
1033
1034 def __iter__(self):
1035 # This is not thread-safe.
1036 return self._lru.__iter__()
1037
John Budorickc6186972020-02-26 00:58:14 +00001038 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001039 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001040 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001041
1042 @property
1043 def total_size(self):
1044 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001045 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001046
1047 def get_oldest(self):
1048 with self._lock:
1049 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001050 # (key, (value, ts))
1051 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001052 except KeyError:
1053 return None
1054
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001055 def remove_oldest(self):
1056 with self._lock:
1057 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001058 _name, size = self._remove_lru_item()
1059 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001060
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001061 def save(self):
1062 with self._lock:
1063 return self._save()
1064
John Budorickc6186972020-02-26 00:58:14 +00001065 def touch(self, *names):
1066 with self._lock:
1067 for name in names:
1068 if name in self._lru:
1069 self._lru.touch(name)
1070 self._save()
1071
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001072 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001073 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001074 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001075 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001076 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001077
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001078 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001079 if self._policies.max_items:
1080 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001081 name, size = self._remove_lru_item()
1082 evicted.append(size)
1083 logging.info(
1084 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1085 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001086
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001087 # Trim according to maximum age.
1088 if self._policies.max_age_secs:
1089 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1090 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001091 _name, (_data, ts) = self._lru.get_oldest()
1092 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001093 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001094 name, size = self._remove_lru_item()
1095 evicted.append(size)
1096 logging.info(
1097 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1098 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001099
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001100 # Trim according to minimum free space.
1101 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001102 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001103 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001104 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001105 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001106 name, size = self._remove_lru_item()
1107 evicted.append(size)
1108 logging.info(
1109 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1110 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001111
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001112 # Trim according to maximum total size.
1113 if self._policies.max_cache_size:
1114 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001115 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001116 if total <= self._policies.max_cache_size:
1117 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001118 name, size = self._remove_lru_item()
1119 evicted.append(size)
1120 logging.info(
1121 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1122 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001123
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001124 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001125 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001126
1127 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001128 """Removes unknown directories.
1129
1130 Does not recalculate the cache size since it's surprisingly slow on some
1131 OSes.
1132 """
1133 success = True
1134 with self._lock:
1135 try:
1136 actual = set(fs.listdir(self.cache_dir))
1137 actual.discard(self.NAMED_DIR)
1138 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001139 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001140 # First, handle the actual cache content.
1141 # Remove missing entries.
1142 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001143 name, size = self._lru.pop(expected[missing])
1144 logging.warning(
1145 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001146 # Remove unexpected items.
1147 for unexpected in (actual - set(expected)):
1148 try:
1149 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001150 logging.warning(
1151 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001152 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001153 file_path.rmtree(p)
1154 else:
1155 fs.remove(p)
1156 except (IOError, OSError) as e:
1157 logging.error('Failed to remove %s: %s', unexpected, e)
1158 success = False
1159
1160 # Second, fix named cache links.
1161 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001162 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001163 actual = set(fs.listdir(named))
1164 expected = set(self._lru)
1165 # Confirm entries. Do not add missing ones for now.
1166 for name in expected.intersection(actual):
1167 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001168 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001169 if fs.islink(p):
1170 link = fs.readlink(p)
1171 if expected_link == link:
1172 continue
1173 logging.warning(
1174 'Unexpected symlink for cache %s: %s, expected %s',
1175 name, link, expected_link)
1176 else:
1177 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001178 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001179 file_path.rmtree(p)
1180 else:
1181 fs.remove(p)
1182 # Remove unexpected items.
1183 for unexpected in (actual - expected):
1184 try:
1185 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1186 if fs.isdir(p):
1187 file_path.rmtree(p)
1188 else:
1189 fs.remove(p)
1190 except (IOError, OSError) as e:
1191 logging.error('Failed to remove %s: %s', unexpected, e)
1192 success = False
1193 finally:
1194 self._save()
1195 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001196
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001197 # Internal functions.
1198
1199 def _try_upgrade(self):
1200 """Upgrades from the old format to the new one if necessary.
1201
1202 This code can be removed so all bots are known to have the right new format.
1203 """
1204 if not self._lru:
1205 return
1206 _name, (data, _ts) = self._lru.get_oldest()
1207 if isinstance(data, (list, tuple)):
1208 return
1209 # Update to v2.
1210 def upgrade(_name, rel_cache):
1211 abs_cache = os.path.join(self.cache_dir, rel_cache)
1212 return rel_cache, _get_recursive_size(abs_cache)
1213 self._lru.transform(upgrade)
1214 self._save()
1215
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001216 def _remove_lru_item(self):
1217 """Removes the oldest LRU entry. LRU must not be empty."""
1218 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1219 logging.info('Removing named cache %r', name)
1220 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001221 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001222
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001223 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001224 """Creates and returns relative path of a new cache directory.
1225
1226 In practice, it is a 2-letter string.
1227 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001228 # We randomly generate directory names that have two lower/upper case
1229 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1230 abc_len = len(self._DIR_ALPHABET)
1231 tried = set()
1232 while len(tried) < 1000:
1233 i = random.randint(0, abc_len * abc_len - 1)
1234 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001235 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001236 if rel_path in tried:
1237 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001238 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001239 if not fs.exists(abs_path):
1240 return rel_path
1241 tried.add(rel_path)
1242 raise NamedCacheError(
1243 'could not allocate a new cache dir, too many cache dirs')
1244
1245 def _remove(self, name):
1246 """Removes a cache directory and entry.
1247
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001248 Returns:
1249 Number of caches deleted.
1250 """
1251 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001252 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001253 named_dir = self._get_named_path(name)
1254 if fs.islink(named_dir):
1255 fs.unlink(named_dir)
1256
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001257 # Then remove the actual data.
1258 if name not in self._lru:
1259 return
1260 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001261 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001262 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001263 file_path.rmtree(abs_path)
1264 self._lru.pop(name)
1265
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001266 def _save(self):
1267 self._lock.assert_locked()
1268 file_path.ensure_tree(self.cache_dir)
1269 self._lru.save(self.state_file)
1270
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001271 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001272 return os.path.join(self.cache_dir, self.NAMED_DIR, name)