blob: a8ff494781818577ae8d6150d7b8e08de6396ea1 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Junji Watanabe7b720782020-07-01 01:51:07 +000015import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000017import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040018
19from utils import file_path
20from utils import fs
21from utils import lru
22from utils import threading_utils
23from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000024tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
Lei Leife202df2019-06-11 17:33:34 +000026# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000027from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000028import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
Junji Watanabe5e73aab2020-04-09 04:20:27 +000030import isolated_format
31
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040032# The file size to be used when we don't know the correct file size,
33# generally used for .isolated files.
34UNKNOWN_FILE_SIZE = None
35
36
37def file_write(path, content_generator):
38 """Writes file content as generated by content_generator.
39
40 Creates the intermediary directory as needed.
41
42 Returns the number of bytes written.
43
44 Meant to be mocked out in unit tests.
45 """
46 file_path.ensure_tree(os.path.dirname(path))
47 total = 0
48 with fs.open(path, 'wb') as f:
49 for d in content_generator:
50 total += len(d)
51 f.write(d)
52 return total
53
54
55def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000056 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040057
58 Currently it just checks the file exists and its size matches the expectation.
59 """
60 if size == UNKNOWN_FILE_SIZE:
61 return fs.isfile(path)
62 try:
63 actual_size = fs.stat(path).st_size
64 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000065 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
66 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040067 return False
68 if size != actual_size:
69 logging.warning(
70 'Found invalid item %s; %d != %d',
71 os.path.basename(path), actual_size, size)
72 return False
73 return True
74
75
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000076def trim_caches(caches, path, min_free_space, max_age_secs):
77 """Trims multiple caches.
78
79 The goal here is to coherently trim all caches in a coherent LRU fashion,
80 deleting older items independent of which container they belong to.
81
82 Two policies are enforced first:
83 - max_age_secs
84 - min_free_space
85
86 Once that's done, then we enforce each cache's own policies.
87
88 Returns:
89 Slice containing the size of all items evicted.
90 """
91 min_ts = time.time() - max_age_secs if max_age_secs else 0
92 free_disk = file_path.get_free_space(path) if min_free_space else 0
93 total = []
94 if min_ts or free_disk:
95 while True:
96 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
97 if not oldest:
98 break
Lei Leife202df2019-06-11 17:33:34 +000099 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000100 c, ts = oldest[0]
101 if ts >= min_ts and free_disk >= min_free_space:
102 break
103 total.append(c.remove_oldest())
104 if min_free_space:
105 free_disk = file_path.get_free_space(path)
106 # Evaluate each cache's own policies.
107 for c in caches:
108 total.extend(c.trim())
109 return total
110
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000111def _use_scandir():
112 # Use scandir in windows for faster execution.
113 # Do not use in other OS due to crbug.com/989409
114 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000115
Junji Watanabe38b28b02020-04-23 10:23:30 +0000116
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000117def _get_recursive_size(path):
118 """Returns the total data size for the specified path.
119
120 This function can be surprisingly slow on OSX, so its output should be cached.
121 """
122 try:
123 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000124 if _use_scandir():
125
126 if sys.platform == 'win32':
Junji Watanabe38b28b02020-04-23 10:23:30 +0000127
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000128 def direntIsJunction(entry):
129 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
130 # windows-only symbols.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000131 return bool(entry.stat().st_file_attributes & scandir
132 .FILE_ATTRIBUTE_REPARSE_POINT)
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000133 else:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000134
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000135 def direntIsJunction(_entry):
136 return False
137
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000138 stack = [path]
139 while stack:
140 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000141 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000142 continue
143 if entry.is_file():
144 total += entry.stat().st_size
145 elif entry.is_dir():
146 stack.append(entry.path)
147 else:
148 logging.warning('non directory/file entry: %s', entry)
149 return total
150
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000151 for root, _, files in fs.walk(path):
152 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000153 st = fs.lstat(os.path.join(root, f))
154 if stat.S_ISLNK(st.st_mode):
155 continue
156 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000157 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000158 except (IOError, OSError, UnicodeEncodeError):
159 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000160 return None
161
162
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400163class NamedCacheError(Exception):
164 """Named cache specific error."""
165
166
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400167class NoMoreSpace(Exception):
168 """Not enough space to map the whole directory."""
169 pass
170
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400171
172class CachePolicies(object):
173 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
174 """Common caching policies for the multiple caches (isolated, named, cipd).
175
176 Arguments:
177 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
178 cache is effectively a leak.
179 - min_free_space: Trim if disk free space becomes lower than this value. If
180 0, it will unconditionally fill the disk.
181 - max_items: Maximum number of items to keep in the cache. If 0, do not
182 enforce a limit.
183 - max_age_secs: Maximum age an item is kept in the cache until it is
184 automatically evicted. Having a lot of dead luggage slows
185 everything down.
186 """
187 self.max_cache_size = max_cache_size
188 self.min_free_space = min_free_space
189 self.max_items = max_items
190 self.max_age_secs = max_age_secs
191
192 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000193 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
194 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
195 self.max_cache_size, float(self.max_cache_size) / 1024**3,
196 self.max_items, self.min_free_space,
197 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400198
199
200class CacheMiss(Exception):
201 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400202 def __init__(self, digest):
203 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000204 super(CacheMiss,
205 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400206
207
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000209
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400210 def __init__(self, cache_dir):
211 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000212 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400213 assert file_path.isabs(cache_dir), cache_dir
214 self.cache_dir = cache_dir
215 self._lock = threading_utils.LockWithAssert()
216 # Profiling values.
217 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400218 self._used = []
219
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000220 def __nonzero__(self):
221 """A cache is always True.
222
223 Otherwise it falls back to __len__, which is surprising.
224 """
225 return True
226
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000227 def __bool__(self):
228 """A cache is always True.
229
230 Otherwise it falls back to __len__, which is surprising.
231 """
232 return True
233
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000234 def __len__(self):
235 """Returns the number of entries in the cache."""
236 raise NotImplementedError()
237
238 def __iter__(self):
239 """Iterates over all the entries names."""
240 raise NotImplementedError()
241
242 def __contains__(self, name):
243 """Returns if an entry is in the cache."""
244 raise NotImplementedError()
245
246 @property
247 def total_size(self):
248 """Returns the total size of the cache in bytes."""
249 raise NotImplementedError()
250
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400251 @property
252 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000253 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400254 with self._lock:
255 return self._added[:]
256
257 @property
258 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000259 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400260 with self._lock:
261 return self._used[:]
262
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000263 def get_oldest(self):
264 """Returns timestamp of oldest cache entry or None.
265
266 Returns:
267 Timestamp of the oldest item.
268
269 Used for manual trimming.
270 """
271 raise NotImplementedError()
272
273 def remove_oldest(self):
274 """Removes the oldest item from the cache.
275
276 Returns:
277 Size of the oldest item.
278
279 Used for manual trimming.
280 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400281 raise NotImplementedError()
282
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 def save(self):
284 """Saves the current cache to disk."""
285 raise NotImplementedError()
286
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000288 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400289
290 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000291 Slice with the size of evicted items.
292 """
293 raise NotImplementedError()
294
295 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000296 """Deletes any corrupted item from the cache, then calls trim(), then
297 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000298
299 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400300 """
301 raise NotImplementedError()
302
303
304class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400305 """Content addressed cache that stores objects temporarily.
306
307 It can be accessed concurrently from multiple threads, so it should protect
308 its internal state with some lock.
309 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400310
311 def __enter__(self):
312 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000313 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400314 return self
315
316 def __exit__(self, _exc_type, _exec_value, _traceback):
317 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000318 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400319 return False
320
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321 def touch(self, digest, size):
322 """Ensures item is not corrupted and updates its LRU position.
323
324 Arguments:
325 digest: hash digest of item to check.
326 size: expected size of this item.
327
328 Returns:
329 True if item is in cache and not corrupted.
330 """
331 raise NotImplementedError()
332
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400333 def getfileobj(self, digest):
334 """Returns a readable file like object.
335
336 If file exists on the file system it will have a .name attribute with an
337 absolute path to the file.
338 """
339 raise NotImplementedError()
340
341 def write(self, digest, content):
342 """Reads data from |content| generator and stores it in cache.
343
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000344 It is possible to write to an object that already exists. It may be
345 ignored (sent to /dev/null) but the timestamp is still updated.
346
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400347 Returns digest to simplify chaining.
348 """
349 raise NotImplementedError()
350
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400351
352class MemoryContentAddressedCache(ContentAddressedCache):
353 """ContentAddressedCache implementation that stores everything in memory."""
354
Lei Leife202df2019-06-11 17:33:34 +0000355 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400356 """Args:
357 file_mode_mask: bit mask to AND file mode with. Default value will make
358 all mapped files to be read only.
359 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400360 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000362 # Items in a LRU lookup dict(digest: size).
363 self._lru = lru.LRUDict()
364
365 # Cache interface implementation.
366
367 def __len__(self):
368 with self._lock:
369 return len(self._lru)
370
371 def __iter__(self):
372 # This is not thread-safe.
373 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374
375 def __contains__(self, digest):
376 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000377 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378
379 @property
380 def total_size(self):
381 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000382 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400383
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000384 def get_oldest(self):
385 with self._lock:
386 try:
387 # (key, (value, ts))
388 return self._lru.get_oldest()[1][1]
389 except KeyError:
390 return None
391
392 def remove_oldest(self):
393 with self._lock:
394 # TODO(maruel): Update self._added.
395 # (key, (value, ts))
396 return len(self._lru.pop_oldest()[1][0])
397
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000398 def save(self):
399 pass
400
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000401 def trim(self):
402 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000403 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404
405 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000406 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 pass
408
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000409 # ContentAddressedCache interface implementation.
410
411 def __contains__(self, digest):
412 with self._lock:
413 return digest in self._lru
414
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 def touch(self, digest, size):
416 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 try:
418 self._lru.touch(digest)
419 except KeyError:
420 return False
421 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400422
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400423 def getfileobj(self, digest):
424 with self._lock:
425 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000426 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400427 except KeyError:
428 raise CacheMiss(digest)
429 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000430 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400431 return io.BytesIO(d)
432
433 def write(self, digest, content):
434 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000435 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400436 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000437 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 self._added.append(len(data))
439 return digest
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441
442class DiskContentAddressedCache(ContentAddressedCache):
443 """Stateful LRU cache in a flat hash table in a directory.
444
445 Saves its state as json file.
446 """
447 STATE_FILE = u'state.json'
448
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000449 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 """
451 Arguments:
452 cache_dir: directory where to place the cache.
453 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400454 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000455 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400456 """
457 # All protected methods (starting with '_') except _path should be called
458 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400459 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400460 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400461 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
462 # Items in a LRU lookup dict(digest: size).
463 self._lru = lru.LRUDict()
464 # Current cached free disk space. It is updated by self._trim().
465 file_path.ensure_tree(self.cache_dir)
466 self._free_disk = file_path.get_free_space(self.cache_dir)
467 # The first item in the LRU cache that must not be evicted during this run
468 # since it was referenced. All items more recent that _protected in the LRU
469 # cache are also inherently protected. It could be a set() of all items
470 # referenced but this increases memory usage without a use case.
471 self._protected = None
472 # Cleanup operations done by self._load(), if any.
473 self._operations = []
474 with tools.Profiler('Setup'):
475 with self._lock:
476 self._load(trim, time_fn)
477
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000478 # Cache interface implementation.
479
480 def __len__(self):
481 with self._lock:
482 return len(self._lru)
483
484 def __iter__(self):
485 # This is not thread-safe.
486 return self._lru.__iter__()
487
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 def __contains__(self, digest):
489 with self._lock:
490 return digest in self._lru
491
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400493 def total_size(self):
494 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000495 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400496
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000497 def get_oldest(self):
498 with self._lock:
499 try:
500 # (key, (value, ts))
501 return self._lru.get_oldest()[1][1]
502 except KeyError:
503 return None
504
505 def remove_oldest(self):
506 with self._lock:
507 # TODO(maruel): Update self._added.
508 return self._remove_lru_file(True)
509
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000510 def save(self):
511 with self._lock:
512 return self._save()
513
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000514 def trim(self):
515 """Forces retention policies."""
516 with self._lock:
517 return self._trim()
518
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 def cleanup(self):
520 """Cleans up the cache directory.
521
522 Ensures there is no unknown files in cache_dir.
523 Ensures the read-only bits are set correctly.
524
525 At that point, the cache was already loaded, trimmed to respect cache
526 policies.
527 """
528 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000529 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400530 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000531 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400532 # It'd be faster if there were a readdir() function.
533 for filename in fs.listdir(self.cache_dir):
534 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000535 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536 continue
537 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000538 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400539 previous.remove(filename)
540 continue
541
542 # An untracked file. Delete it.
543 logging.warning('Removing unknown file %s from cache', filename)
544 p = self._path(filename)
545 if fs.isdir(p):
546 try:
547 file_path.rmtree(p)
548 except OSError:
549 pass
550 else:
551 file_path.try_remove(p)
552 continue
553
554 if previous:
555 # Filter out entries that were not found.
556 logging.warning('Removed %d lost files', len(previous))
557 for filename in previous:
558 self._lru.pop(filename)
559 self._save()
560
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000561 # Verify hash of every single item to detect corruption. the corrupted
562 # files will be evicted.
563 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000564 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000565 # verify only if the mtime is grather than the timestamp in state.json
566 # to avoid take too long time.
567 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000568 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000569 logging.warning('Item has been modified. item: %s', digest)
570 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000571 # Update timestamp in state.json
572 self._lru.touch(digest)
573 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000574 # remove corrupted file from LRU and file system
575 self._lru.pop(digest)
576 self._delete_file(digest, UNKNOWN_FILE_SIZE)
577 logging.error('Deleted corrupted item: %s', digest)
578 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400579
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000580 # ContentAddressedCache interface implementation.
581
582 def __contains__(self, digest):
583 with self._lock:
584 return digest in self._lru
585
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400586 def touch(self, digest, size):
587 """Verifies an actual file is valid and bumps its LRU position.
588
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000589 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590
591 Note that is doesn't compute the hash so it could still be corrupted if the
592 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400593 """
594 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000595 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400596
597 # Update its LRU position.
598 with self._lock:
599 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000600 if looks_valid:
601 # Exists but not in the LRU anymore.
602 self._delete_file(digest, size)
603 return False
604 if not looks_valid:
605 self._lru.pop(digest)
606 # Exists but not in the LRU anymore.
607 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400608 return False
609 self._lru.touch(digest)
610 self._protected = self._protected or digest
611 return True
612
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400613 def getfileobj(self, digest):
614 try:
615 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400616 except IOError:
617 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000618 with self._lock:
619 try:
620 self._used.append(self._lru[digest])
621 except KeyError:
622 # If the digest is not actually in _lru, assume it is a cache miss.
623 # Existing file will be overwritten by whoever uses the cache and added
624 # to _lru.
625 f.close()
626 raise CacheMiss(digest)
627 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 def write(self, digest, content):
630 assert content is not None
631 with self._lock:
632 self._protected = self._protected or digest
633 path = self._path(digest)
634 # A stale broken file may remain. It is possible for the file to have write
635 # access bit removed which would cause the file_write() call to fail to open
636 # in write mode. Take no chance here.
637 file_path.try_remove(path)
638 try:
639 size = file_write(path, content)
640 except:
641 # There are two possible places were an exception can occur:
642 # 1) Inside |content| generator in case of network or unzipping errors.
643 # 2) Inside file_write itself in case of disk IO errors.
644 # In any case delete an incomplete file and propagate the exception to
645 # caller, it will be logged there.
646 file_path.try_remove(path)
647 raise
648 # Make the file read-only in the cache. This has a few side-effects since
649 # the file node is modified, so every directory entries to this file becomes
650 # read-only. It's fine here because it is a new file.
651 file_path.set_read_only(path, True)
652 with self._lock:
653 self._add(digest, size)
654 return digest
655
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000656 # Internal functions.
657
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400658 def _load(self, trim, time_fn):
659 """Loads state of the cache from json file.
660
661 If cache_dir does not exist on disk, it is created.
662 """
663 self._lock.assert_locked()
664
665 if not fs.isfile(self.state_file):
666 if not fs.isdir(self.cache_dir):
667 fs.makedirs(self.cache_dir)
668 else:
669 # Load state of the cache.
670 try:
671 self._lru = lru.LRUDict.load(self.state_file)
672 except ValueError as err:
673 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000674 # Don't want to keep broken cache dir.
675 file_path.rmtree(self.cache_dir)
676 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000677 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 if time_fn:
679 self._lru.time_fn = time_fn
680 if trim:
681 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400682
683 def _save(self):
684 """Saves the LRU ordering."""
685 self._lock.assert_locked()
686 if sys.platform != 'win32':
687 d = os.path.dirname(self.state_file)
688 if fs.isdir(d):
689 # Necessary otherwise the file can't be created.
690 file_path.set_read_only(d, False)
691 if fs.isfile(self.state_file):
692 file_path.set_read_only(self.state_file, False)
693 self._lru.save(self.state_file)
694
695 def _trim(self):
696 """Trims anything we don't know, make sure enough free space exists."""
697 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400699
700 # Trim old items.
701 if self.policies.max_age_secs:
702 cutoff = self._lru.time_fn() - self.policies.max_age_secs
703 while self._lru:
704 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000705 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 if oldest[1][1] >= cutoff:
707 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000708 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709
710 # Ensure maximum cache size.
711 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000712 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 e = self._remove_lru_file(True)
715 evicted.append(e)
716 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400717
718 # Ensure maximum number of items in the cache.
719 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000720 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400722
723 # Ensure enough free space.
724 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725 while (
726 self.policies.min_free_space and
727 self._lru and
728 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 # self._free_disk is updated by this call.
730 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400731
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000732 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000733 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400734 usage_percent = 0.
735 if total_usage:
736 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
737
738 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000739 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
740 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000741 '%.1fkb)', len(evicted),
742 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
743 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746
747 def _path(self, digest):
748 """Returns the path to one item."""
749 return os.path.join(self.cache_dir, digest)
750
751 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000752 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000753
754 Updates self._free_disk.
755 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400756 self._lock.assert_locked()
757 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000758 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000760 total_size = sum(self._lru.values())
761 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000762 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
763 '%s bytes (%.3f GiB) free_space') % (
764 self.policies, total_size, float(total_size) / 1024**3,
765 len(self._lru), self._free_disk,
766 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400767 raise NoMoreSpace(msg)
768 except KeyError:
769 # That means an internal error.
770 raise NoMoreSpace('Nothing to remove, can\'t happend')
771 digest, (size, _) = self._lru.pop_oldest()
772 logging.debug('Removing LRU file %s', digest)
773 self._delete_file(digest, size)
774 return size
775
776 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
777 """Adds an item into LRU cache marking it as a newest one."""
778 self._lock.assert_locked()
779 if size == UNKNOWN_FILE_SIZE:
780 size = fs.stat(self._path(digest)).st_size
781 self._added.append(size)
782 self._lru.add(digest, size)
783 self._free_disk -= size
784 # Do a quicker version of self._trim(). It only enforces free disk space,
785 # not cache size limits. It doesn't actually look at real free disk space,
786 # only uses its cache values. self._trim() will be called later to enforce
787 # real trimming but doing this quick version here makes it possible to map
788 # an isolated that is larger than the current amount of free disk space when
789 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000790 while (self.policies.min_free_space and self._lru and
791 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000792 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400793 if self._remove_lru_file(False) == -1:
794 break
795
796 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000797 """Deletes cache file from the file system.
798
799 Updates self._free_disk.
800 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400801 self._lock.assert_locked()
802 try:
803 if size == UNKNOWN_FILE_SIZE:
804 try:
805 size = fs.stat(self._path(digest)).st_size
806 except OSError:
807 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000808 if file_path.try_remove(self._path(digest)):
809 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400810 except OSError as e:
811 if e.errno != errno.ENOENT:
812 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000814 def _get_mtime(self, digest):
815 """Get mtime of cache file."""
816 return os.path.getmtime(self._path(digest))
817
818 def _is_valid_hash(self, digest):
819 """Verify digest with supported hash algos."""
820 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000821 if digest == isolated_format.hash_file(self._path(digest), algo):
822 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000823 return False
824
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826class NamedCache(Cache):
827 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400829 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400830 name is a short identifier that describes the contents of the cache, e.g.
831 "git_v8" could be all git repositories required by v8 builds, or
832 "build_chromium" could be build artefacts of the Chromium.
833 path is a directory path relative to the task run dir. Cache installation
834 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400835 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400836 _DIR_ALPHABET = string.ascii_letters + string.digits
837 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000838 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400839
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400840 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841 """Initializes NamedCaches.
842
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400843 Arguments:
844 - cache_dir is a directory for persistent cache storage.
845 - policies is a CachePolicies instance.
846 - time_fn is a function that returns timestamp (float) and used to take
847 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400848 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000851 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
853 self._lru = lru.LRUDict()
854 if not fs.isdir(self.cache_dir):
855 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000856 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000857 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000859 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000860 if not isinstance(size, six.integer_types):
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000861 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000862
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400863 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000864 logging.exception(
865 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400866 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000867 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000868 with self._lock:
869 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400870 if time_fn:
871 self._lru.time_fn = time_fn
872
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400873 @property
874 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000875 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400876 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000877 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400878
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000879 def install(self, dst, name):
880 """Creates the directory |dst| and moves a previous named cache |name| if it
881 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400882
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000883 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400884
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000885 Returns the reused named cache size in bytes, or 0 if none was present.
886
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400887 Raises NamedCacheError if cannot install the cache.
888 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000889 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400890 with self._lock:
891 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000892 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400893 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000894 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400895
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000896 # Remove the named symlink if it exists.
897 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000898 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000899 # Remove the symlink itself, not its destination.
900 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000901
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000902 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000903 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400904 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000905 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000906 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000907 file_path.ensure_tree(os.path.dirname(dst))
908 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400909 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000910 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400911
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000912 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400913 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400914
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000915 # The named cache does not exist, create an empty directory. When
916 # uninstalling, we will move it back to the cache and create an an
917 # entry.
918 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000919 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000920 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400921 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000922 # Raise using the original traceback.
923 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000924 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000925 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000926 finally:
927 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400928
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000929 def uninstall(self, src, name):
930 """Moves the cache directory back into the named cache hive for an eventual
931 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400932
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000933 The opposite of install().
934
935 src must be absolute and unicode. Its content is moved back into the local
936 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400937
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000938 Returns the named cache size in bytes.
939
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400940 Raises NamedCacheError if cannot uninstall the cache.
941 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000942 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400943 with self._lock:
944 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000945 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000947 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000948 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400949 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400950
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000951 if name in self._lru:
952 # This shouldn't happen but just remove the preexisting one and move
953 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000954 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000955 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000956
Takuto Ikuta93483272020-06-05 09:06:34 +0000957 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000958 size = _get_recursive_size(src)
959 logging.info('- Size is %d', size)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400960
961 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000962 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400963 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000964 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400965 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikuta17a27ac2020-06-24 08:11:55 +0000966
967 if sys.platform != 'win32':
968 uid = os.getuid()
969 if os.stat(src).st_uid != uid:
970 # Maybe owner of |src| is different from runner of this script. This
971 # is to make fs.rename work in that case.
972 # https://crbug.com/986676
973 subprocess.check_call(['sudo', '-n', 'chown', str(uid), src])
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000974 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400975
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000976 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000977 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000978
979 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
980 # for user convenience.
981 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000982 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000983 file_path.remove(named_path)
984 else:
985 file_path.ensure_tree(os.path.dirname(named_path))
986
987 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000988 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000989 logging.info(
990 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000991 except OSError:
992 # Ignore on Windows. It happens when running as a normal user or when
993 # UAC is enabled and the user is a filtered administrator account.
994 if sys.platform != 'win32':
995 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000996 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400997 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000998 # Raise using the original traceback.
999 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001000 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001001 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001002 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001003 # Call save() at every uninstall. The assumptions are:
1004 # - The total the number of named caches is low, so the state.json file
1005 # is small, so the time it takes to write it to disk is short.
1006 # - The number of mapped named caches per task is low, so the number of
1007 # times save() is called on tear-down isn't high enough to be
1008 # significant.
1009 # - uninstall() sometimes throws due to file locking on Windows or
1010 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001011 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001012
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001013 # Cache interface implementation.
1014
1015 def __len__(self):
1016 with self._lock:
1017 return len(self._lru)
1018
1019 def __iter__(self):
1020 # This is not thread-safe.
1021 return self._lru.__iter__()
1022
John Budorickc6186972020-02-26 00:58:14 +00001023 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001024 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001025 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001026
1027 @property
1028 def total_size(self):
1029 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001030 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001031
1032 def get_oldest(self):
1033 with self._lock:
1034 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001035 # (key, (value, ts))
1036 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001037 except KeyError:
1038 return None
1039
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001040 def remove_oldest(self):
1041 with self._lock:
1042 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001043 _name, size = self._remove_lru_item()
1044 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001045
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001046 def save(self):
1047 with self._lock:
1048 return self._save()
1049
John Budorickc6186972020-02-26 00:58:14 +00001050 def touch(self, *names):
1051 with self._lock:
1052 for name in names:
1053 if name in self._lru:
1054 self._lru.touch(name)
1055 self._save()
1056
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001057 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001058 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001059 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001060 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001061 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001062
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001063 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001064 if self._policies.max_items:
1065 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001066 name, size = self._remove_lru_item()
1067 evicted.append(size)
1068 logging.info(
1069 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1070 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001071
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001072 # Trim according to maximum age.
1073 if self._policies.max_age_secs:
1074 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1075 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001076 _name, (_data, ts) = self._lru.get_oldest()
1077 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001078 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001079 name, size = self._remove_lru_item()
1080 evicted.append(size)
1081 logging.info(
1082 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1083 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001084
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001085 # Trim according to minimum free space.
1086 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001087 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001088 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001089 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001090 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001091 name, size = self._remove_lru_item()
1092 evicted.append(size)
1093 logging.info(
1094 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1095 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001096
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001097 # Trim according to maximum total size.
1098 if self._policies.max_cache_size:
1099 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001100 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001101 if total <= self._policies.max_cache_size:
1102 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001103 name, size = self._remove_lru_item()
1104 evicted.append(size)
1105 logging.info(
1106 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1107 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001108
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001109 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001110 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001111
1112 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001113 """Removes unknown directories.
1114
1115 Does not recalculate the cache size since it's surprisingly slow on some
1116 OSes.
1117 """
1118 success = True
1119 with self._lock:
1120 try:
1121 actual = set(fs.listdir(self.cache_dir))
1122 actual.discard(self.NAMED_DIR)
1123 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001124 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001125 # First, handle the actual cache content.
1126 # Remove missing entries.
1127 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001128 name, size = self._lru.pop(expected[missing])
1129 logging.warning(
1130 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001131 # Remove unexpected items.
1132 for unexpected in (actual - set(expected)):
1133 try:
1134 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001135 logging.warning(
1136 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001137 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001138 file_path.rmtree(p)
1139 else:
1140 fs.remove(p)
1141 except (IOError, OSError) as e:
1142 logging.error('Failed to remove %s: %s', unexpected, e)
1143 success = False
1144
1145 # Second, fix named cache links.
1146 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001147 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001148 actual = set(fs.listdir(named))
1149 expected = set(self._lru)
1150 # Confirm entries. Do not add missing ones for now.
1151 for name in expected.intersection(actual):
1152 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001153 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001154 if fs.islink(p):
1155 link = fs.readlink(p)
1156 if expected_link == link:
1157 continue
1158 logging.warning(
1159 'Unexpected symlink for cache %s: %s, expected %s',
1160 name, link, expected_link)
1161 else:
1162 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001163 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001164 file_path.rmtree(p)
1165 else:
1166 fs.remove(p)
1167 # Remove unexpected items.
1168 for unexpected in (actual - expected):
1169 try:
1170 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1171 if fs.isdir(p):
1172 file_path.rmtree(p)
1173 else:
1174 fs.remove(p)
1175 except (IOError, OSError) as e:
1176 logging.error('Failed to remove %s: %s', unexpected, e)
1177 success = False
1178 finally:
1179 self._save()
1180 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001181
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001182 # Internal functions.
1183
1184 def _try_upgrade(self):
1185 """Upgrades from the old format to the new one if necessary.
1186
1187 This code can be removed so all bots are known to have the right new format.
1188 """
1189 if not self._lru:
1190 return
1191 _name, (data, _ts) = self._lru.get_oldest()
1192 if isinstance(data, (list, tuple)):
1193 return
1194 # Update to v2.
1195 def upgrade(_name, rel_cache):
1196 abs_cache = os.path.join(self.cache_dir, rel_cache)
1197 return rel_cache, _get_recursive_size(abs_cache)
1198 self._lru.transform(upgrade)
1199 self._save()
1200
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001201 def _remove_lru_item(self):
1202 """Removes the oldest LRU entry. LRU must not be empty."""
1203 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1204 logging.info('Removing named cache %r', name)
1205 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001206 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001207
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001208 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001209 """Creates and returns relative path of a new cache directory.
1210
1211 In practice, it is a 2-letter string.
1212 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001213 # We randomly generate directory names that have two lower/upper case
1214 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1215 abc_len = len(self._DIR_ALPHABET)
1216 tried = set()
1217 while len(tried) < 1000:
1218 i = random.randint(0, abc_len * abc_len - 1)
1219 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001220 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001221 if rel_path in tried:
1222 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001223 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001224 if not fs.exists(abs_path):
1225 return rel_path
1226 tried.add(rel_path)
1227 raise NamedCacheError(
1228 'could not allocate a new cache dir, too many cache dirs')
1229
1230 def _remove(self, name):
1231 """Removes a cache directory and entry.
1232
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001233 Returns:
1234 Number of caches deleted.
1235 """
1236 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001237 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001238 named_dir = self._get_named_path(name)
1239 if fs.islink(named_dir):
1240 fs.unlink(named_dir)
1241
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001242 # Then remove the actual data.
1243 if name not in self._lru:
1244 return
1245 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001246 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001247 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001248 file_path.rmtree(abs_path)
1249 self._lru.pop(name)
1250
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001251 def _save(self):
1252 self._lock.assert_locked()
1253 file_path.ensure_tree(self.cache_dir)
1254 self._lru.save(self.state_file)
1255
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001256 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001257 return os.path.join(self.cache_dir, self.NAMED_DIR, name)