blob: 8a804bdeb7b20596c537c41547daa7e548cd0168 [file] [log] [blame]
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -04001# Copyright 2018 The LUCI Authors. All rights reserved.
2# Use of this source code is governed under the Apache License, Version 2.0
3# that can be found in the LICENSE file.
4
5"""Define local cache policies."""
6
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04007import contextlib
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -04008import errno
9import io
10import logging
11import os
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040012import random
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +000013import stat
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040014import string
Junji Watanabe7b720782020-07-01 01:51:07 +000015import subprocess
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040016import sys
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000017import time
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040018
19from utils import file_path
20from utils import fs
21from utils import lru
22from utils import threading_utils
23from utils import tools
Lei Leife202df2019-06-11 17:33:34 +000024tools.force_local_third_party()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040025
Lei Leife202df2019-06-11 17:33:34 +000026# third_party/
Takuto Ikutabf06ebf2019-08-02 07:28:23 +000027from scandir import scandir
Lei Leife202df2019-06-11 17:33:34 +000028import six
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040029
Junji Watanabe5e73aab2020-04-09 04:20:27 +000030import isolated_format
31
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040032# The file size to be used when we don't know the correct file size,
33# generally used for .isolated files.
34UNKNOWN_FILE_SIZE = None
35
36
37def file_write(path, content_generator):
38 """Writes file content as generated by content_generator.
39
40 Creates the intermediary directory as needed.
41
42 Returns the number of bytes written.
43
44 Meant to be mocked out in unit tests.
45 """
46 file_path.ensure_tree(os.path.dirname(path))
47 total = 0
48 with fs.open(path, 'wb') as f:
49 for d in content_generator:
50 total += len(d)
51 f.write(d)
52 return total
53
54
55def is_valid_file(path, size):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +000056 """Returns if the given files appears valid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040057
58 Currently it just checks the file exists and its size matches the expectation.
59 """
60 if size == UNKNOWN_FILE_SIZE:
61 return fs.isfile(path)
62 try:
63 actual_size = fs.stat(path).st_size
64 except OSError as e:
Junji Watanabe38b28b02020-04-23 10:23:30 +000065 logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
66 os.path.basename(path), e)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -040067 return False
68 if size != actual_size:
69 logging.warning(
70 'Found invalid item %s; %d != %d',
71 os.path.basename(path), actual_size, size)
72 return False
73 return True
74
75
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +000076def trim_caches(caches, path, min_free_space, max_age_secs):
77 """Trims multiple caches.
78
79 The goal here is to coherently trim all caches in a coherent LRU fashion,
80 deleting older items independent of which container they belong to.
81
82 Two policies are enforced first:
83 - max_age_secs
84 - min_free_space
85
86 Once that's done, then we enforce each cache's own policies.
87
88 Returns:
89 Slice containing the size of all items evicted.
90 """
91 min_ts = time.time() - max_age_secs if max_age_secs else 0
92 free_disk = file_path.get_free_space(path) if min_free_space else 0
93 total = []
94 if min_ts or free_disk:
95 while True:
96 oldest = [(c, c.get_oldest()) for c in caches if len(c) > 0]
97 if not oldest:
98 break
Lei Leife202df2019-06-11 17:33:34 +000099 oldest.sort(key=lambda k: k[1])
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000100 c, ts = oldest[0]
101 if ts >= min_ts and free_disk >= min_free_space:
102 break
103 total.append(c.remove_oldest())
104 if min_free_space:
105 free_disk = file_path.get_free_space(path)
106 # Evaluate each cache's own policies.
107 for c in caches:
108 total.extend(c.trim())
109 return total
110
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000111def _use_scandir():
112 # Use scandir in windows for faster execution.
113 # Do not use in other OS due to crbug.com/989409
114 return sys.platform == 'win32'
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000115
Junji Watanabe38b28b02020-04-23 10:23:30 +0000116
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000117def _get_recursive_size(path):
118 """Returns the total data size for the specified path.
119
120 This function can be surprisingly slow on OSX, so its output should be cached.
121 """
122 try:
123 total = 0
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000124 if _use_scandir():
125
126 if sys.platform == 'win32':
Junji Watanabe38b28b02020-04-23 10:23:30 +0000127
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000128 def direntIsJunction(entry):
129 # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
130 # windows-only symbols.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000131 return bool(entry.stat().st_file_attributes & scandir
132 .FILE_ATTRIBUTE_REPARSE_POINT)
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000133 else:
Junji Watanabe38b28b02020-04-23 10:23:30 +0000134
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000135 def direntIsJunction(_entry):
136 return False
137
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000138 stack = [path]
139 while stack:
140 for entry in scandir.scandir(stack.pop()):
Takuto Ikuta0671dac2019-08-06 22:38:48 +0000141 if entry.is_symlink() or direntIsJunction(entry):
Takuto Ikutabf06ebf2019-08-02 07:28:23 +0000142 continue
143 if entry.is_file():
144 total += entry.stat().st_size
145 elif entry.is_dir():
146 stack.append(entry.path)
147 else:
148 logging.warning('non directory/file entry: %s', entry)
149 return total
150
Takuto Ikuta54c8b062019-07-31 08:38:41 +0000151 for root, _, files in fs.walk(path):
152 for f in files:
Takuto Ikuta6ba1d0c2019-08-01 05:37:00 +0000153 st = fs.lstat(os.path.join(root, f))
154 if stat.S_ISLNK(st.st_mode):
155 continue
156 total += st.st_size
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000157 return total
Takuto Ikuta74b13b02020-06-08 07:13:50 +0000158 except (IOError, OSError, UnicodeEncodeError):
159 logging.exception('Exception while getting the size of %s', path)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000160 return None
161
162
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400163class NamedCacheError(Exception):
164 """Named cache specific error."""
165
166
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400167class NoMoreSpace(Exception):
168 """Not enough space to map the whole directory."""
169 pass
170
Marc-Antoine Ruel34f5f282018-05-16 16:04:31 -0400171
172class CachePolicies(object):
173 def __init__(self, max_cache_size, min_free_space, max_items, max_age_secs):
174 """Common caching policies for the multiple caches (isolated, named, cipd).
175
176 Arguments:
177 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
178 cache is effectively a leak.
179 - min_free_space: Trim if disk free space becomes lower than this value. If
180 0, it will unconditionally fill the disk.
181 - max_items: Maximum number of items to keep in the cache. If 0, do not
182 enforce a limit.
183 - max_age_secs: Maximum age an item is kept in the cache until it is
184 automatically evicted. Having a lot of dead luggage slows
185 everything down.
186 """
187 self.max_cache_size = max_cache_size
188 self.min_free_space = min_free_space
189 self.max_items = max_items
190 self.max_age_secs = max_age_secs
191
192 def __str__(self):
Takuto Ikutaa953f272020-01-20 02:59:17 +0000193 return ('CachePolicies(max_cache_size=%s (%.3f GiB); max_items=%s; '
194 'min_free_space=%s (%.3f GiB); max_age_secs=%s)') % (
195 self.max_cache_size, float(self.max_cache_size) / 1024**3,
196 self.max_items, self.min_free_space,
197 float(self.min_free_space) / 1024**3, self.max_age_secs)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400198
199
200class CacheMiss(Exception):
201 """Raised when an item is not in cache."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400202 def __init__(self, digest):
203 self.digest = digest
Junji Watanabe38b28b02020-04-23 10:23:30 +0000204 super(CacheMiss,
205 self).__init__('Item with digest %r is not found in cache' % digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400206
207
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400208class Cache(object):
Junji Watanabe38b28b02020-04-23 10:23:30 +0000209
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400210 def __init__(self, cache_dir):
211 if cache_dir is not None:
Takuto Ikuta95459dd2019-10-29 12:39:47 +0000212 assert isinstance(cache_dir, six.text_type), cache_dir
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400213 assert file_path.isabs(cache_dir), cache_dir
214 self.cache_dir = cache_dir
215 self._lock = threading_utils.LockWithAssert()
216 # Profiling values.
217 self._added = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400218 self._used = []
219
Marc-Antoine Ruel6c3be5a2018-09-04 17:19:59 +0000220 def __nonzero__(self):
221 """A cache is always True.
222
223 Otherwise it falls back to __len__, which is surprising.
224 """
225 return True
226
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000227 def __bool__(self):
228 """A cache is always True.
229
230 Otherwise it falls back to __len__, which is surprising.
231 """
232 return True
233
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000234 def __len__(self):
235 """Returns the number of entries in the cache."""
236 raise NotImplementedError()
237
238 def __iter__(self):
239 """Iterates over all the entries names."""
240 raise NotImplementedError()
241
242 def __contains__(self, name):
243 """Returns if an entry is in the cache."""
244 raise NotImplementedError()
245
246 @property
247 def total_size(self):
248 """Returns the total size of the cache in bytes."""
249 raise NotImplementedError()
250
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400251 @property
252 def added(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000253 """Returns a list of the size for each entry added."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400254 with self._lock:
255 return self._added[:]
256
257 @property
258 def used(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000259 """Returns a list of the size for each entry used."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400260 with self._lock:
261 return self._used[:]
262
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000263 def get_oldest(self):
264 """Returns timestamp of oldest cache entry or None.
265
266 Returns:
267 Timestamp of the oldest item.
268
269 Used for manual trimming.
270 """
271 raise NotImplementedError()
272
273 def remove_oldest(self):
274 """Removes the oldest item from the cache.
275
276 Returns:
277 Size of the oldest item.
278
279 Used for manual trimming.
280 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400281 raise NotImplementedError()
282
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000283 def save(self):
284 """Saves the current cache to disk."""
285 raise NotImplementedError()
286
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400287 def trim(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000288 """Enforces cache policies, then calls save().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400289
290 Returns:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000291 Slice with the size of evicted items.
292 """
293 raise NotImplementedError()
294
295 def cleanup(self):
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000296 """Deletes any corrupted item from the cache, then calls trim(), then
297 save().
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000298
299 It is assumed to take significantly more time than trim().
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400300 """
301 raise NotImplementedError()
302
303
304class ContentAddressedCache(Cache):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400305 """Content addressed cache that stores objects temporarily.
306
307 It can be accessed concurrently from multiple threads, so it should protect
308 its internal state with some lock.
309 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400310
311 def __enter__(self):
312 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000313 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400314 return self
315
316 def __exit__(self, _exc_type, _exec_value, _traceback):
317 """Context manager interface."""
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000318 # TODO(maruel): Remove.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400319 return False
320
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400321 def touch(self, digest, size):
322 """Ensures item is not corrupted and updates its LRU position.
323
324 Arguments:
325 digest: hash digest of item to check.
326 size: expected size of this item.
327
328 Returns:
329 True if item is in cache and not corrupted.
330 """
331 raise NotImplementedError()
332
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400333 def getfileobj(self, digest):
334 """Returns a readable file like object.
335
336 If file exists on the file system it will have a .name attribute with an
337 absolute path to the file.
338 """
339 raise NotImplementedError()
340
341 def write(self, digest, content):
342 """Reads data from |content| generator and stores it in cache.
343
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000344 It is possible to write to an object that already exists. It may be
345 ignored (sent to /dev/null) but the timestamp is still updated.
346
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400347 Returns digest to simplify chaining.
348 """
349 raise NotImplementedError()
350
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400351
352class MemoryContentAddressedCache(ContentAddressedCache):
353 """ContentAddressedCache implementation that stores everything in memory."""
354
Lei Leife202df2019-06-11 17:33:34 +0000355 def __init__(self, file_mode_mask=0o500):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400356 """Args:
357 file_mode_mask: bit mask to AND file mode with. Default value will make
358 all mapped files to be read only.
359 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400360 super(MemoryContentAddressedCache, self).__init__(None)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400361 self._file_mode_mask = file_mode_mask
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000362 # Items in a LRU lookup dict(digest: size).
363 self._lru = lru.LRUDict()
364
365 # Cache interface implementation.
366
367 def __len__(self):
368 with self._lock:
369 return len(self._lru)
370
371 def __iter__(self):
372 # This is not thread-safe.
373 return self._lru.__iter__()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400374
375 def __contains__(self, digest):
376 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000377 return digest in self._lru
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400378
379 @property
380 def total_size(self):
381 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000382 return sum(len(i) for i in self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400383
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000384 def get_oldest(self):
385 with self._lock:
386 try:
387 # (key, (value, ts))
388 return self._lru.get_oldest()[1][1]
389 except KeyError:
390 return None
391
392 def remove_oldest(self):
393 with self._lock:
394 # TODO(maruel): Update self._added.
395 # (key, (value, ts))
396 return len(self._lru.pop_oldest()[1][0])
397
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000398 def save(self):
399 pass
400
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000401 def trim(self):
402 """Trimming is not implemented for MemoryContentAddressedCache."""
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000403 return []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400404
405 def cleanup(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000406 """Cleaning is irrelevant, as there's no stateful serialization."""
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400407 pass
408
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000409 # ContentAddressedCache interface implementation.
410
411 def __contains__(self, digest):
412 with self._lock:
413 return digest in self._lru
414
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400415 def touch(self, digest, size):
416 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000417 try:
418 self._lru.touch(digest)
419 except KeyError:
420 return False
421 return True
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400422
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400423 def getfileobj(self, digest):
424 with self._lock:
425 try:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000426 d = self._lru[digest]
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400427 except KeyError:
428 raise CacheMiss(digest)
429 self._used.append(len(d))
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000430 self._lru.touch(digest)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400431 return io.BytesIO(d)
432
433 def write(self, digest, content):
434 # Assemble whole stream before taking the lock.
Lei Lei73a5f732020-03-23 20:36:14 +0000435 data = six.b('').join(content)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400436 with self._lock:
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000437 self._lru.add(digest, data)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400438 self._added.append(len(data))
439 return digest
440
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400441
442class DiskContentAddressedCache(ContentAddressedCache):
443 """Stateful LRU cache in a flat hash table in a directory.
444
445 Saves its state as json file.
446 """
447 STATE_FILE = u'state.json'
448
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000449 def __init__(self, cache_dir, policies, trim, time_fn=None):
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400450 """
451 Arguments:
452 cache_dir: directory where to place the cache.
453 policies: CachePolicies instance, cache retention policies.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400454 trim: if True to enforce |policies| right away.
Marc-Antoine Ruel79d42192019-02-06 19:24:16 +0000455 It can be done later by calling trim() explicitly.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400456 """
457 # All protected methods (starting with '_') except _path should be called
458 # with self._lock held.
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400459 super(DiskContentAddressedCache, self).__init__(cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400460 self.policies = policies
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400461 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
462 # Items in a LRU lookup dict(digest: size).
463 self._lru = lru.LRUDict()
464 # Current cached free disk space. It is updated by self._trim().
465 file_path.ensure_tree(self.cache_dir)
466 self._free_disk = file_path.get_free_space(self.cache_dir)
467 # The first item in the LRU cache that must not be evicted during this run
468 # since it was referenced. All items more recent that _protected in the LRU
469 # cache are also inherently protected. It could be a set() of all items
470 # referenced but this increases memory usage without a use case.
471 self._protected = None
472 # Cleanup operations done by self._load(), if any.
473 self._operations = []
474 with tools.Profiler('Setup'):
475 with self._lock:
476 self._load(trim, time_fn)
477
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000478 # Cache interface implementation.
479
480 def __len__(self):
481 with self._lock:
482 return len(self._lru)
483
484 def __iter__(self):
485 # This is not thread-safe.
486 return self._lru.__iter__()
487
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400488 def __contains__(self, digest):
489 with self._lock:
490 return digest in self._lru
491
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400492 @property
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400493 def total_size(self):
494 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000495 return sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400496
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000497 def get_oldest(self):
498 with self._lock:
499 try:
500 # (key, (value, ts))
501 return self._lru.get_oldest()[1][1]
502 except KeyError:
503 return None
504
505 def remove_oldest(self):
506 with self._lock:
507 # TODO(maruel): Update self._added.
508 return self._remove_lru_file(True)
509
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +0000510 def save(self):
511 with self._lock:
512 return self._save()
513
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000514 def trim(self):
515 """Forces retention policies."""
516 with self._lock:
517 return self._trim()
518
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400519 def cleanup(self):
520 """Cleans up the cache directory.
521
522 Ensures there is no unknown files in cache_dir.
523 Ensures the read-only bits are set correctly.
524
525 At that point, the cache was already loaded, trimmed to respect cache
526 policies.
527 """
528 with self._lock:
Lei Leife202df2019-06-11 17:33:34 +0000529 fs.chmod(self.cache_dir, 0o700)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400530 # Ensure that all files listed in the state still exist and add new ones.
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000531 previous = set(self._lru)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400532 # It'd be faster if there were a readdir() function.
533 for filename in fs.listdir(self.cache_dir):
534 if filename == self.STATE_FILE:
Lei Leife202df2019-06-11 17:33:34 +0000535 fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400536 continue
537 if filename in previous:
Lei Leife202df2019-06-11 17:33:34 +0000538 fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400539 previous.remove(filename)
540 continue
541
542 # An untracked file. Delete it.
543 logging.warning('Removing unknown file %s from cache', filename)
544 p = self._path(filename)
545 if fs.isdir(p):
546 try:
547 file_path.rmtree(p)
548 except OSError:
549 pass
550 else:
551 file_path.try_remove(p)
552 continue
553
554 if previous:
555 # Filter out entries that were not found.
556 logging.warning('Removed %d lost files', len(previous))
557 for filename in previous:
558 self._lru.pop(filename)
559 self._save()
560
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000561 # Verify hash of every single item to detect corruption. the corrupted
562 # files will be evicted.
563 with self._lock:
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000564 for digest, (_, timestamp) in list(self._lru._items.items()):
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000565 # verify only if the mtime is grather than the timestamp in state.json
566 # to avoid take too long time.
567 if self._get_mtime(digest) <= timestamp:
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000568 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000569 logging.warning('Item has been modified. item: %s', digest)
570 if self._is_valid_hash(digest):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000571 # Update timestamp in state.json
572 self._lru.touch(digest)
573 continue
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000574 # remove corrupted file from LRU and file system
575 self._lru.pop(digest)
576 self._delete_file(digest, UNKNOWN_FILE_SIZE)
577 logging.error('Deleted corrupted item: %s', digest)
578 self._save()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400579
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000580 # ContentAddressedCache interface implementation.
581
582 def __contains__(self, digest):
583 with self._lock:
584 return digest in self._lru
585
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400586 def touch(self, digest, size):
587 """Verifies an actual file is valid and bumps its LRU position.
588
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000589 Returns False if the file is missing or invalid.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400590
591 Note that is doesn't compute the hash so it could still be corrupted if the
592 file size didn't change.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400593 """
594 # Do the check outside the lock.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000595 looks_valid = is_valid_file(self._path(digest), size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400596
597 # Update its LRU position.
598 with self._lock:
599 if digest not in self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000600 if looks_valid:
601 # Exists but not in the LRU anymore.
602 self._delete_file(digest, size)
603 return False
604 if not looks_valid:
605 self._lru.pop(digest)
606 # Exists but not in the LRU anymore.
607 self._delete_file(digest, size)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400608 return False
609 self._lru.touch(digest)
610 self._protected = self._protected or digest
611 return True
612
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400613 def getfileobj(self, digest):
614 try:
615 f = fs.open(self._path(digest), 'rb')
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400616 except IOError:
617 raise CacheMiss(digest)
Vadim Shtayura33054fa2018-11-01 12:47:59 +0000618 with self._lock:
619 try:
620 self._used.append(self._lru[digest])
621 except KeyError:
622 # If the digest is not actually in _lru, assume it is a cache miss.
623 # Existing file will be overwritten by whoever uses the cache and added
624 # to _lru.
625 f.close()
626 raise CacheMiss(digest)
627 return f
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400628
629 def write(self, digest, content):
630 assert content is not None
631 with self._lock:
632 self._protected = self._protected or digest
633 path = self._path(digest)
634 # A stale broken file may remain. It is possible for the file to have write
635 # access bit removed which would cause the file_write() call to fail to open
636 # in write mode. Take no chance here.
637 file_path.try_remove(path)
638 try:
639 size = file_write(path, content)
640 except:
641 # There are two possible places were an exception can occur:
642 # 1) Inside |content| generator in case of network or unzipping errors.
643 # 2) Inside file_write itself in case of disk IO errors.
644 # In any case delete an incomplete file and propagate the exception to
645 # caller, it will be logged there.
646 file_path.try_remove(path)
647 raise
648 # Make the file read-only in the cache. This has a few side-effects since
649 # the file node is modified, so every directory entries to this file becomes
650 # read-only. It's fine here because it is a new file.
651 file_path.set_read_only(path, True)
652 with self._lock:
653 self._add(digest, size)
654 return digest
655
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000656 # Internal functions.
657
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400658 def _load(self, trim, time_fn):
659 """Loads state of the cache from json file.
660
661 If cache_dir does not exist on disk, it is created.
662 """
663 self._lock.assert_locked()
664
665 if not fs.isfile(self.state_file):
666 if not fs.isdir(self.cache_dir):
667 fs.makedirs(self.cache_dir)
668 else:
669 # Load state of the cache.
670 try:
671 self._lru = lru.LRUDict.load(self.state_file)
672 except ValueError as err:
673 logging.error('Failed to load cache state: %s' % (err,))
Takuto Ikutaeccc88c2019-12-13 14:46:32 +0000674 # Don't want to keep broken cache dir.
675 file_path.rmtree(self.cache_dir)
676 fs.makedirs(self.cache_dir)
Matt Kotsenasefe30092020-03-19 01:12:55 +0000677 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400678 if time_fn:
679 self._lru.time_fn = time_fn
680 if trim:
681 self._trim()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400682
683 def _save(self):
684 """Saves the LRU ordering."""
685 self._lock.assert_locked()
686 if sys.platform != 'win32':
687 d = os.path.dirname(self.state_file)
688 if fs.isdir(d):
689 # Necessary otherwise the file can't be created.
690 file_path.set_read_only(d, False)
691 if fs.isfile(self.state_file):
692 file_path.set_read_only(self.state_file, False)
693 self._lru.save(self.state_file)
694
695 def _trim(self):
696 """Trims anything we don't know, make sure enough free space exists."""
697 self._lock.assert_locked()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000698 evicted = []
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400699
700 # Trim old items.
701 if self.policies.max_age_secs:
702 cutoff = self._lru.time_fn() - self.policies.max_age_secs
703 while self._lru:
704 oldest = self._lru.get_oldest()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000705 # (key, (data, ts)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400706 if oldest[1][1] >= cutoff:
707 break
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000708 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400709
710 # Ensure maximum cache size.
711 if self.policies.max_cache_size:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000712 total_size = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400713 while total_size > self.policies.max_cache_size:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000714 e = self._remove_lru_file(True)
715 evicted.append(e)
716 total_size -= e
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400717
718 # Ensure maximum number of items in the cache.
719 if self.policies.max_items and len(self._lru) > self.policies.max_items:
Marc-Antoine Ruel0fdee222019-10-10 14:42:40 +0000720 for _ in range(len(self._lru) - self.policies.max_items):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000721 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400722
723 # Ensure enough free space.
724 self._free_disk = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400725 while (
726 self.policies.min_free_space and
727 self._lru and
728 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000729 # self._free_disk is updated by this call.
730 evicted.append(self._remove_lru_file(True))
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400731
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000732 if evicted:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +0000733 total_usage = sum(self._lru.values())
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400734 usage_percent = 0.
735 if total_usage:
736 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
737
738 logging.warning(
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000739 'Trimmed %d file(s) (%.1fkb) due to not enough free disk space:'
740 ' %.1fkb free, %.1fkb cache (%.1f%% of its maximum capacity of '
Junji Watanabe38b28b02020-04-23 10:23:30 +0000741 '%.1fkb)', len(evicted),
742 sum(evicted) / 1024., self._free_disk / 1024., total_usage / 1024.,
743 usage_percent, self.policies.max_cache_size / 1024.)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400744 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000745 return evicted
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400746
747 def _path(self, digest):
748 """Returns the path to one item."""
749 return os.path.join(self.cache_dir, digest)
750
751 def _remove_lru_file(self, allow_protected):
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000752 """Removes the latest recently used file and returns its size.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000753
754 Updates self._free_disk.
755 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400756 self._lock.assert_locked()
757 try:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000758 digest, _ = self._lru.get_oldest()
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400759 if not allow_protected and digest == self._protected:
Takuto Ikutae40f76a2020-01-20 01:22:17 +0000760 total_size = sum(self._lru.values())
761 msg = ('Not enough space to fetch the whole isolated tree.\n'
Takuto Ikutaa953f272020-01-20 02:59:17 +0000762 ' %s\n cache=%d bytes (%.3f GiB), %d items; '
763 '%s bytes (%.3f GiB) free_space') % (
764 self.policies, total_size, float(total_size) / 1024**3,
765 len(self._lru), self._free_disk,
766 float(self._free_disk) / 1024**3)
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400767 raise NoMoreSpace(msg)
768 except KeyError:
769 # That means an internal error.
770 raise NoMoreSpace('Nothing to remove, can\'t happend')
771 digest, (size, _) = self._lru.pop_oldest()
772 logging.debug('Removing LRU file %s', digest)
773 self._delete_file(digest, size)
774 return size
775
776 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
777 """Adds an item into LRU cache marking it as a newest one."""
778 self._lock.assert_locked()
779 if size == UNKNOWN_FILE_SIZE:
780 size = fs.stat(self._path(digest)).st_size
781 self._added.append(size)
782 self._lru.add(digest, size)
783 self._free_disk -= size
784 # Do a quicker version of self._trim(). It only enforces free disk space,
785 # not cache size limits. It doesn't actually look at real free disk space,
786 # only uses its cache values. self._trim() will be called later to enforce
787 # real trimming but doing this quick version here makes it possible to map
788 # an isolated that is larger than the current amount of free disk space when
789 # the cache size is already large.
Junji Watanabe38b28b02020-04-23 10:23:30 +0000790 while (self.policies.min_free_space and self._lru and
791 self._free_disk < self.policies.min_free_space):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000792 # self._free_disk is updated by this call.
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400793 if self._remove_lru_file(False) == -1:
794 break
795
796 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000797 """Deletes cache file from the file system.
798
799 Updates self._free_disk.
800 """
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400801 self._lock.assert_locked()
802 try:
803 if size == UNKNOWN_FILE_SIZE:
804 try:
805 size = fs.stat(self._path(digest)).st_size
806 except OSError:
807 size = 0
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000808 if file_path.try_remove(self._path(digest)):
809 self._free_disk += size
Marc-Antoine Ruel2666d9c2018-05-18 13:52:02 -0400810 except OSError as e:
811 if e.errno != errno.ENOENT:
812 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400813
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000814 def _get_mtime(self, digest):
815 """Get mtime of cache file."""
816 return os.path.getmtime(self._path(digest))
817
818 def _is_valid_hash(self, digest):
819 """Verify digest with supported hash algos."""
820 for _, algo in isolated_format.SUPPORTED_ALGOS.items():
Quinten Yearsley0bc84ce2020-04-09 22:38:08 +0000821 if digest == isolated_format.hash_file(self._path(digest), algo):
822 return True
Junji Watanabe5e73aab2020-04-09 04:20:27 +0000823 return False
824
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400825
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400826class NamedCache(Cache):
827 """Manages cache directories.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400828
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400829 A cache entry is a tuple (name, path), where
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400830 name is a short identifier that describes the contents of the cache, e.g.
831 "git_v8" could be all git repositories required by v8 builds, or
832 "build_chromium" could be build artefacts of the Chromium.
833 path is a directory path relative to the task run dir. Cache installation
834 puts the requested cache directory at the path.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400835 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400836 _DIR_ALPHABET = string.ascii_letters + string.digits
837 STATE_FILE = u'state.json'
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +0000838 NAMED_DIR = u'named'
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400839
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400840 def __init__(self, cache_dir, policies, time_fn=None):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400841 """Initializes NamedCaches.
842
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400843 Arguments:
844 - cache_dir is a directory for persistent cache storage.
845 - policies is a CachePolicies instance.
846 - time_fn is a function that returns timestamp (float) and used to take
847 timestamps when new caches are requested. Used in unit tests.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400848 """
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400849 super(NamedCache, self).__init__(cache_dir)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400850 self._policies = policies
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000851 # LRU {cache_name -> tuple(cache_location, size)}
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400852 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
853 self._lru = lru.LRUDict()
854 if not fs.isdir(self.cache_dir):
855 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000856 elif fs.isfile(self.state_file):
Marc-Antoine Ruel3543e212018-05-23 01:04:34 +0000857 try:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400858 self._lru = lru.LRUDict.load(self.state_file)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000859 for _, size in self._lru.values():
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000860 if not isinstance(size, six.integer_types):
Takuto Ikuta6acf8f92020-07-02 02:06:42 +0000861 with open(self.state_file, 'r') as f:
862 logging.info('named cache state file: %s\n%s', self.state_file,
863 f.read())
Junji Watanabeedcf47d2020-06-11 08:41:01 +0000864 raise ValueError("size is not integer: %s" % size)
Takuto Ikutac4b85ec2020-06-09 03:42:39 +0000865
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400866 except ValueError:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000867 logging.exception(
868 'NamedCache: failed to load named cache state file; obliterating')
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400869 file_path.rmtree(self.cache_dir)
Takuto Ikuta568ddb22020-01-20 23:24:16 +0000870 fs.makedirs(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000871 with self._lock:
872 self._try_upgrade()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400873 if time_fn:
874 self._lru.time_fn = time_fn
875
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400876 @property
877 def available(self):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000878 """Returns a set of names of available caches."""
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400879 with self._lock:
Marc-Antoine Ruel09a76e42018-06-14 19:02:00 +0000880 return set(self._lru)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400881
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000882 def install(self, dst, name):
883 """Creates the directory |dst| and moves a previous named cache |name| if it
884 was in the local named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400885
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000886 dst must be absolute, unicode and must not exist.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400887
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000888 Returns the reused named cache size in bytes, or 0 if none was present.
889
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400890 Raises NamedCacheError if cannot install the cache.
891 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000892 logging.info('NamedCache.install(%r, %r)', dst, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400893 with self._lock:
894 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000895 if fs.isdir(dst):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400896 raise NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000897 'installation directory %r already exists' % dst)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400898
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000899 # Remove the named symlink if it exists.
900 link_name = self._get_named_path(name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000901 if fs.exists(link_name):
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000902 # Remove the symlink itself, not its destination.
903 fs.remove(link_name)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000904
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000905 if name in self._lru:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000906 rel_cache, size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400907 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000908 if fs.isdir(abs_cache):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000909 logging.info('- reusing %r; size was %d', rel_cache, size)
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000910 file_path.ensure_tree(os.path.dirname(dst))
911 fs.rename(abs_cache, dst)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400912 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000913 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400914
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000915 logging.warning('- expected directory %r, does not exist', rel_cache)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400916 self._remove(name)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400917
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000918 # The named cache does not exist, create an empty directory. When
919 # uninstalling, we will move it back to the cache and create an an
920 # entry.
921 logging.info('- creating new directory')
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000922 file_path.ensure_tree(dst)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000923 return 0
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400924 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +0000925 # Raise using the original traceback.
926 exc = NamedCacheError(
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000927 'cannot install cache named %r at %r: %s' % (name, dst, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +0000928 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000929 finally:
930 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400931
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000932 def uninstall(self, src, name):
933 """Moves the cache directory back into the named cache hive for an eventual
934 reuse.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400935
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000936 The opposite of install().
937
938 src must be absolute and unicode. Its content is moved back into the local
939 named caches cache.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400940
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000941 Returns the named cache size in bytes.
942
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400943 Raises NamedCacheError if cannot uninstall the cache.
944 """
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000945 logging.info('NamedCache.uninstall(%r, %r)', src, name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400946 with self._lock:
947 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000948 if not fs.isdir(src):
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400949 logging.warning(
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000950 'NamedCache: Directory %r does not exist anymore. Cache lost.',
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000951 src)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400952 return
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400953
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000954 if name in self._lru:
955 # This shouldn't happen but just remove the preexisting one and move
956 # on.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000957 logging.error('- overwriting existing cache!')
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000958 self._remove(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000959
Takuto Ikuta93483272020-06-05 09:06:34 +0000960 # Calculate the size of the named cache to keep.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000961 size = _get_recursive_size(src)
962 logging.info('- Size is %d', size)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400963
964 # Move the dir and create an entry for the named cache.
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000965 rel_cache = self._allocate_dir()
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400966 abs_cache = os.path.join(self.cache_dir, rel_cache)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000967 logging.info('- Moving to %r', rel_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400968 file_path.ensure_tree(os.path.dirname(abs_cache))
Takuto Ikuta17a27ac2020-06-24 08:11:55 +0000969
970 if sys.platform != 'win32':
971 uid = os.getuid()
972 if os.stat(src).st_uid != uid:
973 # Maybe owner of |src| is different from runner of this script. This
974 # is to make fs.rename work in that case.
975 # https://crbug.com/986676
976 subprocess.check_call(['sudo', '-n', 'chown', str(uid), src])
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +0000977 fs.rename(src, abs_cache)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -0400978
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000979 self._lru.add(name, (rel_cache, size))
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +0000980 self._added.append(size)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000981
982 # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
983 # for user convenience.
984 named_path = self._get_named_path(name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000985 if fs.exists(named_path):
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000986 file_path.remove(named_path)
987 else:
988 file_path.ensure_tree(os.path.dirname(named_path))
989
990 try:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000991 fs.symlink(os.path.join(u'..', rel_cache), named_path)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +0000992 logging.info(
993 'NamedCache: Created symlink %r to %r', named_path, abs_cache)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +0000994 except OSError:
995 # Ignore on Windows. It happens when running as a normal user or when
996 # UAC is enabled and the user is a filtered administrator account.
997 if sys.platform != 'win32':
998 raise
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +0000999 return size
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001000 except (IOError, OSError) as ex:
Marc-Antoine Ruel799bc4f2019-01-30 22:54:47 +00001001 # Raise using the original traceback.
1002 exc = NamedCacheError(
Marc-Antoine Ruel97430be2019-01-25 18:26:34 +00001003 'cannot uninstall cache named %r at %r: %s' % (name, src, ex))
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001004 six.reraise(type(exc), exc, sys.exc_info()[2])
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001005 finally:
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001006 # Call save() at every uninstall. The assumptions are:
1007 # - The total the number of named caches is low, so the state.json file
1008 # is small, so the time it takes to write it to disk is short.
1009 # - The number of mapped named caches per task is low, so the number of
1010 # times save() is called on tear-down isn't high enough to be
1011 # significant.
1012 # - uninstall() sometimes throws due to file locking on Windows or
1013 # access rights on Linux. We want to keep as many as possible.
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001014 self._save()
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001015
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001016 # Cache interface implementation.
1017
1018 def __len__(self):
1019 with self._lock:
1020 return len(self._lru)
1021
1022 def __iter__(self):
1023 # This is not thread-safe.
1024 return self._lru.__iter__()
1025
John Budorickc6186972020-02-26 00:58:14 +00001026 def __contains__(self, name):
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001027 with self._lock:
John Budorickc6186972020-02-26 00:58:14 +00001028 return name in self._lru
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001029
1030 @property
1031 def total_size(self):
1032 with self._lock:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001033 return sum(size for _rel_path, size in self._lru.values())
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001034
1035 def get_oldest(self):
1036 with self._lock:
1037 try:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001038 # (key, (value, ts))
1039 return self._lru.get_oldest()[1][1]
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001040 except KeyError:
1041 return None
1042
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001043 def remove_oldest(self):
1044 with self._lock:
1045 # TODO(maruel): Update self._added.
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001046 _name, size = self._remove_lru_item()
1047 return size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001048
Marc-Antoine Ruel29db8452018-08-01 17:46:33 +00001049 def save(self):
1050 with self._lock:
1051 return self._save()
1052
John Budorickc6186972020-02-26 00:58:14 +00001053 def touch(self, *names):
1054 with self._lock:
1055 for name in names:
1056 if name in self._lru:
1057 self._lru.touch(name)
1058 self._save()
1059
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001060 def trim(self):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001061 evicted = []
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001062 with self._lock:
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001063 if not fs.isdir(self.cache_dir):
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001064 return evicted
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001065
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001066 # Trim according to maximum number of items.
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001067 if self._policies.max_items:
1068 while len(self._lru) > self._policies.max_items:
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001069 name, size = self._remove_lru_item()
1070 evicted.append(size)
1071 logging.info(
1072 'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
1073 name, size, self._policies.max_items)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001074
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001075 # Trim according to maximum age.
1076 if self._policies.max_age_secs:
1077 cutoff = self._lru.time_fn() - self._policies.max_age_secs
1078 while self._lru:
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001079 _name, (_data, ts) = self._lru.get_oldest()
1080 if ts >= cutoff:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001081 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001082 name, size = self._remove_lru_item()
1083 evicted.append(size)
1084 logging.info(
1085 'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
1086 name, size, self._policies.max_age_secs)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001087
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001088 # Trim according to minimum free space.
1089 if self._policies.min_free_space:
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001090 while self._lru:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001091 free_space = file_path.get_free_space(self.cache_dir)
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001092 if free_space >= self._policies.min_free_space:
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001093 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001094 name, size = self._remove_lru_item()
1095 evicted.append(size)
1096 logging.info(
1097 'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
1098 name, size, self._policies.min_free_space)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001099
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001100 # Trim according to maximum total size.
1101 if self._policies.max_cache_size:
1102 while self._lru:
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001103 total = sum(size for _rel_cache, size in self._lru.values())
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001104 if total <= self._policies.max_cache_size:
1105 break
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001106 name, size = self._remove_lru_item()
1107 evicted.append(size)
1108 logging.info(
1109 'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
1110 name, size, self._policies.max_cache_size)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001111
Marc-Antoine Ruele79ddbf2018-06-13 18:33:07 +00001112 self._save()
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001113 return evicted
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001114
1115 def cleanup(self):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001116 """Removes unknown directories.
1117
1118 Does not recalculate the cache size since it's surprisingly slow on some
1119 OSes.
1120 """
1121 success = True
1122 with self._lock:
1123 try:
1124 actual = set(fs.listdir(self.cache_dir))
1125 actual.discard(self.NAMED_DIR)
1126 actual.discard(self.STATE_FILE)
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001127 expected = {v[0]: k for k, v in self._lru.items()}
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001128 # First, handle the actual cache content.
1129 # Remove missing entries.
1130 for missing in (set(expected) - actual):
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001131 name, size = self._lru.pop(expected[missing])
1132 logging.warning(
1133 'NamedCache.cleanup(): Missing on disk %r(%d)', name, size)
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001134 # Remove unexpected items.
1135 for unexpected in (actual - set(expected)):
1136 try:
1137 p = os.path.join(self.cache_dir, unexpected)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001138 logging.warning(
1139 'NamedCache.cleanup(): Unexpected %r', unexpected)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001140 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001141 file_path.rmtree(p)
1142 else:
1143 fs.remove(p)
1144 except (IOError, OSError) as e:
1145 logging.error('Failed to remove %s: %s', unexpected, e)
1146 success = False
1147
1148 # Second, fix named cache links.
1149 named = os.path.join(self.cache_dir, self.NAMED_DIR)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001150 if fs.isdir(named):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001151 actual = set(fs.listdir(named))
1152 expected = set(self._lru)
1153 # Confirm entries. Do not add missing ones for now.
1154 for name in expected.intersection(actual):
1155 p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001156 expected_link = os.path.join(u'..', self._lru[name][0])
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001157 if fs.islink(p):
1158 link = fs.readlink(p)
1159 if expected_link == link:
1160 continue
1161 logging.warning(
1162 'Unexpected symlink for cache %s: %s, expected %s',
1163 name, link, expected_link)
1164 else:
1165 logging.warning('Unexpected non symlink for cache %s', name)
Marc-Antoine Ruel41362222018-06-28 14:52:34 +00001166 if fs.isdir(p) and not fs.islink(p):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001167 file_path.rmtree(p)
1168 else:
1169 fs.remove(p)
1170 # Remove unexpected items.
1171 for unexpected in (actual - expected):
1172 try:
1173 p = os.path.join(self.cache_dir, self.NAMED_DIR, unexpected)
1174 if fs.isdir(p):
1175 file_path.rmtree(p)
1176 else:
1177 fs.remove(p)
1178 except (IOError, OSError) as e:
1179 logging.error('Failed to remove %s: %s', unexpected, e)
1180 success = False
1181 finally:
1182 self._save()
1183 return success
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001184
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +00001185 # Internal functions.
1186
1187 def _try_upgrade(self):
1188 """Upgrades from the old format to the new one if necessary.
1189
1190 This code can be removed so all bots are known to have the right new format.
1191 """
1192 if not self._lru:
1193 return
1194 _name, (data, _ts) = self._lru.get_oldest()
1195 if isinstance(data, (list, tuple)):
1196 return
1197 # Update to v2.
1198 def upgrade(_name, rel_cache):
1199 abs_cache = os.path.join(self.cache_dir, rel_cache)
1200 return rel_cache, _get_recursive_size(abs_cache)
1201 self._lru.transform(upgrade)
1202 self._save()
1203
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001204 def _remove_lru_item(self):
1205 """Removes the oldest LRU entry. LRU must not be empty."""
1206 name, ((_rel_path, size), _ts) = self._lru.get_oldest()
1207 logging.info('Removing named cache %r', name)
1208 self._remove(name)
Marc-Antoine Ruel44699b32018-09-24 23:31:50 +00001209 return name, size
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001210
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001211 def _allocate_dir(self):
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001212 """Creates and returns relative path of a new cache directory.
1213
1214 In practice, it is a 2-letter string.
1215 """
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001216 # We randomly generate directory names that have two lower/upper case
1217 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
1218 abc_len = len(self._DIR_ALPHABET)
1219 tried = set()
1220 while len(tried) < 1000:
1221 i = random.randint(0, abc_len * abc_len - 1)
1222 rel_path = (
Takuto Ikuta1c717d72020-06-29 10:15:09 +00001223 self._DIR_ALPHABET[i // abc_len] + self._DIR_ALPHABET[i % abc_len])
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001224 if rel_path in tried:
1225 continue
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001226 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001227 if not fs.exists(abs_path):
1228 return rel_path
1229 tried.add(rel_path)
1230 raise NamedCacheError(
1231 'could not allocate a new cache dir, too many cache dirs')
1232
1233 def _remove(self, name):
1234 """Removes a cache directory and entry.
1235
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001236 Returns:
1237 Number of caches deleted.
1238 """
1239 self._lock.assert_locked()
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001240 # First try to remove the alias if it exists.
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001241 named_dir = self._get_named_path(name)
1242 if fs.islink(named_dir):
1243 fs.unlink(named_dir)
1244
Marc-Antoine Ruel33e9f102018-06-14 19:08:01 +00001245 # Then remove the actual data.
1246 if name not in self._lru:
1247 return
1248 rel_path, _size = self._lru.get(name)
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001249 abs_path = os.path.join(self.cache_dir, rel_path)
Marc-Antoine Ruel957c7c22019-01-25 22:21:05 +00001250 if fs.isdir(abs_path):
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001251 file_path.rmtree(abs_path)
1252 self._lru.pop(name)
1253
Marc-Antoine Ruel49f9f8d2018-05-24 15:57:06 -04001254 def _save(self):
1255 self._lock.assert_locked()
1256 file_path.ensure_tree(self.cache_dir)
1257 self._lru.save(self.state_file)
1258
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001259 def _get_named_path(self, name):
Marc-Antoine Ruel9a518d02018-06-16 14:41:12 +00001260 return os.path.join(self.cache_dir, self.NAMED_DIR, name)